mirror of
https://github.com/ROCm/ROCm.git
synced 2026-01-10 15:18:11 -05:00
Compare commits
49 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
54ba8bfed1 | ||
|
|
55e13a3c38 | ||
|
|
d1debc7e45 | ||
|
|
169f3bbe5e | ||
|
|
e28eac2fe1 | ||
|
|
97ccce10ef | ||
|
|
217fb452f8 | ||
|
|
a7c158a14f | ||
|
|
85778177a1 | ||
|
|
28060c104b | ||
|
|
84177354de | ||
|
|
7458fcb7ab | ||
|
|
16d6e59003 | ||
|
|
fd9f576b26 | ||
|
|
a66bc1d85e | ||
|
|
36b6ffaf7c | ||
|
|
40e4ba3ecc | ||
|
|
1f41ce26be | ||
|
|
9293723381 | ||
|
|
588752ade4 | ||
|
|
c3faa9670b | ||
|
|
7246a673ce | ||
|
|
3f1c07afd7 | ||
|
|
b29b3592bd | ||
|
|
2b2732fe6f | ||
|
|
396b6375ba | ||
|
|
37a56b4ab6 | ||
|
|
fc162d11e0 | ||
|
|
34288b5af2 | ||
|
|
460e4be71d | ||
|
|
25ca422954 | ||
|
|
27edda496d | ||
|
|
9ff3c2c885 | ||
|
|
0d28491d16 | ||
|
|
7f708c8d87 | ||
|
|
2ab35b3732 | ||
|
|
d057d49af1 | ||
|
|
15ec4cf910 | ||
|
|
ec36f39665 | ||
|
|
310864e653 | ||
|
|
330aa6f23d | ||
|
|
eb090b8788 | ||
|
|
af18a170bc | ||
|
|
8b423430a4 | ||
|
|
21b22f89f5 | ||
|
|
87b9fac022 | ||
|
|
656db2bc84 | ||
|
|
295e1e2998 | ||
|
|
6770798faf |
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -27,12 +32,12 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
|
||||
- onnx>=1.14.1
|
||||
- numpy>=1.21.6
|
||||
- typing>=3.7.4
|
||||
- pytest>=6.0.1
|
||||
- packaging>=23.0
|
||||
- protobuf>=3.20.2
|
||||
- "onnx>=1.14.1"
|
||||
- "numpy>=1.21.6"
|
||||
- "typing>=3.7.4"
|
||||
- "pytest>=6.0.1"
|
||||
- "packaging>=23.0"
|
||||
- "protobuf>=3.20.2"
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -115,6 +120,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -145,7 +151,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -42,7 +47,7 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
@@ -67,6 +72,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependenciesAMD }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
# compile clr
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -99,7 +105,7 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
@@ -125,6 +131,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependenciesNvidia }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- script: 'ls -1R $(Agent.BuildDirectory)/rocm'
|
||||
displayName: 'Artifact listing'
|
||||
# compile clr
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -77,7 +82,7 @@ jobs:
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
pool: ${{ variables.HIGH_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
@@ -97,6 +102,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- task: Bash@3
|
||||
displayName: Build and install other dependencies
|
||||
inputs:
|
||||
@@ -107,7 +113,7 @@ jobs:
|
||||
mkdir -p $(Agent.BuildDirectory)/miopen-deps
|
||||
export CXX=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
export CC=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
cmake -P install_deps.cmake --prefix $(Agent.BuildDirectory)/miopen-deps
|
||||
cmake -P install_deps.cmake --prefix $(Agent.BuildDirectory)/miopen-deps --generator Ninja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -142,7 +148,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
@@ -207,7 +214,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: MIOpen
|
||||
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex test_rnn_seq_api'
|
||||
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex "test_rnn_seq_api|GPU_Conv2dTuningAsm_FP32"'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -103,6 +108,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -129,7 +135,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -42,7 +47,7 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
@@ -58,6 +63,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -78,7 +84,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -24,7 +29,7 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
@@ -40,6 +45,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
# reference: https://github.com/ROCm/ROCgdb/blob/amd-staging/README-ROCM.md
|
||||
- name: aptPackages
|
||||
type: object
|
||||
@@ -46,7 +51,8 @@ jobs:
|
||||
condition:
|
||||
and(
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
@@ -69,6 +75,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-autotools.yml
|
||||
parameters:
|
||||
configureFlags: >-
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -96,6 +101,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -126,7 +132,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -46,7 +51,7 @@ jobs:
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
@@ -63,6 +68,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- task: Bash@3
|
||||
displayName: Create wheel file
|
||||
@@ -104,7 +110,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -67,6 +72,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -92,7 +98,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -58,7 +63,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -23,7 +28,7 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
@@ -39,6 +44,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: aomp-extras
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
# reference:
|
||||
# https://github.com/ROCm/aomp/blob/aomp-dev/docs/SOURCEINSTALL_PREREQUISITE.md
|
||||
- name: aptPackages
|
||||
@@ -108,6 +113,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: extras
|
||||
@@ -176,7 +182,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -71,6 +76,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- script: |
|
||||
mkdir -p $(CCACHE_DIR)
|
||||
echo "##vso[task.prependpath]/usr/lib/ccache"
|
||||
@@ -117,7 +123,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -25,7 +30,7 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
@@ -41,6 +46,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -75,6 +80,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
# compile hip-tests
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -109,7 +115,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -29,7 +34,7 @@ jobs:
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
@@ -45,6 +50,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -89,6 +94,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -121,13 +127,12 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: AMD_COMGR_CACHE
|
||||
value: 0
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -22,7 +27,7 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- joblib
|
||||
- packaging>=22.0
|
||||
- "packaging>=22.0"
|
||||
- --upgrade
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
@@ -98,6 +103,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- task: Bash@3
|
||||
displayName: Add ROCm binaries to PATH
|
||||
inputs:
|
||||
@@ -179,15 +185,14 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
- name: AMD_COMGR_CACHE
|
||||
value: 0
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -67,6 +72,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -94,7 +100,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -79,6 +84,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -112,7 +118,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -70,6 +75,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -101,7 +107,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -82,6 +87,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
# build external gtest and lapack
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -122,13 +128,12 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: AMD_COMGR_CACHE
|
||||
value: 0
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -77,6 +82,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -116,13 +122,12 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: AMD_COMGR_CACHE
|
||||
value: 0
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -91,6 +96,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
# Build and install gtest and lapack
|
||||
# $(Pipeline.Workspace)/deps is a temporary folder for the build process
|
||||
# $(Pipeline.Workspace)/s/deps is part of the hipSPARSELt repo
|
||||
@@ -150,7 +156,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -66,6 +71,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -95,13 +101,12 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: AMD_COMGR_CACHE
|
||||
value: 0
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -76,6 +81,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -111,7 +117,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -45,6 +50,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
skipLlvmSymlink: true
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: rocm-llvm
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -72,7 +77,7 @@ jobs:
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: HIP_ROCCLR_HOME
|
||||
value: $(Build.BinariesDirectory)/rocm
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
pool: ${{ variables.HIGH_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
@@ -90,6 +95,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -124,7 +130,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -89,6 +94,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
# Build grpc
|
||||
- task: Bash@3
|
||||
displayName: 'git clone grpc'
|
||||
@@ -135,7 +141,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -151,6 +156,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -182,7 +188,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -85,6 +90,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -117,7 +123,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -101,6 +106,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -145,13 +151,12 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: AMD_COMGR_CACHE
|
||||
value: 0
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -73,6 +78,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -92,7 +98,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -79,6 +84,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -113,7 +119,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -75,6 +80,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -83,6 +89,8 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
@@ -96,7 +104,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
@@ -115,6 +124,8 @@ jobs:
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
@@ -140,4 +151,3 @@ jobs:
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
registerROCmPackages: true
|
||||
optSymLink: true
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -62,6 +67,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -87,7 +93,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -66,6 +71,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -93,7 +99,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -77,6 +82,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- task: Bash@3
|
||||
displayName: 'Save Python Package Paths'
|
||||
inputs:
|
||||
@@ -152,7 +158,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -70,6 +75,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -98,7 +104,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -88,6 +93,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: lapack
|
||||
@@ -131,13 +137,12 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: AMD_COMGR_CACHE
|
||||
value: 0
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -29,6 +34,7 @@ parameters:
|
||||
- rocminfo
|
||||
- rocPRIM
|
||||
- rocprofiler-register
|
||||
- roctracer
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -80,6 +86,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -126,13 +133,12 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: AMD_COMGR_CACHE
|
||||
value: 0
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -71,6 +76,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -98,7 +104,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -65,7 +70,7 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
pool: ${{ variables.HIGH_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
@@ -81,6 +86,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -113,13 +119,12 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: AMD_COMGR_CACHE
|
||||
value: 0
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -26,7 +31,7 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -17,7 +22,7 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -100,6 +105,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
# https://github.com/ROCm/HIP/issues/2203
|
||||
@@ -136,7 +142,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,12 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -49,7 +55,7 @@ jobs:
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
- name: ROCR_LIB_DIR
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
@@ -66,6 +72,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -90,7 +97,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -28,7 +33,7 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
@@ -59,7 +64,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -38,7 +43,7 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
@@ -55,6 +60,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
skipLlvmSymlink: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -72,7 +78,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,10 +5,17 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- libdw-dev
|
||||
- libtbb-dev
|
||||
- locales
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
@@ -17,10 +24,10 @@ parameters:
|
||||
default:
|
||||
- astunparse==1.6.2
|
||||
- colorlover
|
||||
- dash>=1.12.0
|
||||
- "dash>=1.12.0"
|
||||
- matplotlib
|
||||
- numpy>=1.17.5
|
||||
- pandas>=1.4.3
|
||||
- "numpy>=1.17.5"
|
||||
- "pandas>=1.4.3"
|
||||
- pymongo
|
||||
- pyyaml
|
||||
- tabulate
|
||||
@@ -114,6 +121,7 @@ jobs:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
dependencySource: ${{ job.dependencySource }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -140,7 +148,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
@@ -182,12 +191,9 @@ jobs:
|
||||
displayName: Add ROCm binaries to PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
|
||||
- task: Bash@3
|
||||
displayName: Add ROCm compilers to PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
|
||||
script: |
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -206,18 +212,6 @@ jobs:
|
||||
componentName: rocprofiler-compute
|
||||
testDir: $(Build.BinariesDirectory)/libexec/rocprofiler-compute
|
||||
testExecutable: ROCM_PATH=$(Agent.BuildDirectory)/rocm ctest
|
||||
- task: Bash@3
|
||||
displayName: Remove ROCm binaries from PATH
|
||||
condition: always()
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: echo "##vso[task.setvariable variable=PATH]$(echo $PATH | sed -e 's;:$(Agent.BuildDirectory)/rocm/bin;;' -e 's;^/;;' -e 's;/$;;')"
|
||||
- task: Bash@3
|
||||
displayName: Remove ROCm compilers from PATH
|
||||
condition: always()
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: echo "##vso[task.setvariable variable=PATH]$(echo $PATH | sed -e 's;:$(Agent.BuildDirectory)/rocm/llvm/bin;;' -e 's;^/;;' -e 's;/$;;')"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -17,7 +22,7 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
@@ -32,20 +37,14 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: rocprofiler-register
|
||||
extraBuildFlags: >-
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: rocprofiler-register-tests
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Build.BinariesDirectory)
|
||||
-DROCPROFILER_REGISTER_BUILD_TESTS=ON
|
||||
-DROCPROFILER_REGISTER_BUILD_SAMPLES=ON
|
||||
-GNinja
|
||||
cmakeBuildDir: 'tests/build'
|
||||
installEnabled: false
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocprofiler-register
|
||||
testDir: 'tests/build'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -31,7 +36,7 @@ parameters:
|
||||
- pandas
|
||||
- perfetto
|
||||
- pycobertura
|
||||
- pytest>=6.2.5
|
||||
- "pytest>=6.2.5"
|
||||
- pyyaml
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
@@ -89,6 +94,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- task: Bash@3
|
||||
displayName: Add Python site-packages binaries to path
|
||||
inputs:
|
||||
@@ -125,7 +131,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -6,6 +6,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -16,6 +21,7 @@ parameters:
|
||||
- bzip2
|
||||
- clang
|
||||
- cmake
|
||||
- chrpath
|
||||
- environment-modules
|
||||
- ffmpeg
|
||||
- g++-12
|
||||
@@ -61,6 +67,7 @@ parameters:
|
||||
- rocprofiler-register
|
||||
- rocprofiler-sdk
|
||||
- ROCR-Runtime
|
||||
- roctracer
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
@@ -103,6 +110,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- task: Bash@3
|
||||
displayName: Add ROCm binaries to PATH
|
||||
inputs:
|
||||
@@ -124,6 +132,7 @@ jobs:
|
||||
-DDYNINST_BUILD_BOOST=ON
|
||||
-DROCPROFSYS_USE_PAPI=ON
|
||||
-DROCPROFSYS_USE_MPI=ON
|
||||
-DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/include/rocjpeg
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
@@ -147,7 +156,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
timeoutInMinutes: 180
|
||||
variables:
|
||||
@@ -197,6 +207,7 @@ jobs:
|
||||
-DDYNINST_BUILD_BOOST=ON
|
||||
-DROCPROFSYS_USE_PAPI=ON
|
||||
-DROCPROFSYS_USE_MPI=ON
|
||||
-DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/include/rocjpeg
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-GNinja
|
||||
- task: Bash@3
|
||||
|
||||
@@ -5,6 +5,12 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -83,6 +89,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -115,7 +122,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -49,7 +54,7 @@ jobs:
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
@@ -65,6 +70,7 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -86,7 +92,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -77,6 +82,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -107,7 +113,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -5,6 +5,11 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
@@ -84,6 +89,7 @@ jobs:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -113,7 +119,8 @@ jobs:
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
|
||||
@@ -68,7 +68,7 @@ parameters:
|
||||
default:
|
||||
- cmake
|
||||
- astunparse
|
||||
- expecttest>=0.2.1
|
||||
- "expecttest>=0.2.1"
|
||||
- hypothesis
|
||||
- numpy
|
||||
- psutil
|
||||
@@ -76,8 +76,8 @@ parameters:
|
||||
- requests
|
||||
- setuptools==75.8.0
|
||||
- types-dataclasses
|
||||
- typing-extensions>=4.8.0
|
||||
- sympy>=1.13.0
|
||||
- "typing-extensions>=4.8.0"
|
||||
- "sympy>=1.13.0"
|
||||
- filelock
|
||||
- networkx
|
||||
- jinja2
|
||||
@@ -85,8 +85,8 @@ parameters:
|
||||
- lintrunner
|
||||
- ninja
|
||||
- packaging
|
||||
- optree>=0.13.0
|
||||
- click>=8.0.3
|
||||
- "optree>=0.13.0"
|
||||
- "click>=8.0.3"
|
||||
# list for vision
|
||||
- auditwheel
|
||||
- future
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import requests
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
def get_builds(entries, gpu_target, output):
|
||||
already_downloaded = {}
|
||||
for entry in entries:
|
||||
already_downloaded = _get_builds(entry, gpu_target, already_downloaded, output)
|
||||
|
||||
def _get_builds(entry, gpu_target, already_downloaded, output):
|
||||
print()
|
||||
print(f"{entry['buildNumber']} - {entry['buildId']} - {entry['repoName']}")
|
||||
if already_downloaded.get(entry['buildId']):
|
||||
print('Skipping, already downloaded from build ' + entry['buildId'])
|
||||
return already_downloaded
|
||||
|
||||
artifacts_url = f"https://dev.azure.com/ROCm-CI/ROCm-CI/_apis/build/builds/{entry['buildId']}/artifacts?api-version=7.1"
|
||||
artifacts = requests.get(artifacts_url).json()
|
||||
for artifact in artifacts['value']:
|
||||
if 'gfx' in artifact['name'] and gpu_target not in artifact['name']:
|
||||
continue
|
||||
|
||||
print('Artifact name: ' + artifact['name'])
|
||||
print('File size: ~' +
|
||||
str(round(int(artifact['resource']['properties']['artifactsize'])/1000000, 2)) + ' MB')
|
||||
download_url = f"{artifact['resource']['downloadUrl']}"
|
||||
download = requests.get(download_url)
|
||||
|
||||
zip_file = Path(output) / f"{artifact['name']}.zip"
|
||||
with open(zip_file, 'wb') as f:
|
||||
f.write(download.content)
|
||||
already_downloaded[entry['buildId']] = True
|
||||
|
||||
return already_downloaded
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Command line tool for downloading external ci artifacts")
|
||||
parser.add_argument('--target', type=str, dest="target", choices=["gfx90a", "gfx942"], help="Target gfx")
|
||||
parser.add_argument('--manifest', type=str, dest="manifest", help='JSON manifest url or path to local manifest')
|
||||
parser.add_argument('--output_dir', type=str, dest="output", help='Path to download directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
manifest = args.manifest
|
||||
gpu_target = args.target
|
||||
|
||||
if not gpu_target:
|
||||
print("Enter the GPU target (gfx942, gfx90a)")
|
||||
gpu_target = input()
|
||||
|
||||
if not manifest:
|
||||
print("Enter the manifest file (URL or local path)")
|
||||
manifest = input()
|
||||
|
||||
if 'http' in manifest:
|
||||
data = requests.get(manifest).json()
|
||||
else:
|
||||
with open(manifest, 'r') as f:
|
||||
data = json.load(f)
|
||||
|
||||
entries = [e for e in data['current']]
|
||||
entries.extend([e for e in data['dependencies']])
|
||||
get_builds(entries, gpu_target, args.output)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1 @@
|
||||
requests
|
||||
@@ -12,6 +12,11 @@ parameters:
|
||||
- name: fileFilter
|
||||
type: string
|
||||
default: ''
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
@@ -27,17 +32,23 @@ steps:
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: Download ${{ parameters.componentName }}
|
||||
inputs:
|
||||
buildType: 'specific'
|
||||
project: ROCm-CI
|
||||
definition: ${{ parameters.pipelineId }}
|
||||
specificBuildWithTriggering: true
|
||||
itemPattern: '**/*${{ parameters.fileFilter }}*'
|
||||
# aomp is a special case, since the trigger file is under ROCm/ROCm instead of the component repo
|
||||
${{ if notIn(parameters.componentName, 'aomp') }}:
|
||||
buildVersionToDownload: latestFromBranch # default is 'latest'
|
||||
branchName: refs/heads/${{ parameters.branchName }}
|
||||
allowPartiallySucceededBuilds: $(allowPartiallySucceededBuilds)
|
||||
targetPath: '$(Pipeline.Workspace)/d'
|
||||
${{ if eq(parameters.aggregatePipeline, false) }}:
|
||||
buildType: 'specific'
|
||||
project: ROCm-CI
|
||||
definition: ${{ parameters.pipelineId }}
|
||||
specificBuildWithTriggering: true
|
||||
itemPattern: '**/*${{ parameters.fileFilter }}*'
|
||||
# aomp is a special case, since the trigger file is under ROCm/ROCm instead of the component repo
|
||||
${{ if notIn(parameters.componentName, 'aomp') }}:
|
||||
buildVersionToDownload: latestFromBranch # default is 'latest'
|
||||
branchName: refs/heads/${{ parameters.branchName }}
|
||||
allowPartiallySucceededBuilds: $(allowPartiallySucceededBuilds)
|
||||
targetPath: '$(Pipeline.Workspace)/d'
|
||||
${{ else }}:
|
||||
buildType: 'current'
|
||||
itemPattern: '**/${{ parameters.componentName }}*${{ parameters.fileFilter }}*'
|
||||
allowPartiallySucceededBuilds: $(allowPartiallySucceededBuilds)
|
||||
targetPath: '$(Pipeline.Workspace)/d'
|
||||
- task: ExtractFiles@1
|
||||
displayName: Extract ${{ parameters.componentName }}
|
||||
inputs:
|
||||
|
||||
@@ -31,6 +31,11 @@ parameters:
|
||||
- name: setupHIPLibrarySymlinks
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
- name: componentVarList
|
||||
type: object
|
||||
@@ -354,6 +359,7 @@ steps:
|
||||
parameters:
|
||||
componentName: ${{ split(dependency, ':')[0] }}
|
||||
pipelineId: ${{ parameters.componentVarList[split(dependency, ':')[0]].pipelineId }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.componentVarList[split(dependency, ':')[0]].hasGpuTarget }}:
|
||||
fileFilter: "${{ split(dependency, ':')[1] }}*${{ parameters.gpuTarget }}"
|
||||
# dependencySource = staging
|
||||
@@ -383,6 +389,7 @@ steps:
|
||||
parameters:
|
||||
componentName: ${{ dependency }}
|
||||
pipelineId: ${{ parameters.componentVarList[dependency].pipelineId }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.componentVarList[dependency].hasGpuTarget }}:
|
||||
fileFilter: ${{ parameters.gpuTarget }}
|
||||
# dependencySource = staging
|
||||
|
||||
82
.azuredevops/templates/steps/docker-container-commit.yml
Normal file
82
.azuredevops/templates/steps/docker-container-commit.yml
Normal file
@@ -0,0 +1,82 @@
|
||||
# This template creates and uploads a Docker image from the current environment
|
||||
# It uses `docker commit` to do so, which can provide more accurate images than the standard template
|
||||
# It requires the following conditions:
|
||||
# - Job must be run inside a Docker container
|
||||
# - The container's external name must be the same as the container's internal hostname
|
||||
# - Docker must be installed inside said container and given sufficient permissions
|
||||
# Currently, it is only usable for test jobs run on our self-managed systems
|
||||
# Jobs run on Azure VMs will not be able to use this template (most if not all build jobs)
|
||||
|
||||
parameters:
|
||||
- name: gpuTarget
|
||||
type: string
|
||||
default: ''
|
||||
- name: environment
|
||||
type: string
|
||||
default: build
|
||||
values:
|
||||
- build
|
||||
- test
|
||||
- combined
|
||||
- amd
|
||||
- nvidia
|
||||
- name: extraPaths
|
||||
type: string
|
||||
default: ''
|
||||
- name: extraEnvVars
|
||||
type: object
|
||||
default: []
|
||||
- name: forceDockerCreation
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
displayName: Commit container and initialize Dockerfile
|
||||
condition: or(and(failed(), not(contains(variables['DOCKER_SKIP_GFX'], variables['JOB_GPU_TARGET']))), ${{ eq(parameters.forceDockerCreation, true) }})
|
||||
inputs:
|
||||
workingDirectory: $(Pipeline.Workspace)
|
||||
targetType: inline
|
||||
script: |
|
||||
docker commit $(hostname) az-ci-temp-image:latest
|
||||
echo "FROM az-ci-temp-image:latest" > Dockerfile
|
||||
echo "RUN sudo groupmod -g $(getent group render | awk -F: '{print $3}') render" >> Dockerfile
|
||||
echo "RUN sudo groupmod -g $(getent group docker | awk -F: '{print $3}') docker" >> Dockerfile
|
||||
echo "ENV PATH='$PATH:${{ parameters.extraPaths }}'" >> Dockerfile
|
||||
echo "ENTRYPOINT [\"/bin/bash\"]" >> Dockerfile
|
||||
- ${{ each extraEnvVar in parameters.extraEnvVars }}:
|
||||
- task: Bash@3
|
||||
displayName: Add extra environment variables
|
||||
condition: or(and(failed(), not(contains(variables['DOCKER_SKIP_GFX'], variables['JOB_GPU_TARGET']))), ${{ eq(parameters.forceDockerCreation, true) }})
|
||||
inputs:
|
||||
workingDirectory: $(Pipeline.Workspace)
|
||||
targetType: inline
|
||||
script: echo "ENV ${{ split(extraEnvVar, ':::')[0] }}='${{ split(extraEnvVar, ':::')[1] }}'" >> Dockerfile
|
||||
- task: Bash@3
|
||||
displayName: Print Dockerfile
|
||||
condition: or(and(failed(), not(contains(variables['DOCKER_SKIP_GFX'], variables['JOB_GPU_TARGET']))), ${{ eq(parameters.forceDockerCreation, true) }})
|
||||
inputs:
|
||||
workingDirectory: $(Pipeline.Workspace)
|
||||
targetType: inline
|
||||
script: cat Dockerfile
|
||||
- task: Docker@2
|
||||
displayName: Build and upload Docker image
|
||||
condition: or(and(failed(), not(contains(variables['DOCKER_SKIP_GFX'], variables['JOB_GPU_TARGET']))), ${{ eq(parameters.forceDockerCreation, true) }})
|
||||
inputs:
|
||||
containerRegistry: 'ContainerService3'
|
||||
${{ if ne(parameters.gpuTarget, '') }}:
|
||||
repository: '$(Build.DefinitionName)-${{ parameters.environment }}-${{ parameters.gpuTarget }}'
|
||||
${{ else }}:
|
||||
repository: '$(Build.DefinitionName)-${{ parameters.environment }}'
|
||||
Dockerfile: '$(Pipeline.Workspace)/Dockerfile'
|
||||
buildContext: '$(Pipeline.Workspace)'
|
||||
- task: Bash@3
|
||||
condition: or(and(failed(), not(contains(variables['DOCKER_SKIP_GFX'], variables['JOB_GPU_TARGET']))), ${{ eq(parameters.forceDockerCreation, true) }})
|
||||
displayName: "!! Docker Image URL !!"
|
||||
inputs:
|
||||
workingDirectory: $(Pipeline.Workspace)
|
||||
targetType: inline
|
||||
${{ if ne(parameters.gpuTarget, '') }}:
|
||||
script: echo "rocmexternalcicd.azurecr.io/$(Build.DefinitionName)-${{ parameters.environment }}-${{ parameters.gpuTarget }}:$(Build.BuildId)" | tr '[:upper:]' '[:lower:]'
|
||||
${{ else }}:
|
||||
script: echo "rocmexternalcicd.azurecr.io/$(Build.DefinitionName)-${{ parameters.environment }}:$(Build.BuildId)" | tr '[:upper:]' '[:lower:]'
|
||||
@@ -334,7 +334,7 @@ steps:
|
||||
- task: Docker@2
|
||||
condition: or(and(failed(), ${{ not(containsValue(parameters.dockerSkipGfx, parameters.gpuTarget)) }}), ${{ eq(parameters.forceDockerCreation, true) }})
|
||||
inputs:
|
||||
containerRegistry: 'ContainerService'
|
||||
containerRegistry: 'ContainerService3'
|
||||
${{ if ne(parameters.gpuTarget, '') }}:
|
||||
repository: '$(Build.DefinitionName)-${{ parameters.environment }}-${{ parameters.gpuTarget }}'
|
||||
${{ else }}:
|
||||
@@ -348,6 +348,6 @@ steps:
|
||||
workingDirectory: $(Pipeline.Workspace)
|
||||
targetType: inline
|
||||
${{ if ne(parameters.gpuTarget, '') }}:
|
||||
script: echo "rocmexternalcicd.azurecr.io/$(Build.DefinitionName)-${{ parameters.environment }}-${{ parameters.gpuTarget }}:$(Build.BuildId)"
|
||||
script: echo "rocmexternalcicd.azurecr.io/$(Build.DefinitionName)-${{ parameters.environment }}-${{ parameters.gpuTarget }}:$(Build.BuildId)" | tr '[:upper:]' '[:lower:]'
|
||||
${{ else }}:
|
||||
script: echo "rocmexternalcicd.azurecr.io/$(Build.DefinitionName)-${{ parameters.environment }}:$(Build.BuildId)"
|
||||
script: echo "rocmexternalcicd.azurecr.io/$(Build.DefinitionName)-${{ parameters.environment }}:$(Build.BuildId)" | tr '[:upper:]' '[:lower:]'
|
||||
|
||||
@@ -54,7 +54,7 @@ steps:
|
||||
fi
|
||||
|
||||
echo "Downloading CK artifact from $ARTIFACT_URL"
|
||||
wget -nv $ARTIFACT_URL -O $(System.ArtifactsDirectory)/ck.zip
|
||||
wget --tries=5 --waitretry=10 --retry-connrefused -nv $ARTIFACT_URL -O $(System.ArtifactsDirectory)/ck.zip
|
||||
unzip $(System.ArtifactsDirectory)/ck.zip -d $(System.ArtifactsDirectory)
|
||||
mkdir -p $(Agent.BuildDirectory)/rocm
|
||||
tar -zxvf $(System.ArtifactsDirectory)/$ARTIFACT_NAME/*.tar.gz -C $(Agent.BuildDirectory)/rocm
|
||||
|
||||
@@ -30,13 +30,13 @@ variables:
|
||||
- name: GFX90A_TEST_POOL
|
||||
value: gfx90a_test_pool
|
||||
- name: LATEST_RELEASE_VERSION
|
||||
value: 6.3.4
|
||||
- name: REPO_RADEON_VERSION
|
||||
value: 6.3.4
|
||||
- name: NEXT_RELEASE_VERSION
|
||||
value: 6.4.0
|
||||
- name: REPO_RADEON_VERSION
|
||||
value: 6.4
|
||||
- name: NEXT_RELEASE_VERSION
|
||||
value: 6.5.0
|
||||
- name: LATEST_RELEASE_TAG
|
||||
value: rocm-6.3.4
|
||||
value: rocm-6.4.0
|
||||
- name: DOCKER_SKIP_GFX
|
||||
value: gfx90a
|
||||
- name: AMDMIGRAPHX_GFX942_TEST_PIPELINE_ID
|
||||
|
||||
@@ -76,6 +76,7 @@ Concretized
|
||||
Conda
|
||||
ConnectX
|
||||
CuPy
|
||||
da
|
||||
Dashboarding
|
||||
DBRX
|
||||
DDR
|
||||
@@ -751,6 +752,7 @@ profilers
|
||||
protobuf
|
||||
pseudorandom
|
||||
py
|
||||
pytorch
|
||||
recommender
|
||||
recommenders
|
||||
quantile
|
||||
|
||||
121
CHANGELOG.md
121
CHANGELOG.md
@@ -4,9 +4,117 @@ This page is a historical overview of changes made to ROCm components. This
|
||||
consolidated changelog documents key modifications and improvements across
|
||||
different versions of the ROCm software stack and its components.
|
||||
|
||||
## ROCm 6.4.1
|
||||
|
||||
See the [ROCm 6.4.1 release notes](https://rocm-stg.amd.com/en/latest/about/release-notes.html)
|
||||
for a complete overview of this release.
|
||||
|
||||
### **AMD SMI** (25.4.2)
|
||||
|
||||
#### Added
|
||||
|
||||
* Dumping CPER entries from RAS tool `amdsmi_get_gpu_cper_entries()` to Python and C APIs.
|
||||
- Dumping CPER entries consist of `amdsmi_cper_hdr_t`.
|
||||
- Dumping CPER entries is also enabled in the CLI interface through `sudo amd-smi ras --cper`.
|
||||
|
||||
#### Resolved
|
||||
|
||||
* Fixed partition enumeration in `amd-smi list -e`, `amdsmi_get_gpu_enumeration_info()`, `amdsmi_enumeration_info_t`, `drm_card`, and `drm_render` fields.
|
||||
|
||||
```{note}
|
||||
See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples, and in-depth descriptions.
|
||||
```
|
||||
|
||||
### **HIP** (6.4.1)
|
||||
|
||||
#### Added
|
||||
|
||||
* New debug mask, to print precise code object information for logging.
|
||||
|
||||
#### Changed
|
||||
|
||||
* Calling the code object has changed. HIP runtime now uses device bitcode before SPIR-V.
|
||||
|
||||
#### Optimized
|
||||
|
||||
* Improved kernel logging using the demangling shader names.
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Stale state during the graph capture. The return error was fixed, and HIP runtime now always uses the latest dependent nodes during `hipEventRecord` capture.
|
||||
* Issue of `hipEventRecords` failing to call the `hip::getStream` runtime function.
|
||||
|
||||
### **hipBLASLt** (0.12.1)
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Fixed an accuracy issue for some solutions using an `FP32` or `TF32` data type with a TT transpose.
|
||||
|
||||
### **RCCL** (2.22.3)
|
||||
|
||||
#### Changed
|
||||
|
||||
* MSCCL++ is now disabled by default. To enable it, set `RCCL_MSCCLPP_ENABLE=1`.
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Fixed an issue where early termination, in rare circumstances, could cause the application to stop responding by adding synchronization before destroying a proxy thread.
|
||||
* Fixed the accuracy issue for the MSCCLPP `allreduce7` kernel in graph mode.
|
||||
|
||||
### **rocALUTION** (3.2.3)
|
||||
|
||||
#### Added
|
||||
|
||||
* The `-a` option has been added to the `rmake.py` build script. This option allows you to select specific architectures when building on Microsoft Windows.
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Fixed an issue where the `HIP_PATH` environment variable was being ignored when compiling on Microsoft Windows.
|
||||
|
||||
### **ROCm Data Center Tool** (0.3.0)
|
||||
|
||||
#### Added
|
||||
|
||||
- Support for GPU partitions.
|
||||
- `RDC_FI_GPU_BUSY_PERCENT` metric.
|
||||
|
||||
#### Changed
|
||||
|
||||
- Updated `rdc_field` to align with `rdc_bootstrap` for current metrics.
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
- Fixed [ROCProfiler](https://rocm.docs.amd.com/projects/rocprofiler/en/docs-6.4.0/index.html) eval metrics and memory leaks.
|
||||
|
||||
### **ROCm SMI** (7.5.0)
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
- Fixed partition enumeration. It now refers to the correct DRM Render and Card paths.
|
||||
|
||||
```{note}
|
||||
See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples, and in-depth descriptions.
|
||||
```
|
||||
|
||||
### **ROCm Systems Profiler** (1.0.1)
|
||||
|
||||
#### Added
|
||||
|
||||
* How-to document for [network performance profiling](https://rocm.docs.amd.com/projects/rocprofiler-systems/en/amd-staging/how-to/nic-profiling.html) for standard Network Interface Cards (NICs).
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Fixed a build issue with Dyninst on GCC 13.
|
||||
|
||||
### **ROCr Runtime** (1.15.0)
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Fixed a rare occurrence issue on AMD Instinct MI25, MI50, and MI100 GPUs, where the `SDMA` copies might start before the dependent Kernel finishes and could cause memory corruption.
|
||||
|
||||
## ROCm 6.4.0
|
||||
|
||||
See the [ROCm 6.4.0 release notes](https://rocm-stg.amd.com/en/latest/about/release-notes.html)
|
||||
See the [ROCm 6.4.0 release notes](https://rocm.docs.amd.com/en/docs-6.4.0/about/release-notes.html)
|
||||
for a complete overview of this release.
|
||||
|
||||
### **AMD SMI** (25.3.0)
|
||||
@@ -125,8 +233,7 @@ Some workaround options are as follows:
|
||||
- The `pasid` field in struct `amdsmi_process_info_t` will be deprecated in a future ROCm release.
|
||||
|
||||
```{note}
|
||||
See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/rocm-6.4.x/CHANGELOG.md) for details, examples,
|
||||
and in-depth descriptions.
|
||||
See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples, and in-depth descriptions.
|
||||
```
|
||||
|
||||
### **AMDMIGraphX** (2.12.0)
|
||||
@@ -678,7 +785,6 @@ The following lists the backward incompatible changes planned for upcoming major
|
||||
|
||||
* Roofline support for Ubuntu 24.04.
|
||||
* Experimental support `rocprofv3` (not enabled as default).
|
||||
* Experimental feature: Spatial multiplexing.
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
@@ -737,8 +843,7 @@ The following lists the backward incompatible changes planned for upcoming major
|
||||
- Fixed `rsmi_dev_target_graphics_version_get`, `rocm-smi --showhw`, and `rocm-smi --showprod` not displaying graphics version correctly for Instinct MI200 series, MI100 series, and RDNA3-based GPUs.
|
||||
|
||||
```{note}
|
||||
See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/rocm-6.4.x/CHANGELOG.md) for details, examples,
|
||||
and in-depth descriptions.
|
||||
See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples, and in-depth descriptions.
|
||||
```
|
||||
|
||||
### **ROCm Systems Profiler** (1.0.0)
|
||||
@@ -3456,7 +3561,7 @@ See [issue #3499](https://github.com/ROCm/ROCm/issues/3499) on GitHub.
|
||||
|
||||
- Error when running Omniperf with an application with command line arguments. As a workaround, create an
|
||||
intermediary script to call the application with the necessary arguments, then call the script with Omniperf. This
|
||||
issue is fixed in a future release of Omniperf. See [#347](https://github.com/ROCm/omniperf/issues/347).
|
||||
issue is fixed in a future release of Omniperf. See [#347](https://github.com/ROCm/rocprofiler-compute/issues/347).
|
||||
|
||||
- Omniperf might not work with AMD Instinct MI300 accelerators out of the box, resulting in the following error:
|
||||
"*ERROR gfx942 is not enabled rocprofv1. Available profilers include: ['rocprofv2']*". As a workaround, add the
|
||||
@@ -4333,7 +4438,7 @@ for a complete overview of this release.
|
||||
* New multiple node and GPU support.
|
||||
Unsmoothed and smoothed aggregations and Ruge-Stueben AMG now work with multiple nodes
|
||||
and GPUs. For more information, refer to the
|
||||
[API documentation](https://rocm.docs.amd.com/projects/rocALUTION/en/latest/usermanual/solvers.html#unsmoothed-aggregation-amg).
|
||||
[API documentation](https://rocm.docs.amd.com/projects/rocALUTION/en/docs-6.1.0/usermanual/solvers.html#unsmoothed-aggregation-amg).
|
||||
|
||||
### **rocDecode** (0.5.0)
|
||||
|
||||
|
||||
81
Manifest6.4.0
Normal file
81
Manifest6.4.0
Normal file
@@ -0,0 +1,81 @@
|
||||
This XML file does not appear to have any style information associated with it. The document tree is shown below.
|
||||
<manifest>
|
||||
<remote name="gerritgit" fetch="ssh://gerritgit/" review="gerrit-git.amd.com"/>
|
||||
<remote name="lightning-ghemu" fetch="ssh://github-emu/AMD-Lightning-Internal"/>
|
||||
<remote name="rocm" fetch="https://github.com/ROCm"/>
|
||||
<remote name="rocm-ghemu" fetch="ssh://github-emu/AMD-ROCm-Internal"/>
|
||||
<default remote="gerritgit" revision="release/rocm-rel-6.4" sync-j="4" sync-c="true"/>
|
||||
<project name="AMDMIGraphX" remote="rocm" revision="908b94a3f0822a4fee89d99c3cfc51cd9c93f2f6" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="MIOpen" remote="rocm" revision="f10c6ed8085cfabf8877294ab44301d8180999e8" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="MIVisionX" remote="rocm" revision="a2b69e5b30f2dbdf66055ec99a2b5559b572f7af" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="OpenCL-CLHPP" remote="rocm-ghemu" revision="6f7e82dee83aea7f277a4b874da309902ea51f6e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="OpenCL-Headers" remote="rocm-ghemu" revision="848d67b6fd471318816a81601d469b086487d18e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="ROCR-Runtime" remote="rocm-ghemu" revision="1d9f08cabd33bd6302add72d0be2bfe0e64eea3a" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="ROCdbgapi" remote="rocm-ghemu" revision="59be7ff0aaafe82feb78f30990c8fdf62838cc98" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="ROCgdb" remote="rocm-ghemu" revision="401bb21f2f3c72bbb90ccce12dc3ef481f9a1d8a" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="ROCmValidationSuite" remote="rocm" revision="5f1a9665f6241b0346c88cfd21a6073628da3593" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="Tensile" remote="rocm" revision="be49885fce2a61b600ae4593f1c2d00c8b4fa11e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="TransferBench" remote="rocm" revision="3ea2f226ec818158ba97e4ee0ec0b589f13f4641" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="amdsmi" remote="rocm-ghemu" revision="e6a209ef809f1b09a424572afd685ec754a9042b" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="aomp" path="openmp-extras/aomp" remote="lightning-ghemu" revision="24932c59c0759a57ee52d327d9a10a2e466e35a7" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
<project name="aomp-extras" path="openmp-extras/aomp-extras" remote="lightning-ghemu" revision="6f8038ada9dec082ea091d30c98c0834669d12a1" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
<project name="aqlprofile" remote="rocm-ghemu" revision="7fae75ec6bf7b1a631707ae859542d733f8a1f43" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="build-infra" path="ROCm" remote="rocm-ghemu" revision="811ec9cc6d1588bf66619365b9b4db96ac6acf68" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra"/>
|
||||
<project name="clr" remote="rocm-ghemu" revision="a1adcfdd44f4560c0268e36c8afeb94f760dc963" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="composable_kernel" remote="rocm" revision="a8c5bd9b9ad950c3e742877e01cb784da91664e3" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="compute/ec/hip-examples" path="HIP-Examples" revision="41b0cff8077a25390c2bbda827eb9f6f37ec1ef3" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="compute/ec/hip-examples-private" path="hip-examples-private" revision="dc69edb405804987753735a369478503d82ce9c2" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="compute/ec/jenkins-utils" path="jenkins-utils" revision="bb517b014ff055b62d3860addc23ddd06b0c3e6e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra,stage1"/>
|
||||
<project name="compute/ec/ml-framework-ci" path="ml-framework-ci" revision="83440e22ebf1e9443b6df737224c1e5e2b91e0c4" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra,framework"/>
|
||||
<project name="compute/ec/packaging/meta" path="meta" revision="c7cffa2e4199da1fd68b8b3568282dd59d49a4df" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra,stage1"/>
|
||||
<project name="compute/ec/prototype" path="build" revision="d71a2766e11e057e5c698caea8fc4ebc0f72cb3e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra,stage1"/>
|
||||
<project name="compute/ec/rocm_bandwidth_test" path="rocm_bandwidth_test" revision="84b8ddd2686be9bd3e438126b44e6bb10d94d522" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="flang" path="openmp-extras/flang" remote="lightning-ghemu" revision="390169508a03cecf85d43f5cee41e223355f598f" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
<project name="half" remote="rocm" revision="1ddada225144cac0de8f6b5c0dd9acffd99a2e68" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hip" remote="rocm-ghemu" revision="22b0b2eb9a09e30dca11b213872127f9caa2e1e7" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="hip-tests" remote="rocm-ghemu" revision="dc28111737706aad93e38c2f746ccbc13dbf1b80" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="hipBLAS" remote="rocm" revision="0a335435e9c8a833d7106e4ae5057eb58cea2fef" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipBLAS-common" remote="rocm" revision="7c1566ba4628e777b91511242899b6df48555d04" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipBLASLt" remote="rocm" revision="4d62e135cfb4008cf7b508995cad347a1bc750c8" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipCUB" remote="rocm" revision="a6005943c5804535990429925318e7900eb6e801" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipFFT" remote="rocm" revision="396169c84a2bb3c7ed7245caefe66002138e7c6c" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipRAND" remote="rocm" revision="d2516cc199690fd91abfdc5908ecfd88e3553067" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipSOLVER" remote="rocm" revision="ca0de3c9c95df4345b76cd8a56e72c84b7d5fc79" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipSPARSE" remote="rocm" revision="a6c62e48eb8a2326475f7bbb4705c5b926a5edc8" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipSPARSELt" remote="rocm" revision="f3f4f590a49ae9f9c9ce1451c42db4c2bfd00eed" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipTensor" remote="rocm" revision="e5529b92914be79e4887a92b48b30f88b616c9a5" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipfort" remote="rocm" revision="f3d6aa3e8657d665a43fa2815ca2e49ce39a464a" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipify" path="HIPIFY" remote="lightning-ghemu" revision="ed0de49132211c6ddbd40f5cd89b5841e832ac3d" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
<project name="hipother" remote="rocm-ghemu" revision="49b1588f834dbe1a4db1bddb3647a91b15f618b8" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="llvm-project" remote="lightning-ghemu" revision="aa0c041cb49bb50af268504907b7899fec59ae4e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
<project name="rccl" remote="rocm" revision="12f8f61f3a5db87bf158c60fdd5e38a32c903b08" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rdc" remote="rocm-ghemu" revision="0224310c872df0fae56ffc883c50c7f47dc82870" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocAL" remote="rocm" revision="373ef865aca43528559e7a9134f09e49a9e9b7c6" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocALUTION" remote="rocm" revision="cb256de3574a4fcbc6a52ed5986b787173cd6dc2" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocBLAS" remote="rocm" revision="80e5394d6a68901ce48b03da47b33b1e69d58be7" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocDecode" remote="rocm" revision="a2a7b63cad8f90a94e21232b44460a8fb2d52304" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocFFT" remote="rocm" revision="058ba87fdcfdae334dbc8dbe048955b248e9328a" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocJPEG" remote="rocm" revision="73d36d35d90137ffbfcec276bdf973823ef0c0b9" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocPRIM" remote="rocm" revision="d8771ec18ad45c4d697800c22fb21241f22a915f" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocPyDecode" remote="rocm" revision="848e49d29d4d6173fb4b57a9223ce68c049baa28" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocRAND" remote="rocm" revision="4d5d3a88d1898705dadf5c06e7b0400d51a13c36" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocSHMEM" remote="rocm" revision="7702b3c0f3f41baf6a80aa6b22fa90dec1a6801e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocSOLVER" remote="rocm" revision="db754e3f55daab54abb86f17cd6b4066c504e163" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocSPARSE" remote="rocm" revision="4953add0aee37ad26700e8bcd6defbfa6b3a4d08" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocThrust" remote="rocm" revision="6bf2777019827e1a2898547ced9a03bf5024ed7d" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocWMMA" remote="rocm" revision="1a5b6231663fcf3e00abf790aeae843278f16a65" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocm-cmake" remote="rocm" revision="ecc716b97c2239cff00422ed7a43cd52a0839a0e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocm-core" remote="rocm-ghemu" revision="73dae9c82ace4fb8e1e4028f86ff0365f21c9f51" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra,stage1"/>
|
||||
<project name="rocm-examples" remote="rocm" revision="3bbd2987a3b46cfd2c8348c2317042f3ad604e38" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocm_smi_lib" remote="rocm-ghemu" revision="1f242d314916336d6ce5c731f486edfaa8f0b987" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocminfo" remote="rocm-ghemu" revision="6ea2ba38c8e1ab2899acf66878148b1192fd0bee" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocprofiler" remote="rocm-ghemu" revision="40da7312a06f8052f5c148a4709cab64686f881d" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocprofiler-compute" remote="rocm" revision="7b25d958b4e030ea64a24ed0a62dcac1e48193ab" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocprofiler-register-internal" path="rocprofiler-register" remote="rocm-ghemu" revision="7c6cd44f637d400b50b803b0b351be302ad6827d" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocprofiler-sdk-internal" path="rocprofiler-sdk" remote="rocm-ghemu" revision="e8e49fe76971000a42a5a177d9a727d16dd0ebcf" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocprofiler-systems" remote="rocm" revision="2e945e4a08781e13a822f568814e2c434fd8858f" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocr_debug_agent" remote="rocm-ghemu" revision="9eec1a52a36b5203bbac54a1b442fe9a45b6a43e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="roctracer" remote="rocm-ghemu" revision="f55a6943816641c081aa167c8a45904ddae2ba5e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rpp" remote="rocm" revision="5fb204ca7018b87889e061b720c5b06f6b9bce9b" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="spirv-llvm-translator" path="llvm-project/llvm/projects/SPIRV-LLVM-Translator" remote="lightning-ghemu" revision="ae12ddbec86765df369b18ac764e170082079819" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
</manifest>
|
||||
1386
RELEASE.md
1386
RELEASE.md
File diff suppressed because it is too large
Load Diff
@@ -81,6 +81,7 @@ additional licenses. Please review individual repositories for more information.
|
||||
| [rocRAND](https://github.com/ROCm/rocRAND/) | [MIT](https://github.com/ROCm/rocRAND/blob/develop/LICENSE.txt) |
|
||||
| [ROCr Debug Agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/amd-staging/LICENSE.txt) |
|
||||
| [ROCR-Runtime](https://github.com/ROCm/ROCR-Runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCR-Runtime/blob/amd-staging/LICENSE.txt) |
|
||||
| [rocSHMEM](https://github.com/ROCm/rocSHMEM/) | [MIT](https://github.com/ROCm/rocSHMEM/blob/develop/LICENSE.md) |
|
||||
| [rocSOLVER](https://github.com/ROCm/rocSOLVER/) | [BSD-2-Clause](https://github.com/ROCm/rocSOLVER/blob/develop/LICENSE.md) |
|
||||
| [rocSPARSE](https://github.com/ROCm/rocSPARSE/) | [MIT](https://github.com/ROCm/rocSPARSE/blob/develop/LICENSE.md) |
|
||||
| [rocThrust](https://github.com/ROCm/rocThrust/) | [Apache 2.0](https://github.com/ROCm/rocThrust/blob/develop/LICENSE) |
|
||||
|
||||
@@ -1,121 +1,122 @@
|
||||
ROCm Version,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
|
||||
,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
|
||||
,"Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
|
||||
,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
|
||||
,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,,,,,,,,,,,,
|
||||
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
|
||||
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
|
||||
,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
|
||||
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
|
||||
,,,,,,,,,,,,,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.20,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
|
||||
,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
|
||||
,,,,,,,,,,,,,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
Thrust,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
CUB,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
,,,,,,,,,,,,,,,
|
||||
KMD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
Tested user space versions,"6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.3.x, 6.2.x, 6.1.x, 6.0.x","6.3.x, 6.2.x, 6.1.x, 6.0.x","6.3.x, 6.2.x, 6.1.x, 6.0.x","6.3.x, 6.2.x, 6.1.x, 6.0.x","6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
|
||||
,,,,,,,,,,,,,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
|
||||
:doc:`rocAL <rocal:index>`,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
|
||||
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`RPP <rpp:index>`,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
|
||||
,,,,,,,,,,,,,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
:doc:`RCCL <rccl:index>`,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
|
||||
,,,,,,,,,,,,,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
|
||||
:doc:`hipfort <hipfort:index>`,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
|
||||
,,,,,,,,,,,,,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
,,,,,,,,,,,,,,,
|
||||
SUPPORT LIBS,,,,,,,,,,,,,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.2,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
|
||||
,,,,,,,,,,,,,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60102,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
|
||||
,,,,,,,,,,,,,,,
|
||||
PERFORMANCE TOOLS,,,,,,,,,,,,,,,
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60102,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60102,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
|
||||
,,,,,,,,,,,,,,,
|
||||
DEVELOPMENT TOOLS,,,,,,,,,,,,,,,
|
||||
:doc:`HIPIFY <hipify:index>`,19.0.0.25104,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
|
||||
,,,,,,,,,,,,,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
,,,,,,,,,,,,,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
:doc:`HIP <hip:index>`,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
|
||||
ROCm Version,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
|
||||
,,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
|
||||
,"Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
|
||||
,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
|
||||
,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,,,,,,,,,,,,
|
||||
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
|
||||
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
|
||||
,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
|
||||
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
|
||||
,,,,,,,,,,,,,,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
|
||||
,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
|
||||
,,,,,,,,,,,,,,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
Thrust,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
CUB,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
,,,,,,,,,,,,,,,,
|
||||
KMD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
KMD versions,"6.4.x, 6.3.x","6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
|
||||
,,,,,,,,,,,,,,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
|
||||
:doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
|
||||
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
|
||||
,,,,,,,,,,,,,,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
|
||||
`rocSHMEM <https://github.com/ROCm/rocSHMEM>`_ ,2.0.0,2.0.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
,,,,,,,,,,,,,,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
|
||||
:doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,4.4.0,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
|
||||
,,,,,,,,,,,,,,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
,,,,,,,,,,,,,,,,
|
||||
SUPPORT LIBS,,,,,,,,,,,,,,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
|
||||
,,,,,,,,,,,,,,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,25.4.2,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
|
||||
,,,,,,,,,,,,,,,,
|
||||
PERFORMANCE TOOLS,,,,,,,,,,,,,,,,
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.1,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60401,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60401,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
|
||||
,,,,,,,,,,,,,,,,
|
||||
DEVELOPMENT TOOLS,,,,,,,,,,,,,,,,
|
||||
:doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
|
||||
,,,,,,,,,,,,,,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25172,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,19.0.0.25172,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25172,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
,,,,,,,,,,,,,,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
:doc:`HIP <hip:index>`,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
|
||||
|
||||
|
@@ -23,15 +23,15 @@ compatibility and system requirements.
|
||||
.. container:: format-big-table
|
||||
|
||||
.. csv-table::
|
||||
:header: "ROCm Version", "6.4.0", "6.3.3", "6.2.0"
|
||||
:header: "ROCm Version", "6.4.1", "6.4.0", "6.3.0"
|
||||
:stub-columns: 1
|
||||
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4"
|
||||
,"RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3"
|
||||
,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9"
|
||||
,"SLES 15 SP6","SLES 15 SP6, SP5","SLES 15 SP6, SP5"
|
||||
,"Oracle Linux 9, 8 [#mi300x]_",Oracle Linux 8.10 [#mi300x]_,Oracle Linux 8.9 [#mi300x]_
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5
|
||||
,"RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4"
|
||||
,RHEL 8.10,RHEL 8.10,RHEL 8.10
|
||||
,SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5"
|
||||
,"Oracle Linux 9, 8 [#mi300x]_","Oracle Linux 9, 8 [#mi300x]_",Oracle Linux 8.10 [#mi300x]_
|
||||
,Debian 12 [#single-node]_,Debian 12 [#single-node]_,
|
||||
,Azure Linux 3.0 [#mi300x]_,Azure Linux 3.0 [#mi300x]_,
|
||||
,.. _architecture-support-compatibility-matrix:,,
|
||||
@@ -43,106 +43,107 @@ compatibility and system requirements.
|
||||
,.. _gpu-support-compatibility-matrix:,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030
|
||||
,gfx942,gfx942,gfx942 [#mi300_620]_
|
||||
,gfx942,gfx942,gfx942
|
||||
,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908
|
||||
,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix:,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.3, 2.2, 2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.31,0.4.26
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.20,1.17.3,1.17.3
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.31
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.17.3
|
||||
,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix:,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0
|
||||
,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix:,,
|
||||
Thrust,2.5.0,2.3.2,2.2.0
|
||||
CUB,2.5.0,2.3.2,2.2.0
|
||||
Thrust,2.5.0,2.5.0,2.3.2
|
||||
CUB,2.5.0,2.5.0,2.3.2
|
||||
,,,
|
||||
KMD & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,,
|
||||
Tested user space versions,"6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.3.x, 6.2.x, 6.1.x, 6.0.x"
|
||||
KMD versions,"6.4.x, 6.3.x","6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
|
||||
,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.11.0,2.10.0
|
||||
:doc:`MIOpen <miopen:index>`,3.4.0,3.3.0,3.2.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.1.0,3.0.0
|
||||
:doc:`rocAL <rocal:index>`,2.2.0,2.1.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.8.0,0.6.0
|
||||
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.6.0,N/A
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.2.0,0.1.0
|
||||
:doc:`RPP <rpp:index>`,1.9.10,1.9.1,1.8.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.11.0
|
||||
:doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.1.0
|
||||
:doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.1.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.8.0
|
||||
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.6.0
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.2.0
|
||||
:doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.1
|
||||
,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix:,,
|
||||
:doc:`RCCL <rccl:index>`,2.22.3,2.21.5,2.20.5
|
||||
:doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.21.5
|
||||
`rocSHMEM <https://github.com/ROCm/rocSHMEM>`_ ,2.0.0,2.0.0,N/A
|
||||
,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix:,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.3.0,2.2.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.12.0,0.10.0,0.8.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.17,1.0.14
|
||||
:doc:`hipfort <hipfort:index>`,0.6.0,0.5.1,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.12.0,2.11.1,2.11.0
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.3.0,2.2.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.1.2,3.1.1
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.2,0.2.1
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.2,3.2.1,3.2.0
|
||||
:doc:`rocBLAS <rocblas:index>`,4.4.0,4.3.0,4.2.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.31,1.0.28
|
||||
:doc:`rocRAND <rocrand:index>`,3.3.0,3.2.0,3.1.0
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.27.0,3.26.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.3.0,3.2.0
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.6.0,1.5.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.43.0,4.42.0,4.41.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.3.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.0,0.10.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.17
|
||||
:doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.5.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.11.0
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.3.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.1.2
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.2
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.2,3.2.1
|
||||
:doc:`rocBLAS <rocblas:index>`,4.4.0,4.4.0,4.3.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.31
|
||||
:doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.2.0
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.28.0,3.27.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.6.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.42.0
|
||||
,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix:,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.4.0,3.3.0,3.2.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.4.0,1.3.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.4.0,3.3.0,3.2.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.0.1
|
||||
:doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.4.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0
|
||||
,,,
|
||||
SUPPORT LIBS,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.4.43482,6.3.42134,6.2.41133
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.0,6.3.3,6.2.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,20240607.1.4246
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43482,6.3.42131
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.1,6.4.0,6.3.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_
|
||||
,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix:,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,25.3.0,24.7.1,24.6.2
|
||||
:doc:`AMD SMI <amdsmi:index>`,25.4.2,25.3.0,24.7.1
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.4.0,7.3.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.0.60200
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.5.0,7.4.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0
|
||||
,,,
|
||||
PERFORMANCE TOOLS,,,
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.0.0,2.0.1
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.0,0.1.2,1.11.2
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60400,2.0.60303,2.0.60200
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.5.0,0.4.0
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60400,4.1.60303,4.1.60200
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.1.0,3.0.0
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.1,1.0.0,0.1.0
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60401,2.0.60400,2.0.60300
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.5.0
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60401,4.1.60400,4.1.60300
|
||||
,,,
|
||||
DEVELOPMENT TOOLS,,,
|
||||
:doc:`HIPIFY <hipify:index>`,19.0.0.25133,18.0.0.25012,18.0.0.24232
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.13.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.0,0.76.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,14.2.0
|
||||
:doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,18.0.0.24455
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.3,2.0.3
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.3
|
||||
,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix:,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1
|
||||
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25133,18.0.0.25012,18.0.0.24232
|
||||
:doc:`llvm-project <llvm-project:index>`,19.0.0.25133,18.0.0.25012,18.0.0.24232
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25133,18.0.0.25012,18.0.0.24232
|
||||
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25172,19.0.0.25133,18.0.0.24455
|
||||
:doc:`llvm-project <llvm-project:index>`,19.0.0.25172,19.0.0.25133,18.0.0.24491
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25172,19.0.0.25133,18.0.0.24491
|
||||
,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix:,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43482,6.3.42134,6.2.41133
|
||||
:doc:`HIP <hip:index>`,6.4.43482,6.3.42134,6.2.41133
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43483,6.4.43482,6.3.42131
|
||||
:doc:`HIP <hip:index>`,6.4.43483,6.4.43482,6.3.42131
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.14.0,1.13.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.14.0
|
||||
|
||||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
@@ -58,7 +58,7 @@ Docker image compatibility
|
||||
AMD validates and publishes ready-made `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and
|
||||
associated inventories are validated for
|
||||
`ROCm 6.3.1 <https://repo.radeon.com/rocm/apt/6.3.1/>`_. Click the |docker-icon|
|
||||
`ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_. Click the |docker-icon|
|
||||
icon to view the image on Docker Hub.
|
||||
|
||||
.. list-table:: JAX Docker image components
|
||||
@@ -68,24 +68,26 @@ icon to view the image on Docker Hub.
|
||||
- JAX
|
||||
- Linux
|
||||
- Python
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.3.1-jax0.4.31-py3.12/images/sha256-085a0cd5207110922f1fca684933a9359c66d42db6c5aba4760ed5214fdabde0"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
|
||||
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.4-jax0.4.35-py3.12/images/sha256-4069398229078f3311128b6d276c6af377c7e97d3363d020b0bf7154fae619ca"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
|
||||
|
||||
- `0.4.31 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.31>`_
|
||||
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
|
||||
- Ubuntu 24.04
|
||||
- `3.12.7 <https://www.python.org/downloads/release/python-3127/>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.3.1-jax0.4.31-py3.10/images/sha256-f88eddad8f47856d8640b694da4da347ffc1750d7363175ab7dc872e82b43324"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
|
||||
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.4-jax0.4.35-py3.10/images/sha256-a137f901f91ce6c13b424c40a6cf535248d4d20fd36d5daf5eee0570190a4a11"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
|
||||
|
||||
- `0.4.31 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.31>`_
|
||||
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
|
||||
- Ubuntu 22.04
|
||||
- `3.10.14 <https://www.python.org/downloads/release/python-31014/>`_
|
||||
|
||||
AMD publishes `Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and
|
||||
associated inventories are tested for `ROCm 6.2.4 <https://repo.radeon.com/rocm/apt/6.2.4/>`_.
|
||||
associated inventories are tested for `ROCm 6.3.2 <https://repo.radeon.com/rocm/apt/6.3.2/>`_.
|
||||
|
||||
.. list-table:: JAX community Docker image components
|
||||
:header-rows: 1
|
||||
@@ -94,27 +96,30 @@ associated inventories are tested for `ROCm 6.2.4 <https://repo.radeon.com/rocm/
|
||||
- JAX
|
||||
- Linux
|
||||
- Python
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.2.4-jax0.4.35-py3.12.7/images/sha256-a6032d89c07573b84c44e42c637bf9752b1b7cd2a222d39344e603d8f4c63beb?context=explore"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
|
||||
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.12.8/images/sha256-25dfaa0183e274bd0a3554a309af3249c6f16a1793226cb5373f418e39d3146a"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
|
||||
|
||||
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
|
||||
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
|
||||
- Ubuntu 22.04
|
||||
- `3.12.7 <https://www.python.org/downloads/release/python-3127/>`_
|
||||
- `3.12.8 <https://www.python.org/downloads/release/python-3128/>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.2.4-jax0.4.35-py3.11.10/images/sha256-d462f7e445545fba2f3b92234a21beaa52fe6c5f550faabcfdcd1bf53486d991?context=explore"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
|
||||
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.11.11/images/sha256-ff9baeca9067d13e6c279c911e5a9e5beed0817d24fafd424367cc3d5bd381d7"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
|
||||
|
||||
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
|
||||
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
|
||||
- Ubuntu 22.04
|
||||
- `3.11.10 <https://www.python.org/downloads/release/python-31110/>`_
|
||||
- `3.11.11 <https://www.python.org/downloads/release/python-31111/>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.2.4-jax0.4.35-py3.10.15/images/sha256-6f2d4d0f529378d9572f0e8cfdcbc101d1e1d335bd626bb3336fff87814e9d60?context=explore"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
|
||||
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.10.16/images/sha256-8bab484be1713655f74da51a191ed824bb9d03db1104fd63530a1ac3c37cf7b1"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
|
||||
|
||||
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
|
||||
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
|
||||
- Ubuntu 22.04
|
||||
- `3.10.15 <https://www.python.org/downloads/release/python-31015/>`_
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
|
||||
Critical ROCm libraries for JAX
|
||||
================================================================================
|
||||
|
||||
@@ -21,31 +21,68 @@ release cycles for PyTorch on ROCm:
|
||||
|
||||
- ROCm PyTorch release:
|
||||
|
||||
- Provides the latest version of ROCm but doesn't immediately support the latest stable PyTorch
|
||||
version.
|
||||
- Provides the latest version of ROCm but might not necessarily support the
|
||||
latest stable PyTorch version.
|
||||
|
||||
- Offers :ref:`Docker images <pytorch-docker-compat>` with ROCm and PyTorch
|
||||
pre-installed.
|
||||
preinstalled.
|
||||
|
||||
- ROCm PyTorch repository: `<https://github.com/ROCm/pytorch>`_
|
||||
|
||||
- See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>` to get started.
|
||||
- See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>`
|
||||
to get started.
|
||||
|
||||
- Official PyTorch release:
|
||||
|
||||
- Provides the latest stable version of PyTorch but doesn't immediately support the latest ROCm version.
|
||||
- Provides the latest stable version of PyTorch but might not necessarily
|
||||
support the latest ROCm version.
|
||||
|
||||
- Official PyTorch repository: `<https://github.com/pytorch/pytorch>`_
|
||||
|
||||
- See the `Nightly and latest stable version installation guide <https://pytorch.org/get-started/locally/>`_
|
||||
or `Previous versions <https://pytorch.org/get-started/previous-versions/>`_ to get started.
|
||||
or `Previous versions <https://pytorch.org/get-started/previous-versions/>`_
|
||||
to get started.
|
||||
|
||||
The upstream PyTorch includes an automatic HIPification solution that automatically generates HIP
|
||||
source code from the CUDA backend. This approach allows PyTorch to support ROCm without requiring
|
||||
manual code modifications.
|
||||
PyTorch includes tooling that generates HIP source code from the CUDA backend.
|
||||
This approach allows PyTorch to support ROCm without requiring manual code
|
||||
modifications. For more information, see :doc:`HIPIFY <hipify:index>`.
|
||||
|
||||
Development of ROCm is aligned with the stable release of PyTorch while upstream PyTorch testing uses
|
||||
the stable release of ROCm to maintain consistency.
|
||||
ROCm development is aligned with the stable release of PyTorch, while upstream
|
||||
PyTorch testing uses the stable release of ROCm to maintain consistency.
|
||||
|
||||
.. _pytorch-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
* :doc:`Using ROCm for AI: training a model </how-to/rocm-for-ai/training/benchmark-docker/pytorch-training>`
|
||||
guides how to leverage the ROCm platform for training AI models. It covers the
|
||||
steps, tools, and best practices for optimizing training workflows on AMD GPUs
|
||||
using PyTorch features.
|
||||
|
||||
* :doc:`Single-GPU fine-tuning and inference </how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference>`
|
||||
describes and demonstrates how to use the ROCm platform for the fine-tuning
|
||||
and inference of machine learning models, particularly large language models
|
||||
(LLMs), on systems with a single GPU. This topic provides a detailed guide for
|
||||
setting up, optimizing, and executing fine-tuning and inference workflows in
|
||||
such environments.
|
||||
|
||||
* :doc:`Multi-GPU fine-tuning and inference optimization </how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference>`
|
||||
describes and demonstrates the fine-tuning and inference of machine learning
|
||||
models on systems with multiple GPUs.
|
||||
|
||||
* The :doc:`Instinct MI300X workload optimization guide </how-to/rocm-for-ai/inference-optimization/workload>`
|
||||
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
|
||||
accelerator using ROCm. This guide helps users achieve optimal performance for
|
||||
deep learning and other high-performance computing tasks on the MI300X
|
||||
accelerator.
|
||||
|
||||
* The :doc:`Inception with PyTorch documentation </conceptual/ai-pytorch-inception>`
|
||||
describes how PyTorch integrates with ROCm for AI workloads It outlines the
|
||||
use of PyTorch on the ROCm platform and focuses on efficiently leveraging AMD
|
||||
GPU hardware for training and inference tasks in AI applications.
|
||||
|
||||
For more use cases and recommendations, see `ROCm PyTorch blog posts <https://rocm.blogs.amd.com/blog/tag/pytorch.html>`_.
|
||||
|
||||
.. _pytorch-docker-compat:
|
||||
|
||||
@@ -56,10 +93,10 @@ Docker image compatibility
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes ready-made `PyTorch images <https://hub.docker.com/r/rocm/pytorch>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and
|
||||
associated inventories are validated for `ROCm 6.3.3 <https://repo.radeon.com/rocm/apt/6.3.3/>`_.
|
||||
Click the |docker-icon| icon to view the image on Docker Hub.
|
||||
AMD validates and publishes `PyTorch images <https://hub.docker.com/r/rocm/pytorch>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and associated
|
||||
inventories were tested on `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table:: PyTorch Docker image components
|
||||
:header-rows: 1
|
||||
@@ -79,9 +116,84 @@ Click the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu24.04_py3.12_pytorch_release_2.4.0/images/sha256-6c798857b2c9526b44ba535710b93a1737546acea79b53a93c646195c272f1d5"><i class="fab fa-docker fa-lg"></i></a>
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-ab1d350b818b90123cfda31363019d11c0d41a8f12a19e3cb2cb40cf0261137d"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
|
||||
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`_
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
- `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`_
|
||||
- `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
|
||||
- `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.6.0/images/sha256-130536fdfceb374626a7bcb8d00b9d796ddfc3115677d51229e5b852d96b5ef4"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`_
|
||||
- 22.04
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`_
|
||||
- `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
|
||||
- `4.0.7 <https://github.com/open-mpi/ompi/tree/v4.0.7>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.5.1/images/sha256-20a2e24b4738dc1f1a44a04f23827918b56c99f7e697e6fccb90e9c4fae8ca9b"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`_
|
||||
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
|
||||
- `4.0.7 <https://github.com/open-mpi/ompi/tree/v4.0.7>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.11_pytorch_release_2.5.1/images/sha256-f09cb8ca39cc39222fb554060711f5c19130f7b4047aaf41fad4ba3ec470ca03"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
|
||||
- 22.04
|
||||
- `3.11.9 <https://www.python.org/downloads/release/python-3119/>`_
|
||||
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`_
|
||||
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
|
||||
- `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.5.1/images/sha256-a91c100d1fe608dae3eb7f60a751630363d4027ac3d077d428e92945204c338e"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
|
||||
- 22.04
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`_
|
||||
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
|
||||
- `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-66a89ce6485bb887af74bb9bd76bb613ab9834a6b1374649ea7ae379883454a4"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`_
|
||||
@@ -94,116 +206,55 @@ Click the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu22.04_py3.10_pytorch_release_2.4.0/images/sha256-a09b21248133876fc8912a5ff4e6ee2c8d62b14120313e426b3dadda5702713d"><i class="fab fa-docker fa-lg"></i></a>
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-c716cf167e6e49893f11de03606ed37044153aca089e74ca615065c06877f86b"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
|
||||
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
|
||||
- 22.04
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`_
|
||||
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
|
||||
- `4.0.7 <https://github.com/open-mpi/ompi/tree/v4.0.7>`_
|
||||
- `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
|
||||
- `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu22.04_py3.9_pytorch_release_2.4.0/images/sha256-963187534467f0f9da77996762fc1d112a6faa5372277c348a505533e7876ec8"><i class="fab fa-docker fa-lg"></i></a>
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.3.0/images/sha256-0434cbc9b07b2c26e39480d7447f676f9057a1054dcff00e0050c25a6eddbd3c"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
|
||||
- 22.04
|
||||
- `3.9.21 <https://www.python.org/downloads/release/python-3921/>`_
|
||||
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`_
|
||||
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`_
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
- `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`_
|
||||
- `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
|
||||
- `4.0.7 <https://github.com/open-mpi/ompi/tree/v4.0.7>`_
|
||||
- `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-952f2621bd2bf3078bef19061e05b209105a82a7908e7e6cdf85014938a4d93a"><i class="fab fa-docker fa-lg"></i></a>
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-688b1c0073092615fb98778d78b16191e506097ee116a2d3d2628b264d5d367b"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`_
|
||||
- 22.04
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`_
|
||||
- `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
|
||||
- `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu22.04_py3.10_pytorch_release_2.2.1/images/sha256-a2fe20e170feb9e05da3e5728bb98e40d08567e137be8e6ba797962ed2852608"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.2.1 <https://github.com/ROCm/pytorch/tree/release/2.2>`_
|
||||
- 22.04
|
||||
- `3.10 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `1.2.0 <https://github.com/ROCm/apex/tree/release/1.2.0>`_
|
||||
- `0.17.1 <https://github.com/pytorch/vision/tree/v0.17.1>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
|
||||
- `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu20.04_py3.9_pytorch_release_2.2.1/images/sha256-7f231937c897cca5f89e360be33c70a2017d60f62d1fbe81292be48c15fe345b"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.2.1 <https://github.com/ROCm/pytorch/tree/release/2.2>`_
|
||||
- 20.04
|
||||
- `3.9.21 <https://www.python.org/downloads/release/python-3921/>`_
|
||||
- `1.2.0 <https://github.com/ROCm/apex/tree/release/1.2.0>`_
|
||||
- `0.17.1 <https://github.com/pytorch/vision/tree/v0.17.1>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
|
||||
- `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu22.04_py3.9_pytorch_release_1.13.1/images/sha256-616a47758004f91951e2da6c1fe291f903de65a7b2318d4b18359b48fe3032f4"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `1.13.1 <https://github.com/ROCm/pytorch/tree/release/1.13>`_
|
||||
- 22.04
|
||||
- `3.9.21 <https://www.python.org/downloads/release/python-3921/>`_
|
||||
- `1.0.0 <https://github.com/ROCm/apex/tree/release/1.0.0>`_
|
||||
- `0.14.0 <https://github.com/pytorch/vision/tree/v0.14.0>`_
|
||||
- `2.19.0 <https://github.com/tensorflow/tensorboard/tree/2.19>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
|
||||
- `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu20.04_py3.9_pytorch_release_1.13.1/images/sha256-a2cfb365aea58b84595e241ffdb0d5ef3e6566e98c10b5499f4aa29983a74ea2"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `1.13.1 <https://github.com/ROCm/pytorch/tree/release/1.13>`_
|
||||
- 20.04
|
||||
- `3.9.21 <https://www.python.org/downloads/release/python-3921/>`_
|
||||
- `1.0.0 <https://github.com/ROCm/apex/tree/release/1.0.0>`_
|
||||
- `0.14.0 <https://github.com/pytorch/vision/tree/v0.14.0>`_
|
||||
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18>`_
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`_
|
||||
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
|
||||
- `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
Critical ROCm libraries for PyTorch
|
||||
Key ROCm libraries for PyTorch
|
||||
================================================================================
|
||||
|
||||
The functionality of PyTorch with ROCm is determined by its underlying library
|
||||
dependencies. These critical ROCm components affect the capabilities,
|
||||
performance, and feature set available to developers. The versions described
|
||||
are available in ROCm :version:`rocm_version`.
|
||||
PyTorch functionality on ROCm is determined by its underlying library
|
||||
dependencies. These ROCm components affect the capabilities, performance, and
|
||||
feature set available to developers.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
@@ -223,24 +274,23 @@ are available in ROCm :version:`rocm_version`.
|
||||
- :version-ref:`hipBLAS rocm_version`
|
||||
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
|
||||
matrix and vector operations.
|
||||
- Supports operations like matrix multiplication, matrix-vector products,
|
||||
and tensor contractions. Utilized in both dense and batched linear
|
||||
algebra operations.
|
||||
- Supports operations such as matrix multiplication, matrix-vector
|
||||
products, and tensor contractions. Utilized in both dense and batched
|
||||
linear algebra operations.
|
||||
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
|
||||
- :version-ref:`hipBLASLt rocm_version`
|
||||
- hipBLASLt is an extension of the hipBLAS library, providing additional
|
||||
features like epilogues fused into the matrix multiplication kernel or
|
||||
use of integer tensor cores.
|
||||
- It accelerates operations like ``torch.matmul``, ``torch.mm``, and the
|
||||
- Accelerates operations such as ``torch.matmul``, ``torch.mm``, and the
|
||||
matrix multiplications used in convolutional and linear layers.
|
||||
* - `hipCUB <https://github.com/ROCm/hipCUB>`_
|
||||
- :version-ref:`hipCUB rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms for reduction,
|
||||
scan, sort and select.
|
||||
- Supports operations like ``torch.sum``, ``torch.cumsum``, ``torch.sort``
|
||||
and ``torch.topk``. Operations on sparse tensors or tensors with
|
||||
irregular shapes often involve scanning, sorting, and filtering, which
|
||||
hipCUB handles efficiently.
|
||||
- Supports operations such as ``torch.sum``, ``torch.cumsum``,
|
||||
``torch.sort`` irregular shapes often involve scanning, sorting, and
|
||||
filtering, which hipCUB handles efficiently.
|
||||
* - `hipFFT <https://github.com/ROCm/hipFFT>`_
|
||||
- :version-ref:`hipFFT rocm_version`
|
||||
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
|
||||
@@ -248,8 +298,8 @@ are available in ROCm :version:`rocm_version`.
|
||||
* - `hipRAND <https://github.com/ROCm/hipRAND>`_
|
||||
- :version-ref:`hipRAND rocm_version`
|
||||
- Provides fast random number generation for GPUs.
|
||||
- The ``torch.rand``, ``torch.randn`` and stochastic layers like
|
||||
``torch.nn.Dropout``.
|
||||
- The ``torch.rand``, ``torch.randn``, and stochastic layers like
|
||||
``torch.nn.Dropout`` rely on hipRAND.
|
||||
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
|
||||
- :version-ref:`hipSOLVER rocm_version`
|
||||
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and
|
||||
@@ -320,7 +370,7 @@ are available in ROCm :version:`rocm_version`.
|
||||
- :version-ref:`RPP rocm_version`
|
||||
- Speeds up data augmentation, transformation, and other preprocessing steps.
|
||||
- Easy to integrate into PyTorch's ``torch.utils.data`` and
|
||||
``torchvision`` data load workloads.
|
||||
``torchvision`` data load workloads to speed up data processing.
|
||||
* - `rocThrust <https://github.com/ROCm/rocThrust>`_
|
||||
- :version-ref:`rocThrust rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms like sorting,
|
||||
@@ -337,11 +387,11 @@ are available in ROCm :version:`rocm_version`.
|
||||
involve matrix products, such as ``torch.matmul``, ``torch.bmm``, and
|
||||
more.
|
||||
|
||||
Supported and unsupported features
|
||||
Supported features
|
||||
================================================================================
|
||||
|
||||
The following section maps GPU-accelerated PyTorch features to their supported
|
||||
ROCm and PyTorch versions.
|
||||
This section maps GPU-accelerated PyTorch features to their supported ROCm and
|
||||
PyTorch versions.
|
||||
|
||||
torch
|
||||
--------------------------------------------------------------------------------
|
||||
@@ -349,23 +399,24 @@ torch
|
||||
`torch <https://pytorch.org/docs/stable/index.html>`_ is the central module of
|
||||
PyTorch, providing data structures for multi-dimensional tensors and
|
||||
implementing mathematical operations on them. It also includes utilities for
|
||||
efficient serialization of tensors and arbitrary data types, along with various
|
||||
other tools.
|
||||
efficient serialization of tensors and arbitrary data types and other tools.
|
||||
|
||||
Tensor data types
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The data type of a tensor is specified using the ``dtype`` attribute or argument, and PyTorch supports a wide range of data types for different use cases.
|
||||
The tensor data type is specified using the ``dtype`` attribute or argument.
|
||||
PyTorch supports many data types for different use cases.
|
||||
|
||||
The following table lists `torch.Tensor <https://pytorch.org/docs/stable/tensors.html>`_'s single data types:
|
||||
The following table lists `torch.Tensor <https://pytorch.org/docs/stable/tensors.html>`_
|
||||
single data types:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Data type
|
||||
- Description
|
||||
- Since PyTorch
|
||||
- Since ROCm
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - ``torch.float8_e4m3fn``
|
||||
- 8-bit floating point, e4m3
|
||||
- 2.3
|
||||
@@ -457,11 +508,11 @@ The following table lists `torch.Tensor <https://pytorch.org/docs/stable/tensors
|
||||
|
||||
.. note::
|
||||
|
||||
Unsigned types aside from ``uint8`` are currently only have limited support in
|
||||
eager mode (they primarily exist to assist usage with ``torch.compile``).
|
||||
Unsigned types except ``uint8`` have limited support in eager mode. They
|
||||
primarily exist to assist usage with ``torch.compile``.
|
||||
|
||||
The :doc:`ROCm precision support page <rocm:reference/precision-support>`
|
||||
collected the native HW support of different data types.
|
||||
See :doc:`ROCm precision support <rocm:reference/precision-support>` for the
|
||||
native hardware support of data types.
|
||||
|
||||
torch.cuda
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
@@ -476,8 +527,8 @@ leveraging ROCm and CUDA as the underlying frameworks.
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since PyTorch
|
||||
- Since ROCm
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - Device management
|
||||
- Utilities for managing and interacting with GPUs.
|
||||
- 0.4.0
|
||||
@@ -551,8 +602,8 @@ PyTorch interacts with the ROCm or CUDA environment.
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since PyTorch
|
||||
- Since ROCm
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - ``cufft_plan_cache``
|
||||
- Manages caching of GPU FFT plans to optimize repeated FFT computations.
|
||||
- 1.7.0
|
||||
@@ -600,8 +651,8 @@ Supported ``torch`` options include:
|
||||
|
||||
* - Option
|
||||
- Description
|
||||
- Since PyTorch
|
||||
- Since ROCm
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - ``allow_tf32``
|
||||
- TensorFloat-32 tensor cores may be used in cuDNN convolutions on NVIDIA
|
||||
Ampere or newer GPUs.
|
||||
@@ -616,28 +667,28 @@ Supported ``torch`` options include:
|
||||
Automatic mixed precision: torch.amp
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
PyTorch that automates the process of using both 16-bit (half-precision,
|
||||
float16) and 32-bit (single-precision, float32) floating-point types in model
|
||||
training and inference.
|
||||
PyTorch automates the process of using both 16-bit (half-precision, float16) and
|
||||
32-bit (single-precision, float32) floating-point types in model training and
|
||||
inference.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since PyTorch
|
||||
- Since ROCm
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - Autocasting
|
||||
- Instances of autocast serve as context managers or decorators that allow
|
||||
- Autocast instances serve as context managers or decorators that allow
|
||||
regions of your script to run in mixed precision.
|
||||
- 1.9
|
||||
- 2.5
|
||||
* - Gradient scaling
|
||||
- To prevent underflow, “gradient scaling” multiplies the network’s
|
||||
loss(es) by a scale factor and invokes a backward pass on the scaled
|
||||
loss(es). Gradients flowing backward through the network are then
|
||||
scaled by the same factor. In other words, gradient values have a
|
||||
larger magnitude, so they don’t flush to zero.
|
||||
loss by a scale factor and invokes a backward pass on the scaled
|
||||
loss. The same factor then scales gradients flowing backward through
|
||||
the network. In other words, gradient values have a larger magnitude so
|
||||
that they don’t flush to zero.
|
||||
- 1.9
|
||||
- 2.5
|
||||
* - CUDA op-specific behavior
|
||||
@@ -651,7 +702,7 @@ training and inference.
|
||||
Distributed library features
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The PyTorch distributed library includes a collective of parallelism modules, a
|
||||
PyTorch distributed library includes a collective of parallelism modules, a
|
||||
communications layer, and infrastructure for launching and debugging large
|
||||
training jobs. See :ref:`rocm-for-ai-pytorch-distributed` for more information.
|
||||
|
||||
@@ -665,13 +716,13 @@ of computational resources and scalability for large-scale tasks.
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since PyTorch
|
||||
- Since ROCm
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - TensorPipe
|
||||
- A point-to-point communication library integrated into
|
||||
PyTorch for distributed training. It is designed to handle tensor data
|
||||
transfers efficiently between different processes or devices, including
|
||||
those on separate machines.
|
||||
PyTorch for distributed training. It handles tensor data transfers
|
||||
efficiently between different processes or devices, including those on
|
||||
separate machines.
|
||||
- 1.8
|
||||
- 5.4
|
||||
* - Gloo
|
||||
@@ -690,8 +741,8 @@ torch.compiler
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since PyTorch
|
||||
- Since ROCm
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - ``torch.compiler`` (AOT Autograd)
|
||||
- Autograd captures not only the user-level code, but also backpropagation,
|
||||
which results in capturing the backwards pass “ahead-of-time”. This
|
||||
@@ -714,8 +765,8 @@ The `torchaudio <https://pytorch.org/audio/stable/index.html>`_ library provides
|
||||
utilities for processing audio data in PyTorch, such as audio loading,
|
||||
transformations, and feature extraction.
|
||||
|
||||
To ensure GPU-acceleration with ``torchaudio.transforms``, you need to move audio
|
||||
data (waveform tensor) explicitly to GPU using ``.to('cuda')``.
|
||||
To ensure GPU-acceleration with ``torchaudio.transforms``, you need to
|
||||
explicitly move audio data (waveform tensor) to GPU using ``.to('cuda')``.
|
||||
|
||||
The following ``torchaudio`` features are GPU-accelerated.
|
||||
|
||||
@@ -724,10 +775,10 @@ The following ``torchaudio`` features are GPU-accelerated.
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since torchaudio version
|
||||
- Since ROCm
|
||||
- As of torchaudio version
|
||||
- As of ROCm
|
||||
* - ``torchaudio.transforms.Spectrogram``
|
||||
- Generates spectrogram of an input waveform using STFT.
|
||||
- Generate a spectrogram of an input waveform using STFT.
|
||||
- 0.6.0
|
||||
- 4.5
|
||||
* - ``torchaudio.transforms.MelSpectrogram``
|
||||
@@ -747,7 +798,7 @@ torchvision
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
The `torchvision <https://pytorch.org/vision/stable/index.html>`_ library
|
||||
provide datasets, model architectures, and common image transformations for
|
||||
provides datasets, model architectures, and common image transformations for
|
||||
computer vision.
|
||||
|
||||
The following ``torchvision`` features are GPU-accelerated.
|
||||
@@ -757,8 +808,8 @@ The following ``torchvision`` features are GPU-accelerated.
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since torchvision version
|
||||
- Since ROCm
|
||||
- As of torchvision version
|
||||
- As of ROCm
|
||||
* - ``torchvision.transforms.functional``
|
||||
- Provides GPU-compatible transformations for image preprocessing like
|
||||
resize, normalize, rotate and crop.
|
||||
@@ -804,7 +855,7 @@ torchtune
|
||||
The `torchtune <https://pytorch.org/torchtune/stable/index.html>`_ library for
|
||||
authoring, fine-tuning and experimenting with LLMs.
|
||||
|
||||
* Usage: It works out-of-the-box, enabling developers to fine-tune ROCm PyTorch solutions.
|
||||
* Usage: Enabling developers to fine-tune ROCm PyTorch solutions.
|
||||
|
||||
* Only official release exists.
|
||||
|
||||
@@ -815,7 +866,8 @@ The `torchserve <https://pytorch.org/serve/>`_ is a PyTorch domain library
|
||||
for common sparsity and parallelism primitives needed for large-scale recommender
|
||||
systems.
|
||||
|
||||
* torchtext does not implement its own kernels. ROCm support is enabled by linking against ROCm libraries.
|
||||
* torchtext does not implement its own kernels. ROCm support is enabled by
|
||||
linking against ROCm libraries.
|
||||
|
||||
* Only official release exists.
|
||||
|
||||
@@ -826,14 +878,16 @@ The `torchrec <https://pytorch.org/torchrec/>`_ is a PyTorch domain library for
|
||||
common sparsity and parallelism primitives needed for large-scale recommender
|
||||
systems.
|
||||
|
||||
* torchrec does not implement its own kernels. ROCm support is enabled by linking against ROCm libraries.
|
||||
* torchrec does not implement its own kernels. ROCm support is enabled by
|
||||
linking against ROCm libraries.
|
||||
|
||||
* Only official release exists.
|
||||
|
||||
Unsupported PyTorch features
|
||||
----------------------------
|
||||
================================================================================
|
||||
|
||||
The following are GPU-accelerated PyTorch features not currently supported by ROCm.
|
||||
The following GPU-accelerated PyTorch features are not supported by ROCm for
|
||||
the listed supported PyTorch versions.
|
||||
|
||||
.. list-table::
|
||||
:widths: 30, 60, 10
|
||||
@@ -841,7 +895,7 @@ The following are GPU-accelerated PyTorch features not currently supported by RO
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since PyTorch
|
||||
- As of PyTorch
|
||||
* - APEX batch norm
|
||||
- Use APEX batch norm instead of PyTorch batch norm.
|
||||
- 1.6.0
|
||||
@@ -897,31 +951,3 @@ The following are GPU-accelerated PyTorch features not currently supported by RO
|
||||
utilized effectively through custom CUDA extensions or advanced
|
||||
workflows.
|
||||
- Not a core feature
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
* :doc:`Using ROCm for AI: training a model </how-to/rocm-for-ai/training/train-a-model>` provides
|
||||
guidance on how to leverage the ROCm platform for training AI models. It covers the steps, tools, and best practices
|
||||
for optimizing training workflows on AMD GPUs using PyTorch features.
|
||||
|
||||
* :doc:`Single-GPU fine-tuning and inference </how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference>`
|
||||
describes and demonstrates how to use the ROCm platform for the fine-tuning and inference of
|
||||
machine learning models, particularly large language models (LLMs), on systems with a single AMD
|
||||
Instinct MI300X accelerator. This page provides a detailed guide for setting up, optimizing, and
|
||||
executing fine-tuning and inference workflows in such environments.
|
||||
|
||||
* :doc:`Multi-GPU fine-tuning and inference optimization </how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference>`
|
||||
describes and demonstrates the fine-tuning and inference of machine learning models on systems
|
||||
with multi MI300X accelerators.
|
||||
|
||||
* The :doc:`Instinct MI300X workload optimization guide </how-to/rocm-for-ai/inference-optimization/workload>` provides detailed
|
||||
guidance on optimizing workloads for the AMD Instinct MI300X accelerator using ROCm. This guide is aimed at helping
|
||||
users achieve optimal performance for deep learning and other high-performance computing tasks on the MI300X
|
||||
accelerator.
|
||||
|
||||
* The :doc:`Inception with PyTorch documentation </conceptual/ai-pytorch-inception>`
|
||||
describes how PyTorch integrates with ROCm for AI workloads It outlines the use of PyTorch on the ROCm platform and
|
||||
focuses on how to efficiently leverage AMD GPU hardware for training and inference tasks in AI applications.
|
||||
|
||||
For more use cases and recommendations, see `ROCm PyTorch blog posts <https://rocm.blogs.amd.com/blog/tag/pytorch.html>`_.
|
||||
|
||||
@@ -56,7 +56,7 @@ Docker image compatibility
|
||||
AMD validates and publishes ready-made `TensorFlow images
|
||||
<https://hub.docker.com/r/rocm/tensorflow>`_ with ROCm backends on
|
||||
Docker Hub. The following Docker image tags and associated inventories are
|
||||
validated for `ROCm 6.3.3 <https://repo.radeon.com/rocm/apt/6.3.3/>`_. Click
|
||||
validated for `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_. Click
|
||||
the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
.. list-table:: TensorFlow Docker image components
|
||||
@@ -64,57 +64,91 @@ the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
* - Docker image
|
||||
- TensorFlow
|
||||
- Ubuntu
|
||||
- Dev
|
||||
- Python
|
||||
- TensorBoard
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
- `rocm/tensorflow`__
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.18-dev/images/sha256-fa9cf5fa6c6079a7118727531ccd0056c6e3224a42c3d6e78a49e7781daafff4"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- dev
|
||||
- 24.04
|
||||
- `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
|
||||
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
- `rocm/tensorflow`__
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.18-runtime/images/sha256-14addca4b92a47c806b83ebaeed593fc6672cd99f0017ed8dad759fe72ed0309"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- runtime
|
||||
- 24.04
|
||||
- `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
|
||||
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.10-tf2.18-dev/images/sha256-f5e151060df04ff5fb59f5604b49cd371931bbe75b06aec9fe7781397c4be0ce"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- dev
|
||||
- 22.04
|
||||
- `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
- `rocm/tensorflow`__
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.10-tf2.18-runtime/images/sha256-5cd4c03fdb1036570c0d4929da60a65c4466998dc80f1dc8a5a0b173eae017fb"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- runtime
|
||||
- 22.04
|
||||
- `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.17-dev/images/sha256-b3add80e374a2db2d1088d746e740afa89d439aca02cacba959ad298f5cd2b3f"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- dev
|
||||
- 24.04
|
||||
- `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
|
||||
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
- `rocm/tensorflow`__
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.17-runtime/images/sha256-3a244f026c32177eff7958ffbad390de85b438b2b48b455cc39f15d70fa1270d"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- runtime
|
||||
- 24.04
|
||||
- `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
|
||||
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.10-tf2.17-dev/images/sha256-e0cecdfacb59169335049983cdab6da578c209bb9f4d08aad97e184ae59171a6"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- dev
|
||||
- 22.04
|
||||
- `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
- `rocm/tensorflow`__
|
||||
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.16.2-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- dev
|
||||
- `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
|
||||
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`_
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.10-tf2.17-runtime/images/sha256-6f43de12f7eb202791b698ac51d28b72098de90034dbcd48486629b0125f7707"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
- `rocm/tensorflow`__
|
||||
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.16.2-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- dev
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- runtime
|
||||
- 22.04
|
||||
- `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`_
|
||||
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_
|
||||
|
||||
|
||||
Critical ROCm libraries for TensorFlow
|
||||
===============================================================================
|
||||
|
||||
@@ -34,15 +34,15 @@ project = "ROCm Documentation"
|
||||
project_path = os.path.abspath(".").replace("\\", "/")
|
||||
author = "Advanced Micro Devices, Inc."
|
||||
copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved."
|
||||
version = "6.4.0"
|
||||
release = "6.4.0"
|
||||
version = "6.4.1"
|
||||
release = "6.4.1"
|
||||
setting_all_article_info = True
|
||||
all_article_info_os = ["linux", "windows"]
|
||||
all_article_info_author = ""
|
||||
|
||||
# pages with specific settings
|
||||
article_pages = [
|
||||
{"file": "about/release-notes", "os": ["linux"], "date": "2025-04-10"},
|
||||
{"file": "about/release-notes", "os": ["linux"], "date": "2025-05-07"},
|
||||
{"file": "release/changelog", "os": ["linux"],},
|
||||
{"file": "compatibility/compatibility-matrix", "os": ["linux"]},
|
||||
{"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]},
|
||||
@@ -70,6 +70,7 @@ article_pages = [
|
||||
{"file": "how-to/rocm-for-ai/inference/hugging-face-models", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/llm-inference-frameworks", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/vllm-benchmark", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/pytorch-inference-benchmark", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]},
|
||||
|
||||
{"file": "how-to/rocm-for-ai/inference-optimization/index", "os": ["linux"]},
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
pytorch_inference_benchmark:
|
||||
unified_docker:
|
||||
latest: &rocm-pytorch-docker-latest
|
||||
pull_tag: rocm/pytorch:latest
|
||||
docker_hub_url:
|
||||
rocm_version:
|
||||
pytorch_version:
|
||||
hipblaslt_version:
|
||||
model_groups:
|
||||
- group: CLIP
|
||||
tag: clip
|
||||
models:
|
||||
- model: CLIP
|
||||
mad_tag: pyt_clip_inference
|
||||
model_repo: laion/CLIP-ViT-B-32-laion2B-s34B-b79K
|
||||
url: https://huggingface.co/laion/CLIP-ViT-B-32-laion2B-s34B-b79K
|
||||
precision: float16
|
||||
- group: Chai-1
|
||||
tag: chai
|
||||
models:
|
||||
- model: Chai-1
|
||||
mad_tag: pyt_chai1_inference
|
||||
model_repo: meta-llama/Llama-3.1-8B-Instruct
|
||||
url: https://huggingface.co/chaidiscovery/chai-1
|
||||
precision: float16
|
||||
@@ -1,10 +1,10 @@
|
||||
vllm_benchmark:
|
||||
unified_docker:
|
||||
latest:
|
||||
pull_tag: rocm/vllm:instinct_main
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.7.3_20250311/images/sha256-de0a2649b735f45b7ecab8813eb7b19778ae1f40591ca1196b07bc29c42ed4a3
|
||||
pull_tag: rocm/vllm:rocm6.3.1_instinct_vllm0.8.3_20250415
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.8.3_20250415/images/sha256-ad9062dea3483d59dedb17c67f7c49f30eebd6eb37c3fac0a171fb19696cc845
|
||||
rocm_version: 6.3.1
|
||||
vllm_version: 0.7.3
|
||||
vllm_version: 0.8.3
|
||||
pytorch_version: 2.7.0 (dev nightly)
|
||||
hipblaslt_version: 0.13
|
||||
model_groups:
|
||||
@@ -102,19 +102,12 @@ vllm_benchmark:
|
||||
model_repo: Qwen/Qwen2-72B-Instruct
|
||||
url: https://huggingface.co/Qwen/Qwen2-72B-Instruct
|
||||
precision: float16
|
||||
- group: JAIS
|
||||
tag: jais
|
||||
models:
|
||||
- model: JAIS 13B
|
||||
mad_tag: pyt_vllm_jais-13b
|
||||
model_repo: core42/jais-13b-chat
|
||||
url: https://huggingface.co/core42/jais-13b-chat
|
||||
precision: float16
|
||||
- model: JAIS 30B
|
||||
mad_tag: pyt_vllm_jais-30b
|
||||
model_repo: core42/jais-30b-chat-v3
|
||||
url: https://huggingface.co/core42/jais-30b-chat-v3
|
||||
- model: QwQ-32B
|
||||
mad_tag: pyt_vllm_qwq-32b
|
||||
model_repo: Qwen/QwQ-32B
|
||||
url: https://huggingface.co/Qwen/QwQ-32B
|
||||
precision: float16
|
||||
tunableop: true
|
||||
- group: DBRX
|
||||
tag: dbrx
|
||||
models:
|
||||
@@ -157,3 +150,11 @@ vllm_benchmark:
|
||||
model_repo: deepseek-ai/deepseek-moe-16b-chat
|
||||
url: https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat
|
||||
precision: float16
|
||||
- group: TII Falcon
|
||||
tag: falcon
|
||||
models:
|
||||
- model: Falcon 180B
|
||||
mad_tag: pyt_vllm_falcon-180b
|
||||
model_repo: tiiuae/falcon-180B
|
||||
url: https://huggingface.co/tiiuae/falcon-180B
|
||||
precision: float16
|
||||
|
||||
BIN
docs/data/rocm-software-stack-6_4_0.jpg
Normal file
BIN
docs/data/rocm-software-stack-6_4_0.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.2 MiB |
@@ -1,15 +1,178 @@
|
||||
.. meta::
|
||||
:description: How to use model quantization techniques to speed up inference.
|
||||
:keywords: ROCm, LLM, fine-tuning, usage, tutorial, quantization, GPTQ, transformers, bitsandbytes
|
||||
:keywords: ROCm, LLM, fine-tuning, usage, tutorial, quantization, Quark, GPTQ, transformers, bitsandbytes
|
||||
|
||||
*****************************
|
||||
Model quantization techniques
|
||||
*****************************
|
||||
|
||||
Quantization reduces the model size compared to its native full-precision version, making it easier to fit large models
|
||||
onto accelerators or GPUs with limited memory usage. This section explains how to perform LLM quantization using GPTQ
|
||||
onto accelerators or GPUs with limited memory usage. This section explains how to perform LLM quantization using AMD Quark, GPTQ
|
||||
and bitsandbytes on AMD Instinct hardware.
|
||||
|
||||
.. _quantize-llms-quark:
|
||||
|
||||
AMD Quark
|
||||
=========
|
||||
|
||||
`AMD Quark <https://quark.docs.amd.com/latest/>`_ offers the leading efficient and scalable quantization solution tailored to AMD Instinct GPUs. It supports ``FP8`` and ``INT8`` quantization for activations, weights, and KV cache,
|
||||
including ``FP8`` attention. For very large models, it employs a two-level ``INT4-FP8`` scheme—storing weights in ``INT4`` while computing with ``FP8``—for nearly 4× compression without sacrificing accuracy.
|
||||
Quark scales efficiently across multiple GPUs, efficiently handling ultra-large models like Llama-3.1-405B. Quantized ``FP8`` models like Llama, Mixtral, and Grok-1 are available under the `AMD organization on Hugging Face <https://huggingface.co/collections/amd/quark-quantized-ocp-fp8-models-66db7936d18fcbaf95d4405c>`_, and can be deployed directly via `vLLM <https://github.com/vllm-project/vllm/tree/main/vllm>`_.
|
||||
|
||||
Installing Quark
|
||||
-------------------
|
||||
|
||||
The latest release of Quark can be installed with pip
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
pip install amd-quark
|
||||
|
||||
For detailed installation instructions, refer to the `Quark documentation <https://quark.docs.amd.com/latest/install.html>`_.
|
||||
|
||||
|
||||
Using Quark for quantization
|
||||
-----------------------------
|
||||
|
||||
#. First, load the pre-trained model and its corresponding tokenizer using the Hugging Face ``transformers`` library.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||
|
||||
MODEL_ID = "meta-llama/Llama-2-70b-chat-hf"
|
||||
MAX_SEQ_LEN = 512
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
MODEL_ID, device_map="auto", torch_dtype="auto",
|
||||
)
|
||||
model.eval()
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, model_max_length=MAX_SEQ_LEN)
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
|
||||
#. Prepare the calibration DataLoader (static quantization requires calibration data).
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from datasets import load_dataset
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
BATCH_SIZE = 1
|
||||
NUM_CALIBRATION_DATA = 512
|
||||
|
||||
dataset = load_dataset("mit-han-lab/pile-val-backup", split="validation")
|
||||
text_data = dataset["text"][:NUM_CALIBRATION_DATA]
|
||||
|
||||
tokenized_outputs = tokenizer(
|
||||
text_data, return_tensors="pt", padding=True, truncation=True, max_length=MAX_SEQ_LEN
|
||||
)
|
||||
calib_dataloader = DataLoader(
|
||||
tokenized_outputs['input_ids'], batch_size=BATCH_SIZE, drop_last=True
|
||||
)
|
||||
|
||||
#. Define the quantization configuration. See the comments in the following code snippet for descriptions of each configuration option.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from quark.torch.quantization import (Config, QuantizationConfig,
|
||||
FP8E4M3PerTensorSpec)
|
||||
|
||||
# Define fp8/per-tensor/static spec.
|
||||
FP8_PER_TENSOR_SPEC = FP8E4M3PerTensorSpec(observer_method="min_max",
|
||||
is_dynamic=False).to_quantization_spec()
|
||||
|
||||
# Define global quantization config, input tensors and weight apply FP8_PER_TENSOR_SPEC.
|
||||
global_quant_config = QuantizationConfig(input_tensors=FP8_PER_TENSOR_SPEC,
|
||||
weight=FP8_PER_TENSOR_SPEC)
|
||||
|
||||
# Define quantization config for kv-cache layers, output tensors apply FP8_PER_TENSOR_SPEC.
|
||||
KV_CACHE_SPEC = FP8_PER_TENSOR_SPEC
|
||||
kv_cache_layer_names_for_llama = ["*k_proj", "*v_proj"]
|
||||
kv_cache_quant_config = {name :
|
||||
QuantizationConfig(input_tensors=global_quant_config.input_tensors,
|
||||
weight=global_quant_config.weight,
|
||||
output_tensors=KV_CACHE_SPEC)
|
||||
for name in kv_cache_layer_names_for_llama}
|
||||
layer_quant_config = kv_cache_quant_config.copy()
|
||||
|
||||
EXCLUDE_LAYERS = ["lm_head"]
|
||||
quant_config = Config(
|
||||
global_quant_config=global_quant_config,
|
||||
layer_quant_config=layer_quant_config,
|
||||
kv_cache_quant_config=kv_cache_quant_config,
|
||||
exclude=EXCLUDE_LAYERS)
|
||||
|
||||
#. Quantize the model and export
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import torch
|
||||
from quark.torch import ModelQuantizer, ModelExporter
|
||||
from quark.torch.export import ExporterConfig, JsonExporterConfig
|
||||
|
||||
# Apply quantization.
|
||||
quantizer = ModelQuantizer(quant_config)
|
||||
quant_model = quantizer.quantize_model(model, calib_dataloader)
|
||||
|
||||
# Freeze quantized model to export.
|
||||
freezed_model = quantizer.freeze(model)
|
||||
|
||||
# Define export config.
|
||||
LLAMA_KV_CACHE_GROUP = ["*k_proj", "*v_proj"]
|
||||
export_config = ExporterConfig(json_export_config=JsonExporterConfig())
|
||||
export_config.json_export_config.kv_cache_group = LLAMA_KV_CACHE_GROUP
|
||||
|
||||
EXPORT_DIR = MODEL_ID.split("/")[1] + "-w-fp8-a-fp8-kvcache-fp8-pertensor"
|
||||
exporter = ModelExporter(config=export_config, export_dir=EXPORT_DIR)
|
||||
with torch.no_grad():
|
||||
exporter.export_safetensors_model(freezed_model,
|
||||
quant_config=quant_config, tokenizer=tokenizer)
|
||||
|
||||
Evaluating the quantized model with vLLM
|
||||
----------------------------------------
|
||||
|
||||
The exported Quark-quantized model can be loaded directly by vLLM for inference. You need to specify the model path and inform vLLM about the quantization method (``quantization='quark'``) and the KV cache data type (``kv_cache_dtype='fp8'``).
|
||||
Use the ``LLM`` interface to load the model:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from vllm import LLM, SamplingParamsinterface
|
||||
|
||||
# Sample prompts.
|
||||
prompts = [
|
||||
"Hello, my name is",
|
||||
"The president of the United States is",
|
||||
"The capital of France is",
|
||||
"The future of AI is",
|
||||
]
|
||||
# Create a sampling params object.
|
||||
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
|
||||
|
||||
# Create an LLM.
|
||||
llm = LLM(model="Llama-2-70b-chat-hf-w-fp8-a-fp8-kvcache-fp8-pertensor",
|
||||
kv_cache_dtype='fp8',quantization='quark')
|
||||
# Generate texts from the prompts. The output is a list of RequestOutput objects
|
||||
# that contain the prompt, generated text, and other information.
|
||||
outputs = llm.generate(prompts, sampling_params)
|
||||
# Print the outputs.
|
||||
print("\nGenerated Outputs:\n" + "-" * 60)
|
||||
for output in outputs:
|
||||
prompt = output.prompt
|
||||
generated_text = output.outputs[0].text
|
||||
print(f"Prompt: {prompt!r}")
|
||||
print(f"Output: {generated_text!r}")
|
||||
print("-" * 60)
|
||||
|
||||
You can also evaluate the quantized model's accuracy on standard benchmarks using the `lm-evaluation-harness <https://github.com/EleutherAI/lm-evaluation-harness>`_. Pass the necessary vLLM arguments to ``lm_eval`` via ``--model_args``.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
lm_eval --model vllm \
|
||||
--model_args pretrained=Llama-2-70b-chat-hf-w-fp8-a-fp8-kvcache-fp8-pertensor,kv_cache_dtype='fp8',quantization='quark' \
|
||||
--tasks gsm8k
|
||||
|
||||
This provides a standardized way to measure the performance impact of quantization.
|
||||
.. _fine-tune-llms-gptq:
|
||||
|
||||
GPTQ
|
||||
@@ -33,7 +196,7 @@ The AutoGPTQ library implements the GPTQ algorithm.
|
||||
.. code-block:: shell
|
||||
|
||||
# This will install pre-built wheel for a specific ROCm version.
|
||||
|
||||
|
||||
pip install auto-gptq --no-build-isolation --extra-index-url https://huggingface.github.io/autogptq-index/whl/rocm573/
|
||||
|
||||
Or, install AutoGPTQ from source for the appropriate ROCm version (for example, ROCm 6.1).
|
||||
@@ -43,10 +206,10 @@ The AutoGPTQ library implements the GPTQ algorithm.
|
||||
# Clone the source code.
|
||||
git clone https://github.com/AutoGPTQ/AutoGPTQ.git
|
||||
cd AutoGPTQ
|
||||
|
||||
|
||||
# Speed up the compilation by specifying PYTORCH_ROCM_ARCH to target device.
|
||||
PYTORCH_ROCM_ARCH=gfx942 ROCM_VERSION=6.1 pip install .
|
||||
|
||||
|
||||
# Show the package after the installation
|
||||
|
||||
#. Run ``pip show auto-gptq`` to print information for the installed ``auto-gptq`` package. Its output should look like
|
||||
@@ -112,7 +275,7 @@ Using GPTQ with Hugging Face Transformers
|
||||
.. code-block:: python
|
||||
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
|
||||
|
||||
|
||||
base_model_name = " NousResearch/Llama-2-7b-hf"
|
||||
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
|
||||
gptq_config = GPTQConfig(bits=4, dataset="c4", tokenizer=tokenizer)
|
||||
@@ -212,10 +375,10 @@ To get started with bitsandbytes primitives, use the following code as reference
|
||||
.. code-block:: python
|
||||
|
||||
import bitsandbytes as bnb
|
||||
|
||||
|
||||
# Use Int8 Matrix Multiplication
|
||||
bnb.matmul(..., threshold=6.0)
|
||||
|
||||
|
||||
# Use bitsandbytes 8-bit Optimizers
|
||||
adam = bnb.optim.Adam8bit(model.parameters(), lr=0.001, betas=(0.9, 0.995))
|
||||
|
||||
@@ -227,14 +390,14 @@ To load a Transformers model in 4-bit, set ``load_in_4bit=true`` in ``BitsAndByt
|
||||
.. code-block:: python
|
||||
|
||||
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
|
||||
|
||||
|
||||
base_model_name = "NousResearch/Llama-2-7b-hf"
|
||||
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
|
||||
bnb_model_4bit = AutoModelForCausalLM.from_pretrained(
|
||||
base_model_name,
|
||||
device_map="auto",
|
||||
quantization_config=quantization_config)
|
||||
|
||||
|
||||
# Check the memory footprint with get_memory_footprint method
|
||||
print(bnb_model_4bit.get_memory_footprint())
|
||||
|
||||
@@ -243,9 +406,9 @@ To load a model in 8-bit for inference, use the ``load_in_8bit`` option.
|
||||
.. code-block:: python
|
||||
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
||||
|
||||
|
||||
base_model_name = "NousResearch/Llama-2-7b-hf"
|
||||
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
|
||||
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
|
||||
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
|
||||
@@ -253,7 +416,7 @@ To load a model in 8-bit for inference, use the ``load_in_8bit`` option.
|
||||
base_model_name,
|
||||
device_map="auto",
|
||||
quantization_config=quantization_config)
|
||||
|
||||
|
||||
prompt = "What is a large language model?"
|
||||
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
|
||||
generated_ids = model.generate(**inputs)
|
||||
|
||||
@@ -685,7 +685,7 @@ Two sample Llama scaling configuration files are in vLLM for ``llama2-70b`` and
|
||||
``llama2-7b``.
|
||||
|
||||
If building the vLLM using
|
||||
`Dockerfile.rocm <https://github.com/vllm-project/vllm/blob/main/Dockerfile.rocm>`_
|
||||
`Dockerfile.rocm <https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile.rocm>`_
|
||||
for ``llama2-70b`` scale config, find the file at
|
||||
``/vllm-workspace/tests/fp8_kv/llama2-70b-fp8-kv/kv_cache_scales.json`` at
|
||||
runtime.
|
||||
|
||||
@@ -16,8 +16,7 @@ ROCm supports vLLM and Hugging Face TGI as major LLM-serving frameworks.
|
||||
Serving using vLLM
|
||||
==================
|
||||
|
||||
vLLM is a fast and easy-to-use library for LLM inference and serving. vLLM officially supports ROCm versions 5.7 and
|
||||
6.0. AMD is actively working with the vLLM team to improve performance and support later ROCm versions.
|
||||
vLLM is a fast and easy-to-use library for LLM inference and serving. AMD is actively working with the vLLM team to improve performance and support the latest ROCm versions.
|
||||
|
||||
See the `GitHub repository <https://github.com/vllm-project/vllm>`_ and `official vLLM documentation
|
||||
<https://docs.vllm.ai/>`_ for more information.
|
||||
@@ -31,9 +30,9 @@ vLLM installation
|
||||
vLLM supports two ROCm-capable installation methods. Refer to the official documentation use the following links.
|
||||
|
||||
- `Build from source with Docker
|
||||
<https://docs.vllm.ai/en/latest/getting_started/amd-installation.html#build-from-source-docker-rocm>`_ (recommended)
|
||||
<https://docs.vllm.ai/en/latest/getting_started/installation/gpu.html?device=rocm#build-image-from-source>`_ (recommended)
|
||||
|
||||
- `Build from source <https://docs.vllm.ai/en/latest/getting_started/amd-installation.html#build-from-source-rocm>`_
|
||||
- `Build from source <https://docs.vllm.ai/en/latest/getting_started/installation/gpu.html?device=rocm#build-wheel-from-source>`_
|
||||
|
||||
vLLM walkthrough
|
||||
----------------
|
||||
|
||||
@@ -36,7 +36,7 @@ Installing vLLM
|
||||
|
||||
git clone https://github.com/vllm-project/vllm.git
|
||||
cd vllm
|
||||
docker build -f Dockerfile.rocm -t vllm-rocm .
|
||||
docker build -f docker/Dockerfile.rocm -t vllm-rocm .
|
||||
|
||||
.. tab-set::
|
||||
|
||||
|
||||
@@ -0,0 +1,167 @@
|
||||
.. meta::
|
||||
:description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the
|
||||
ROCm PyTorch Docker image.
|
||||
:keywords: model, MAD, automation, dashboarding, validate, pytorch
|
||||
|
||||
*************************************
|
||||
PyTorch inference performance testing
|
||||
*************************************
|
||||
|
||||
.. _pytorch-inference-benchmark-docker:
|
||||
|
||||
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/pytorch-inference-benchmark-models.yaml
|
||||
|
||||
{% set unified_docker = data.pytorch_inference_benchmark.unified_docker.latest %}
|
||||
{% set model_groups = data.pytorch_inference_benchmark.model_groups %}
|
||||
|
||||
The `ROCm PyTorch Docker <https://hub.docker.com/r/rocm/pytorch/tags>`_ image offers a prebuilt,
|
||||
optimized environment for testing model inference performance on AMD Instinct™ MI300X series
|
||||
accelerators. This guide demonstrates how to use the AMD Model Automation and Dashboarding (MAD)
|
||||
tool with the ROCm PyTorch container to test inference performance on various models efficiently.
|
||||
|
||||
.. _pytorch-inference-benchmark-available-models:
|
||||
|
||||
Supported models
|
||||
================
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div id="vllm-benchmark-ud-params-picker" class="container-fluid">
|
||||
<div class="row">
|
||||
<div class="col-2 me-2 model-param-head">Model</div>
|
||||
<div class="row col-10">
|
||||
{% for model_group in model_groups %}
|
||||
<div class="col-6 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row mt-1" style="display: none;">
|
||||
<div class="col-2 me-2 model-param-head">Model variant</div>
|
||||
<div class="row col-10">
|
||||
{% for model_group in model_groups %}
|
||||
{% set models = model_group.models %}
|
||||
{% for model in models %}
|
||||
<div class="col-12 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% for model_group in model_groups %}
|
||||
{% for model in model_group.models %}
|
||||
|
||||
.. container:: model-doc {{model.mad_tag}}
|
||||
|
||||
.. note::
|
||||
|
||||
See the `{{ model.model }} model card on Hugging Face <{{ model.url }}>`_ to learn more about your selected model.
|
||||
Some models require access authorization before use via an external license agreement through a third party.
|
||||
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
Getting started
|
||||
===============
|
||||
|
||||
Use the following procedures to reproduce the benchmark results on an
|
||||
MI300X series accelerator with the prebuilt PyTorch Docker image.
|
||||
|
||||
.. _pytorch-benchmark-get-started:
|
||||
|
||||
1. Disable NUMA auto-balancing.
|
||||
|
||||
To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
|
||||
might hang until the periodic balancing is finalized. For more information,
|
||||
see :ref:`AMD Instinct MI300X system optimization <mi300x-disable-numa>`.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# disable automatic NUMA balancing
|
||||
sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
|
||||
# check if NUMA balancing is disabled (returns 0 if disabled)
|
||||
cat /proc/sys/kernel/numa_balancing
|
||||
0
|
||||
|
||||
.. container:: model-doc pyt_chai1_inference
|
||||
|
||||
2. Use the following command to pull the `ROCm PyTorch Docker image <https://hub.docker.com/layers/rocm/pytorch/latest/images/sha256-05b55983e5154f46e7441897d0908d79877370adca4d1fff4899d9539d6c4969>`_ from Docker Hub.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0_triton_llvm_reg_issue
|
||||
|
||||
.. note::
|
||||
|
||||
The Chai-1 benchmark uses a specifically selected Docker image using ROCm 6.2.3 and PyTorch 2.3.0 to address an accuracy issue.
|
||||
|
||||
.. container:: model-doc pyt_clip_inference
|
||||
|
||||
2. Use the following command to pull the `ROCm PyTorch Docker image <https://hub.docker.com/layers/rocm/pytorch/rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0_triton_llvm_reg_issue/images/sha256-b736a4239ab38a9d0e448af6d4adca83b117debed00bfbe33846f99c4540f79b>`_ from Docker Hub.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/pytorch:latest
|
||||
|
||||
Benchmarking
|
||||
============
|
||||
|
||||
.. _pytorch-inference-benchmark-mad:
|
||||
|
||||
{% for model_group in model_groups %}
|
||||
{% for model in model_group.models %}
|
||||
|
||||
.. container:: model-doc {{model.mad_tag}}
|
||||
|
||||
To simplify performance testing, the ROCm Model Automation and Dashboarding
|
||||
(`<https://github.com/ROCm/MAD>`__) project provides ready-to-use scripts and configuration.
|
||||
To start, clone the MAD repository to a local directory and install the required packages on the
|
||||
host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD
|
||||
pip install -r requirements.txt
|
||||
|
||||
Use this command to run the performance benchmark test on the `{{model.model}} <{{ model.url }}>`_ model
|
||||
using one GPU with the ``{{model.precision}}`` data type on the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
|
||||
python3 tools/run_models.py --tags {{model.mad_tag}} --keep-model-dir --live-output --timeout 28800
|
||||
|
||||
MAD launches a Docker container with the name
|
||||
``container_ci-{{model.mad_tag}}``. The latency and throughput reports of the
|
||||
model are collected in ``perf.csv``.
|
||||
|
||||
.. note::
|
||||
|
||||
For improved performance, consider enabling TunableOp. By default,
|
||||
``{{model.mad_tag}}`` runs with TunableOp disabled (see
|
||||
`<https://github.com/ROCm/MAD/blob/develop/models.json>`__). To enable
|
||||
it, edit the default run behavior in the ``tools/run_models.py``-- update the model's
|
||||
run ``args`` by changing ``--tunableop off`` to ``--tunableop on``.
|
||||
|
||||
Enabling TunableOp triggers a two-pass run -- a warm-up followed by the performance-collection run.
|
||||
Although this might increase the initial training time, it can result in a performance gain.
|
||||
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
Further reading
|
||||
===============
|
||||
|
||||
- To learn more about system settings and management practices to configure your system for
|
||||
MI300X accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
|
||||
|
||||
- To learn how to run LLM models from Hugging Face or your model, see
|
||||
:doc:`Running models from Hugging Face <hugging-face-models>`.
|
||||
|
||||
- To learn how to optimize inference on LLMs, see
|
||||
:doc:`Inference optimization <../inference-optimization/index>`.
|
||||
|
||||
- To learn how to fine-tune LLMs, see
|
||||
:doc:`Fine-tuning LLMs <../fine-tuning/index>`.
|
||||
@@ -3,9 +3,9 @@
|
||||
ROCm vLLM Docker image.
|
||||
:keywords: model, MAD, automation, dashboarding, validate
|
||||
|
||||
********************************************************
|
||||
LLM inference performance testing on AMD Instinct MI300X
|
||||
********************************************************
|
||||
**********************************
|
||||
vLLM inference performance testing
|
||||
**********************************
|
||||
|
||||
.. _vllm-benchmark-unified-docker:
|
||||
|
||||
@@ -16,7 +16,7 @@ LLM inference performance testing on AMD Instinct MI300X
|
||||
|
||||
The `ROCm vLLM Docker <{{ unified_docker.docker_hub_url }}>`_ image offers
|
||||
a prebuilt, optimized environment for validating large language model (LLM)
|
||||
inference performance on AMD Instinct™ MI300X series accelerator. This ROCm vLLM
|
||||
inference performance on AMD Instinct™ MI300X series accelerators. This ROCm vLLM
|
||||
Docker image integrates vLLM and PyTorch tailored specifically for MI300X series
|
||||
accelerators and includes the following components:
|
||||
|
||||
@@ -34,7 +34,7 @@ LLM inference performance testing on AMD Instinct MI300X
|
||||
|
||||
.. _vllm-benchmark-available-models:
|
||||
|
||||
Available models
|
||||
Supported models
|
||||
================
|
||||
|
||||
.. raw:: html
|
||||
@@ -183,6 +183,25 @@ LLM inference performance testing on AMD Instinct MI300X
|
||||
to collect latency and throughput performance data, you can also change the benchmarking
|
||||
parameters. See the standalone benchmarking tab for more information.
|
||||
|
||||
{% if model.tunableop %}
|
||||
|
||||
.. note::
|
||||
|
||||
For improved performance, consider enabling :ref:`PyTorch TunableOp <mi300x-tunableop>`.
|
||||
TunableOp automatically explores different implementations and configurations of certain PyTorch
|
||||
operators to find the fastest one for your hardware.
|
||||
|
||||
By default, ``{{model.mad_tag}}`` runs with TunableOp disabled
|
||||
(see
|
||||
`<https://github.com/ROCm/MAD/blob/develop/models.json>`__). To
|
||||
enable it, edit the default run behavior in the ``models.json``
|
||||
configuration before running inference -- update the model's run
|
||||
``args`` by changing ``--tunableop off`` to ``--tunableop on``.
|
||||
|
||||
Enabling TunableOp triggers a two-pass run -- a warm-up followed by the performance-collection run.
|
||||
|
||||
{% endif %}
|
||||
|
||||
.. tab-item:: Standalone benchmarking
|
||||
|
||||
Run the vLLM benchmark tool independently by starting the
|
||||
@@ -331,11 +350,18 @@ for benchmarking, see the version-specific documentation.
|
||||
- PyTorch version
|
||||
- Resources
|
||||
|
||||
* - 6.3.1
|
||||
- 0.7.3
|
||||
- 2.7.0
|
||||
-
|
||||
* `Documentation <https://rocm.docs.amd.com/en/docs-6.3.3/how-to/rocm-for-ai/inference/vllm-benchmark.html>`_
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.7.3_20250325/images/sha256-25245924f61750b19be6dcd8e787e46088a496c1fe17ee9b9e397f3d84d35640>`_
|
||||
|
||||
* - 6.3.1
|
||||
- 0.6.6
|
||||
- 2.7.0
|
||||
-
|
||||
* `Documentation <https://rocm.docs.amd.com/en/docs-6.3.2/how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.html>`_
|
||||
* `Documentation <https://rocm.docs.amd.com/en/docs-6.3.2/how-to/rocm-for-ai/inference/vllm-benchmark.html>`_
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_mi300_ubuntu22.04_py3.12_vllm_0.6.6/images/sha256-9a12ef62bbbeb5a4c30a01f702c8e025061f575aa129f291a49fbd02d6b4d6c9>`_
|
||||
|
||||
* - 6.2.1
|
||||
|
||||
@@ -12,7 +12,7 @@ ROCm is an optimized fork of the upstream
|
||||
`<https://github.com/AI-Hypercomputer/maxtext>`__ enabling efficient AI workloads
|
||||
on AMD MI300X series accelerators.
|
||||
|
||||
The MaxText for ROCm training Docker (``rocm/jax-training:maxtext-v25.4``) image
|
||||
The MaxText for ROCm training Docker (``rocm/jax-training:maxtext-v25.5``) image
|
||||
provides a prebuilt environment for training on AMD Instinct MI300X and MI325X accelerators,
|
||||
including essential components like JAX, XLA, ROCm libraries, and MaxText utilities.
|
||||
It includes the following software components:
|
||||
@@ -20,15 +20,15 @@ It includes the following software components:
|
||||
+--------------------------+--------------------------------+
|
||||
| Software component | Version |
|
||||
+==========================+================================+
|
||||
| ROCm | 6.3.0 |
|
||||
| ROCm | 6.3.4 |
|
||||
+--------------------------+--------------------------------+
|
||||
| JAX | 0.4.31 |
|
||||
| JAX | 0.4.35 |
|
||||
+--------------------------+--------------------------------+
|
||||
| Python | 3.10 |
|
||||
| Python | 3.10.12 |
|
||||
+--------------------------+--------------------------------+
|
||||
| Transformer Engine | 1.12.0.dev0+f81a3eb |
|
||||
| Transformer Engine | 1.12.0.dev0+b8b92dc |
|
||||
+--------------------------+--------------------------------+
|
||||
| hipBLASLt | git78ec8622 |
|
||||
| hipBLASLt | 0.13.0-ae9c477a |
|
||||
+--------------------------+--------------------------------+
|
||||
|
||||
Supported features and models
|
||||
@@ -48,6 +48,8 @@ MaxText provides the following key features to train large language models effic
|
||||
|
||||
The following models are pre-optimized for performance on AMD Instinct MI300X series accelerators.
|
||||
|
||||
* Llama 3.3 70B
|
||||
|
||||
* Llama 3.1 8B
|
||||
|
||||
* Llama 3.1 70B
|
||||
@@ -115,7 +117,7 @@ with RDMA, skip ahead to :ref:`amd-maxtext-download-docker`.
|
||||
|
||||
a. Master address
|
||||
|
||||
Change `localhost` to the master node's resolvable hostname or IP address:
|
||||
Change ``localhost`` to the master node's resolvable hostname or IP address:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
@@ -180,13 +182,15 @@ Download the Docker image
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/jax-training:maxtext-v25.4
|
||||
docker pull rocm/jax-training:maxtext-v25.5
|
||||
|
||||
2. Run the Docker container.
|
||||
2. Use the following command to launch the Docker container. Note that the benchmarking scripts
|
||||
used in the :ref:`following section <amd-maxtext-get-started>` automatically launch the Docker container
|
||||
and execute the benchmark.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it --device /dev/dri --device /dev/kfd --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME/.ssh:/root/.ssh --shm-size 128G --name maxtext_training rocm/jax-training:maxtext-v25.4
|
||||
docker run -it --device /dev/dri --device /dev/kfd --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME/.ssh:/root/.ssh --shm-size 128G --name maxtext_training rocm/jax-training:maxtext-v25.5
|
||||
|
||||
.. _amd-maxtext-get-started:
|
||||
|
||||
@@ -219,7 +223,9 @@ Single node training benchmarking examples
|
||||
|
||||
Run the single node training benchmark:
|
||||
|
||||
IMAGE="rocm/jax-training:maxtext-v25.4" bash ./llama2_7b.sh
|
||||
.. code-block:: shell
|
||||
|
||||
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama2_7b.sh
|
||||
|
||||
* Example 2: Single node training with Llama 2 70B
|
||||
|
||||
@@ -233,7 +239,7 @@ Single node training benchmarking examples
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
IMAGE="rocm/jax-training:maxtext-v25.4" bash ./llama2_70b.sh
|
||||
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama2_70b.sh
|
||||
|
||||
* Example 3: Single node training with Llama 3 8B
|
||||
|
||||
@@ -247,7 +253,7 @@ Single node training benchmarking examples
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
IMAGE="rocm/jax-training:maxtext-v25.4" bash ./llama3_8b.sh
|
||||
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama3_8b.sh
|
||||
|
||||
* Example 4: Single node training with Llama 3 70B
|
||||
|
||||
@@ -261,9 +267,23 @@ Single node training benchmarking examples
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
IMAGE="rocm/jax-training:maxtext-v25.4" bash ./llama3_70b.sh
|
||||
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama3_70b.sh
|
||||
|
||||
* Example 5: Single node training with DeepSeek V2 16B
|
||||
* Example 5: Single node training with Llama 3.3 70B
|
||||
|
||||
Download the benchmarking script:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
wget https://raw.githubusercontent.com/ROCm/maxtext/refs/heads/main/benchmarks/gpu-rocm/llama3.3_70b.sh
|
||||
|
||||
Run the single node training benchmark:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama3.3_70b.sh
|
||||
|
||||
* Example 6: Single node training with DeepSeek V2 16B
|
||||
|
||||
Download the benchmarking script:
|
||||
|
||||
@@ -275,7 +295,7 @@ Single node training benchmarking examples
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
IMAGE="rocm/jax-training:maxtext-v25.4" bash ./deepseek_v2_16b.sh
|
||||
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./deepseek_v2_16b.sh
|
||||
|
||||
.. note::
|
||||
|
||||
@@ -343,3 +363,26 @@ own cluster setup.
|
||||
.. code-block:: shell
|
||||
|
||||
sbatch -N <num_nodes> llama3_70b_multinode.sh
|
||||
|
||||
Previous versions
|
||||
=================
|
||||
|
||||
This table lists previous versions of the ROCm JAX MaxText Docker image for training
|
||||
performance testing. For detailed information about available models for
|
||||
benchmarking, see the version-specific documentation.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:stub-columns: 1
|
||||
|
||||
* - Image version
|
||||
- ROCm version
|
||||
- JAX version
|
||||
- Resources
|
||||
|
||||
* - 25.4
|
||||
- 6.3.0
|
||||
- 0.4.31
|
||||
-
|
||||
* `Documentation <https://rocm.docs.amd.com/en/docs-6.3.3/how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.html>`_
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.4/images/sha256-fb3eb71cd74298a7b3044b7130cf84113f14d518ff05a2cd625c11ea5f6a7b01>`_
|
||||
|
||||
@@ -9,7 +9,8 @@ Training a model with PyTorch for ROCm
|
||||
PyTorch is an open-source machine learning framework that is widely used for
|
||||
model training with GPU-optimized components for transformer-based models.
|
||||
|
||||
The PyTorch for ROCm training Docker (``rocm/pytorch-training:v25.4``) image
|
||||
The `PyTorch for ROCm training Docker <https://hub.docker.com/layers/rocm/pytorch-training/v25.5/images/sha256-d47850a9b25b4a7151f796a8d24d55ea17bba545573f0d50d54d3852f96ecde5>`_
|
||||
(``rocm/pytorch-training:v25.5``) image
|
||||
provides a prebuilt optimized environment for fine-tuning and pretraining a
|
||||
model on AMD Instinct MI325X and MI300X accelerators. It includes the following
|
||||
software components to accelerate training workloads:
|
||||
@@ -17,19 +18,19 @@ software components to accelerate training workloads:
|
||||
+--------------------------+--------------------------------+
|
||||
| Software component | Version |
|
||||
+==========================+================================+
|
||||
| ROCm | 6.3.0 |
|
||||
| ROCm | 6.3.4 |
|
||||
+--------------------------+--------------------------------+
|
||||
| PyTorch | 2.7.0a0+git637433 |
|
||||
+--------------------------+--------------------------------+
|
||||
| Python | 3.10 |
|
||||
+--------------------------+--------------------------------+
|
||||
| Transformer Engine | 1.11 |
|
||||
| Transformer Engine | 1.12.0.dev0+25a33da |
|
||||
+--------------------------+--------------------------------+
|
||||
| Flash Attention | 3.0.0 |
|
||||
+--------------------------+--------------------------------+
|
||||
| hipBLASLt | git258a2162 |
|
||||
| hipBLASLt | git53b53bf |
|
||||
+--------------------------+--------------------------------+
|
||||
| Triton | 3.1 |
|
||||
| Triton | 3.2.0 |
|
||||
+--------------------------+--------------------------------+
|
||||
|
||||
.. _amd-pytorch-training-model-support:
|
||||
@@ -39,6 +40,8 @@ Supported models
|
||||
|
||||
The following models are pre-optimized for performance on the AMD Instinct MI325X and MI300X accelerators.
|
||||
|
||||
* Llama 3.3 70B
|
||||
|
||||
* Llama 3.1 8B
|
||||
|
||||
* Llama 3.1 70B
|
||||
@@ -79,309 +82,346 @@ auto-balancing, skip this step. Otherwise, complete the :ref:`system validation
|
||||
and optimization steps <train-a-model-system-validation>` to set up your system
|
||||
before starting training.
|
||||
|
||||
Environment setup
|
||||
=================
|
||||
|
||||
This Docker image is optimized for specific model configurations outlined
|
||||
below. Performance can vary for other training workloads, as AMD
|
||||
doesn’t validate configurations and run conditions outside those described.
|
||||
|
||||
Download the Docker image
|
||||
-------------------------
|
||||
Benchmarking
|
||||
============
|
||||
|
||||
1. Use the following command to pull the Docker image from Docker Hub.
|
||||
Once the setup is complete, choose between two options to start benchmarking:
|
||||
|
||||
.. code-block:: shell
|
||||
.. tab-set::
|
||||
|
||||
docker pull rocm/pytorch-training:v25.4
|
||||
.. tab-item:: MAD-integrated benchmarking
|
||||
|
||||
2. Run the Docker container.
|
||||
Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
|
||||
directory and install the required packages on the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it --device /dev/dri --device /dev/kfd --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME:$HOME -v $HOME/.ssh:/root/.ssh --shm-size 64G --name training_env rocm/pytorch-training:v25.4
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD
|
||||
pip install -r requirements.txt
|
||||
|
||||
3. Use these commands if you exit the ``training_env`` container and need to return to it.
|
||||
For example, use this command to run the performance benchmark test on the Llama 3.1 8B model
|
||||
using one GPU with the float16 data type on the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
.. code-block:: shell
|
||||
|
||||
docker start training_env
|
||||
docker exec -it training_env bash
|
||||
export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
|
||||
python3 tools/run_models.py --tags pyt_train_llama-3.1-8b --keep-model-dir --live-output --timeout 28800
|
||||
|
||||
4. In the Docker container, clone the `<https://github.com/ROCm/MAD>`__
|
||||
repository and navigate to the benchmark scripts directory
|
||||
``/workspace/MAD/scripts/pytorch_train``.
|
||||
The available models for MAD-integrated benchmarking are:
|
||||
|
||||
.. code-block:: shell
|
||||
* ``pyt_train_llama-3.3-70b``
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD/scripts/pytorch_train
|
||||
* ``pyt_train_llama-3.1-8b``
|
||||
|
||||
Prepare training datasets and dependencies
|
||||
------------------------------------------
|
||||
* ``pyt_train_llama-3.1-70b``
|
||||
|
||||
The following benchmarking examples require downloading models and datasets
|
||||
from Hugging Face. To ensure successful access to gated repos, set your
|
||||
``HF_TOKEN``.
|
||||
* ``pyt_train_flux``
|
||||
|
||||
.. code-block:: shell
|
||||
MAD launches a Docker container with the name
|
||||
``container_ci-pyt_train_llama-3.1-8b``, for example. The latency and throughput reports of the
|
||||
model are collected in the following path: ``~/MAD/perf.csv``.
|
||||
|
||||
export HF_TOKEN=$your_personal_hugging_face_access_token
|
||||
.. tab-item:: Standalone benchmarking
|
||||
|
||||
Run the setup script to install libraries and datasets needed for benchmarking.
|
||||
.. rubric:: Download the Docker image and required packages
|
||||
|
||||
.. code-block:: shell
|
||||
Use the following command to pull the Docker image from Docker Hub.
|
||||
|
||||
./pytorch_benchmark_setup.sh
|
||||
.. code-block:: shell
|
||||
|
||||
``pytorch_benchmark_setup.sh`` installs the following libraries:
|
||||
docker pull rocm/pytorch-training:v25.5
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
Run the Docker container.
|
||||
|
||||
* - Library
|
||||
- Benchmark model
|
||||
- Reference
|
||||
.. code-block:: shell
|
||||
|
||||
* - ``accelerate``
|
||||
- Llama 3.1 8B, FLUX
|
||||
- `Hugging Face Accelerate <https://huggingface.co/docs/accelerate/en/index>`_
|
||||
docker run -it --device /dev/dri --device /dev/kfd --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME:$HOME -v $HOME/.ssh:/root/.ssh --shm-size 64G --name training_env rocm/pytorch-training:v25.5
|
||||
|
||||
* - ``datasets``
|
||||
- Llama 3.1 8B, 70B, FLUX
|
||||
- `Hugging Face Datasets <https://huggingface.co/docs/datasets/v3.2.0/en/index>`_ 3.2.0
|
||||
Use these commands if you exit the ``training_env`` container and need to return to it.
|
||||
|
||||
* - ``torchdata``
|
||||
- Llama 3.1 70B
|
||||
- `TorchData <https://pytorch.org/data/beta/index.html>`_
|
||||
.. code-block:: shell
|
||||
|
||||
* - ``tomli``
|
||||
- Llama 3.1 70B
|
||||
- `Tomli <https://pypi.org/project/tomli/>`_
|
||||
docker start training_env
|
||||
docker exec -it training_env bash
|
||||
|
||||
* - ``tiktoken``
|
||||
- Llama 3.1 70B
|
||||
- `tiktoken <https://github.com/openai/tiktoken>`_
|
||||
In the Docker container, clone the `<https://github.com/ROCm/MAD>`__
|
||||
repository and navigate to the benchmark scripts directory
|
||||
``/workspace/MAD/scripts/pytorch_train``.
|
||||
|
||||
* - ``blobfile``
|
||||
- Llama 3.1 70B
|
||||
- `blobfile <https://pypi.org/project/blobfile/>`_
|
||||
.. code-block:: shell
|
||||
|
||||
* - ``tabulate``
|
||||
- Llama 3.1 70B
|
||||
- `tabulate <https://pypi.org/project/tabulate/>`_
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD/scripts/pytorch_train
|
||||
|
||||
* - ``wandb``
|
||||
- Llama 3.1 70B
|
||||
- `Weights & Biases <https://github.com/wandb/wandb>`_
|
||||
.. rubric:: Prepare training datasets and dependencies
|
||||
|
||||
* - ``sentencepiece``
|
||||
- Llama 3.1 70B, FLUX
|
||||
- `SentencePiece <https://github.com/google/sentencepiece>`_ 0.2.0
|
||||
The following benchmarking examples require downloading models and datasets
|
||||
from Hugging Face. To ensure successful access to gated repos, set your
|
||||
``HF_TOKEN``.
|
||||
|
||||
* - ``tensorboard``
|
||||
- Llama 3.1 70 B, FLUX
|
||||
- `TensorBoard <https://www.tensorflow.org/tensorboard>`_ 2.18.0
|
||||
.. code-block:: shell
|
||||
|
||||
* - ``csvkit``
|
||||
- FLUX
|
||||
- `csvkit <https://csvkit.readthedocs.io/en/latest/>`_ 2.0.1
|
||||
export HF_TOKEN=$your_personal_hugging_face_access_token
|
||||
|
||||
* - ``deepspeed``
|
||||
- FLUX
|
||||
- `DeepSpeed <https://github.com/deepspeedai/DeepSpeed>`_ 0.16.2
|
||||
Run the setup script to install libraries and datasets needed for benchmarking.
|
||||
|
||||
* - ``diffusers``
|
||||
- FLUX
|
||||
- `Hugging Face Diffusers <https://huggingface.co/docs/diffusers/en/index>`_ 0.31.0
|
||||
.. code-block:: shell
|
||||
|
||||
* - ``GitPython``
|
||||
- FLUX
|
||||
- `GitPython <https://github.com/gitpython-developers/GitPython>`_ 3.1.44
|
||||
./pytorch_benchmark_setup.sh
|
||||
|
||||
* - ``opencv-python-headless``
|
||||
- FLUX
|
||||
- `opencv-python-headless <https://pypi.org/project/opencv-python-headless/>`_ 4.10.0.84
|
||||
``pytorch_benchmark_setup.sh`` installs the following libraries:
|
||||
|
||||
* - ``peft``
|
||||
- FLUX
|
||||
- `PEFT <https://huggingface.co/docs/peft/en/index>`_ 0.14.0
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - ``protobuf``
|
||||
- FLUX
|
||||
- `Protocol Buffers <https://github.com/protocolbuffers/protobuf>`_ 5.29.2
|
||||
* - Library
|
||||
- Benchmark model
|
||||
- Reference
|
||||
|
||||
* - ``pytest``
|
||||
- FLUX
|
||||
- `PyTest <https://docs.pytest.org/en/stable/>`_ 8.3.4
|
||||
* - ``accelerate``
|
||||
- Llama 3.1 8B, FLUX
|
||||
- `Hugging Face Accelerate <https://huggingface.co/docs/accelerate/en/index>`_
|
||||
|
||||
* - ``python-dotenv``
|
||||
- FLUX
|
||||
- `python-dotenv <https://pypi.org/project/python-dotenv/>`_ 1.0.1
|
||||
* - ``datasets``
|
||||
- Llama 3.1 8B, 70B, FLUX
|
||||
- `Hugging Face Datasets <https://huggingface.co/docs/datasets/v3.2.0/en/index>`_ 3.2.0
|
||||
|
||||
* - ``seaborn``
|
||||
- FLUX
|
||||
- `Seaborn <https://seaborn.pydata.org/>`_ 0.13.2
|
||||
* - ``torchdata``
|
||||
- Llama 3.1 70B
|
||||
- `TorchData <https://pytorch.org/data/beta/index.html>`_
|
||||
|
||||
* - ``transformers``
|
||||
- FLUX
|
||||
- `Transformers <https://huggingface.co/docs/transformers/en/index>`_ 4.47.0
|
||||
* - ``tomli``
|
||||
- Llama 3.1 70B
|
||||
- `Tomli <https://pypi.org/project/tomli/>`_
|
||||
|
||||
``pytorch_benchmark_setup.sh`` downloads the following models from Hugging Face:
|
||||
* - ``tiktoken``
|
||||
- Llama 3.1 70B
|
||||
- `tiktoken <https://github.com/openai/tiktoken>`_
|
||||
|
||||
* `meta-llama/Llama-3.1-70B-Instruct <https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct>`_
|
||||
* - ``blobfile``
|
||||
- Llama 3.1 70B
|
||||
- `blobfile <https://pypi.org/project/blobfile/>`_
|
||||
|
||||
* `black-forest-labs/FLUX.1-dev <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
|
||||
* - ``tabulate``
|
||||
- Llama 3.1 70B
|
||||
- `tabulate <https://pypi.org/project/tabulate/>`_
|
||||
|
||||
Along with the following datasets:
|
||||
* - ``wandb``
|
||||
- Llama 3.1 70B
|
||||
- `Weights & Biases <https://github.com/wandb/wandb>`_
|
||||
|
||||
* `WikiText <https://huggingface.co/datasets/Salesforce/wikitext>`_
|
||||
* - ``sentencepiece``
|
||||
- Llama 3.1 70B, FLUX
|
||||
- `SentencePiece <https://github.com/google/sentencepiece>`_ 0.2.0
|
||||
|
||||
* `UltraChat 200k <https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k>`_
|
||||
* - ``tensorboard``
|
||||
- Llama 3.1 70 B, FLUX
|
||||
- `TensorBoard <https://www.tensorflow.org/tensorboard>`_ 2.18.0
|
||||
|
||||
* `bghira/pseudo-camera-10k <https://huggingface.co/datasets/bghira/pseudo-camera-10k>`_
|
||||
* - ``csvkit``
|
||||
- FLUX
|
||||
- `csvkit <https://csvkit.readthedocs.io/en/latest/>`_ 2.0.1
|
||||
|
||||
Getting started
|
||||
===============
|
||||
* - ``deepspeed``
|
||||
- FLUX
|
||||
- `DeepSpeed <https://github.com/deepspeedai/DeepSpeed>`_ 0.16.2
|
||||
|
||||
The prebuilt PyTorch with ROCm training environment allows users to quickly validate
|
||||
system performance, conduct training benchmarks, and achieve superior
|
||||
performance for models like Llama 3.1 and Llama 2. This container should not be
|
||||
expected to provide generalized performance across all training workloads. You
|
||||
can expect the container to perform in the model configurations described in
|
||||
the following section, but other configurations are not validated by AMD.
|
||||
* - ``diffusers``
|
||||
- FLUX
|
||||
- `Hugging Face Diffusers <https://huggingface.co/docs/diffusers/en/index>`_ 0.31.0
|
||||
|
||||
Use the following instructions to set up the environment, configure the script
|
||||
to train models, and reproduce the benchmark results on MI325X and MI300X
|
||||
accelerators with the AMD PyTorch training Docker image.
|
||||
* - ``GitPython``
|
||||
- FLUX
|
||||
- `GitPython <https://github.com/gitpython-developers/GitPython>`_ 3.1.44
|
||||
|
||||
Once your environment is set up, use the following commands and examples to start benchmarking.
|
||||
* - ``opencv-python-headless``
|
||||
- FLUX
|
||||
- `opencv-python-headless <https://pypi.org/project/opencv-python-headless/>`_ 4.10.0.84
|
||||
|
||||
Pretraining
|
||||
-----------
|
||||
* - ``peft``
|
||||
- FLUX
|
||||
- `PEFT <https://huggingface.co/docs/peft/en/index>`_ 0.14.0
|
||||
|
||||
To start the pretraining benchmark, use the following command with the
|
||||
appropriate options. See the following list of options and their descriptions.
|
||||
* - ``protobuf``
|
||||
- FLUX
|
||||
- `Protocol Buffers <https://github.com/protocolbuffers/protobuf>`_ 5.29.2
|
||||
|
||||
.. code-block:: shell
|
||||
* - ``pytest``
|
||||
- FLUX
|
||||
- `PyTest <https://docs.pytest.org/en/stable/>`_ 8.3.4
|
||||
|
||||
./pytorch_benchmark_report.sh -t $training_mode -m $model_repo -p $datatype -s $sequence_length
|
||||
* - ``python-dotenv``
|
||||
- FLUX
|
||||
- `python-dotenv <https://pypi.org/project/python-dotenv/>`_ 1.0.1
|
||||
|
||||
Options and available models
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
* - ``seaborn``
|
||||
- FLUX
|
||||
- `Seaborn <https://seaborn.pydata.org/>`_ 0.13.2
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
* - ``transformers``
|
||||
- FLUX
|
||||
- `Transformers <https://huggingface.co/docs/transformers/en/index>`_ 4.47.0
|
||||
|
||||
* - Name
|
||||
- Options
|
||||
- Description
|
||||
``pytorch_benchmark_setup.sh`` downloads the following models from Hugging Face:
|
||||
|
||||
* - ``$training_mode``
|
||||
- ``pretrain``
|
||||
- Benchmark pretraining
|
||||
* `meta-llama/Llama-3.1-70B-Instruct <https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct>`_
|
||||
|
||||
* -
|
||||
- ``finetune_fw``
|
||||
- Benchmark full weight fine-tuning (Llama 3.1 70B with BF16)
|
||||
* `black-forest-labs/FLUX.1-dev <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
|
||||
|
||||
* -
|
||||
- ``finetune_lora``
|
||||
- Benchmark LoRA fine-tuning (Llama 3.1 70B with BF16)
|
||||
Along with the following datasets:
|
||||
|
||||
* -
|
||||
- ``HF_finetune_lora``
|
||||
- Benchmark LoRA fine-tuning with Hugging Face PEFT (Llama 2 70B with BF16)
|
||||
* `WikiText <https://huggingface.co/datasets/Salesforce/wikitext>`_
|
||||
|
||||
* - ``$datatype``
|
||||
- ``FP8`` or ``BF16``
|
||||
- Only Llama 3.1 8B supports FP8 precision.
|
||||
* `UltraChat 200k <https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k>`_
|
||||
|
||||
* - ``$model_repo``
|
||||
- ``Llama-3.1-8B``
|
||||
- `Llama 3.1 8B <https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct>`_
|
||||
* `bghira/pseudo-camera-10k <https://huggingface.co/datasets/bghira/pseudo-camera-10k>`_
|
||||
|
||||
* -
|
||||
- ``Llama-3.1-70B``
|
||||
- `Llama 3.1 70B <https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct>`_
|
||||
.. rubric:: Pretraining
|
||||
|
||||
* -
|
||||
- ``Llama-2-70B``
|
||||
- `Llama 2 70B <https://huggingface.co/meta-llama/Llama-2-70B>`_
|
||||
To start the pretraining benchmark, use the following command with the
|
||||
appropriate options. See the following list of options and their descriptions.
|
||||
|
||||
* -
|
||||
- ``Flux``
|
||||
- `FLUX.1 [dev] <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
|
||||
.. code-block:: shell
|
||||
|
||||
* - ``$sequence_length``
|
||||
- Sequence length for the language model.
|
||||
- Between 2048 and 8192. 8192 by default.
|
||||
./pytorch_benchmark_report.sh -t $training_mode -m $model_repo -p $datatype -s $sequence_length
|
||||
|
||||
.. note::
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
Occasionally, downloading the Flux dataset might fail. In the event of this
|
||||
error, manually download it from Hugging Face at
|
||||
`black-forest-labs/FLUX.1-dev <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
|
||||
and save it to `/workspace/FluxBenchmark`. This ensures that the test script can access
|
||||
the required dataset.
|
||||
* - Name
|
||||
- Options
|
||||
- Description
|
||||
|
||||
Fine-tuning
|
||||
-----------
|
||||
* - ``$training_mode``
|
||||
- ``pretrain``
|
||||
- Benchmark pretraining
|
||||
|
||||
To start the fine-tuning benchmark, use the following command. It will run the benchmarking example of Llama 3.1 70B
|
||||
with the WikiText dataset using the AMD fork of `torchtune <https://github.com/AMD-AIG-AIMA/torchtune>`_.
|
||||
* -
|
||||
- ``finetune_fw``
|
||||
- Benchmark full weight fine-tuning (Llama 3.1 70B with BF16)
|
||||
|
||||
.. code-block:: shell
|
||||
* -
|
||||
- ``finetune_lora``
|
||||
- Benchmark LoRA fine-tuning (Llama 3.1 70B with BF16)
|
||||
|
||||
./pytorch_benchmark_report.sh -t {finetune_fw, finetune_lora} -p BF16 -m Llama-3.1-70B
|
||||
* -
|
||||
- ``HF_finetune_lora``
|
||||
- Benchmark LoRA fine-tuning with Hugging Face PEFT (Llama 2 70B with BF16)
|
||||
|
||||
Use the following command to run the benchmarking example of Llama 2 70B with the UltraChat 200k dataset using
|
||||
`Hugging Face PEFT <https://huggingface.co/docs/peft/en/index>`_.
|
||||
* - ``$datatype``
|
||||
- ``FP8`` or ``BF16``
|
||||
- Only Llama 3.1 8B supports FP8 precision.
|
||||
|
||||
.. code-block:: shell
|
||||
* - ``$model_repo``
|
||||
- ``Llama-3.3-70B``
|
||||
- `Llama 3.3 70B <https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct>`_
|
||||
|
||||
./pytorch_benchmark_report.sh -t HF_finetune_lora -p BF16 -m Llama-2-70B
|
||||
* -
|
||||
- ``Llama-3.1-8B``
|
||||
- `Llama 3.1 8B <https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct>`_
|
||||
|
||||
Benchmarking examples
|
||||
---------------------
|
||||
* -
|
||||
- ``Llama-3.1-70B``
|
||||
- `Llama 3.1 70B <https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct>`_
|
||||
|
||||
Here are some examples of how to use the command.
|
||||
* -
|
||||
- ``Llama-2-70B``
|
||||
- `Llama 2 70B <https://huggingface.co/meta-llama/Llama-2-70B>`_
|
||||
|
||||
* Example 1: Llama 3.1 70B with BF16 precision with `torchtitan <https://github.com/ROCm/torchtitan>`_.
|
||||
* -
|
||||
- ``Flux``
|
||||
- `FLUX.1 [dev] <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
|
||||
|
||||
.. code-block:: shell
|
||||
* - ``$sequence_length``
|
||||
- Sequence length for the language model.
|
||||
- Between 2048 and 8192. 8192 by default.
|
||||
|
||||
./pytorch_benchmark_report.sh -t pretrain -p BF16 -m Llama-3.1-70B -s 8192
|
||||
.. note::
|
||||
|
||||
* Example 2: Llama 3.1 8B with FP8 precision using Transformer Engine (TE) and Hugging Face Accelerator.
|
||||
Occasionally, downloading the Flux dataset might fail. In the event of this
|
||||
error, manually download it from Hugging Face at
|
||||
`black-forest-labs/FLUX.1-dev <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
|
||||
and save it to `/workspace/FluxBenchmark`. This ensures that the test script can access
|
||||
the required dataset.
|
||||
|
||||
.. code-block:: shell
|
||||
.. rubric:: Fine-tuning
|
||||
|
||||
./pytorch_benchmark_report.sh -t pretrain -p FP8 -m Llama-3.1-70B -s 8192
|
||||
To start the fine-tuning benchmark, use the following command. It will run the benchmarking example of Llama 3.1 70B
|
||||
with the WikiText dataset using the AMD fork of `torchtune <https://github.com/AMD-AIG-AIMA/torchtune>`_.
|
||||
|
||||
* Example 3: FLUX.1-dev with BF16 precision with FluxBenchmark.
|
||||
.. code-block:: shell
|
||||
|
||||
.. code-block:: shell
|
||||
./pytorch_benchmark_report.sh -t {finetune_fw, finetune_lora} -p BF16 -m Llama-3.1-70B
|
||||
|
||||
./pytorch_benchmark_report.sh -t pretrain -p BF16 -m Flux
|
||||
Use the following command to run the benchmarking example of Llama 2 70B with the UltraChat 200k dataset using
|
||||
`Hugging Face PEFT <https://huggingface.co/docs/peft/en/index>`_.
|
||||
|
||||
* Example 4: Torchtune full weight fine-tuning with Llama 3.1 70B
|
||||
.. code-block:: shell
|
||||
|
||||
.. code-block:: shell
|
||||
./pytorch_benchmark_report.sh -t HF_finetune_lora -p BF16 -m Llama-2-70B
|
||||
|
||||
./pytorch_benchmark_report.sh -t finetune_fw -p BF16 -m Llama-3.1-70B
|
||||
.. rubric:: Benchmarking examples
|
||||
|
||||
* Example 5: Torchtune LoRA fine-tuning with Llama 3.1 70B
|
||||
Here are some example commands to get started pretraining and fine-tuning with various model configurations.
|
||||
|
||||
.. code-block:: shell
|
||||
* Example 1: Llama 3.1 70B with BF16 precision with `torchtitan <https://github.com/ROCm/torchtitan>`_.
|
||||
|
||||
./pytorch_benchmark_report.sh -t finetune_lora -p BF16 -m Llama-3.1-70B
|
||||
.. code-block:: shell
|
||||
|
||||
* Example 6: Hugging Face PEFT LoRA fine-tuning with Llama 2 70B
|
||||
./pytorch_benchmark_report.sh -t pretrain -p BF16 -m Llama-3.1-70B -s 8192
|
||||
|
||||
.. code-block:: shell
|
||||
* Example 2: Llama 3.1 8B with FP8 precision using Transformer Engine (TE) and Hugging Face Accelerator.
|
||||
|
||||
./pytorch_benchmark_report.sh -t HF_finetune_lora -p BF16 -m Llama-2-70B
|
||||
.. code-block:: shell
|
||||
|
||||
./pytorch_benchmark_report.sh -t pretrain -p FP8 -m Llama-3.1-70B -s 8192
|
||||
|
||||
* Example 3: FLUX.1-dev with BF16 precision with FluxBenchmark.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./pytorch_benchmark_report.sh -t pretrain -p BF16 -m Flux
|
||||
|
||||
* Example 4: Torchtune full weight fine-tuning with Llama 3.1 70B
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./pytorch_benchmark_report.sh -t finetune_fw -p BF16 -m Llama-3.1-70B
|
||||
|
||||
* Example 5: Torchtune LoRA fine-tuning with Llama 3.1 70B
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./pytorch_benchmark_report.sh -t finetune_lora -p BF16 -m Llama-3.1-70B
|
||||
|
||||
* Example 6: Torchtune full weight fine-tuning with Llama-3.3-70B
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./pytorch_benchmark_report.sh -t finetune_fw -p BF16 -m Llama-3.3-70B
|
||||
|
||||
* Example 7: Torchtune LoRA fine-tuning with Llama-3.3-70B
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./pytorch_benchmark_report.sh -t finetune_lora -p BF16 -m Llama-3.3-70B
|
||||
|
||||
* Example 8: Torchtune QLoRA fine-tuning with Llama-3.3-70B
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./pytorch_benchmark_report.sh -t finetune_qlora -p BF16 -m Llama-3.3-70B
|
||||
|
||||
* Example 9: Hugging Face PEFT LoRA fine-tuning with Llama 2 70B
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./pytorch_benchmark_report.sh -t HF_finetune_lora -p BF16 -m Llama-2-70B
|
||||
|
||||
Previous versions
|
||||
=================
|
||||
@@ -399,6 +439,13 @@ benchmarking, see the version-specific documentation.
|
||||
- PyTorch version
|
||||
- Resources
|
||||
|
||||
* - v25.4
|
||||
- 6.3.0
|
||||
- 2.7.0a0+git637433
|
||||
-
|
||||
* `Documentation <https://rocm.docs.amd.com/en/docs-6.3.3/how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.html>`_
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-training/v25.4/images/sha256-fa98a9aa69968e654466c06f05aaa12730db79b48b113c1ab4f7a5fe6920a20b>`_
|
||||
|
||||
* - v25.3
|
||||
- 6.3.0
|
||||
- 2.7.0a0+git637433
|
||||
|
||||
@@ -45,6 +45,7 @@
|
||||
(communication-libraries)=
|
||||
|
||||
* {doc}`RCCL <rccl:index>`
|
||||
* [rocSHMEM](https://github.com/ROCm/rocSHMEM)
|
||||
:::
|
||||
|
||||
:::{grid-item-card} Math
|
||||
|
||||
@@ -12,14 +12,14 @@ subtrees:
|
||||
- file: compatibility/compatibility-matrix.rst
|
||||
title: Compatibility matrix
|
||||
entries:
|
||||
- url: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html
|
||||
- url: https://rocm.docs.amd.com/projects/install-on-linux-internal/en/latest/reference/system-requirements.html
|
||||
title: Linux system requirements
|
||||
- url: https://rocm.docs.amd.com/projects/install-on-windows/en/${branch}/reference/system-requirements.html
|
||||
title: Windows system requirements
|
||||
|
||||
- caption: Install
|
||||
entries:
|
||||
- url: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/
|
||||
- url: https://rocm.docs.amd.com/projects/install-on-linux-internal/en/latest/
|
||||
title: ROCm on Linux
|
||||
- url: https://rocm.docs.amd.com/projects/install-on-windows/en/${branch}/
|
||||
title: HIP SDK on Windows
|
||||
@@ -75,7 +75,9 @@ subtrees:
|
||||
- file: how-to/rocm-for-ai/inference/llm-inference-frameworks.rst
|
||||
title: LLM inference frameworks
|
||||
- file: how-to/rocm-for-ai/inference/vllm-benchmark.rst
|
||||
title: Performance testing
|
||||
title: vLLM inference performance testing
|
||||
- file: how-to/rocm-for-ai/inference/pytorch-inference-benchmark.rst
|
||||
title: PyTorch inference performance testing
|
||||
- file: how-to/rocm-for-ai/inference/deploy-your-model.rst
|
||||
title: Deploy your model
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ ROCm is a software stack, composed primarily of open-source software, that
|
||||
provides the tools for programming AMD Graphics Processing Units (GPUs), from
|
||||
low-level kernels to high-level end-user applications.
|
||||
|
||||
.. image:: data/rocm-software-stack-6_3_2.jpg
|
||||
.. image:: data/rocm-software-stack-6_4_0.jpg
|
||||
:width: 800
|
||||
:alt: AMD's ROCm software stack and enabling technologies.
|
||||
:align: center
|
||||
@@ -52,6 +52,7 @@ Communication
|
||||
:header: "Component", "Description"
|
||||
|
||||
":doc:`RCCL <rccl:index>`", "Standalone library that provides multi-GPU and multi-node collective communication primitives"
|
||||
"`rocSHMEM <https://github.com/ROCm/rocSHMEM>`_", "Runtime that provides GPU-centric networking through an OpenSHMEM-like interface. This intra-kernel networking library simplifies application code complexity and enables more fine-grained communication/computation overlap than traditional host-driven networking."
|
||||
|
||||
Math
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
@@ -12,17 +12,36 @@ RELEASE_FLAG?=-r
|
||||
ASAN_DEP:=
|
||||
ifeq (${ENABLE_ADDRESS_SANITIZER},true)
|
||||
ASAN_DEP=lightning
|
||||
SANITIZER_FLAG=-a
|
||||
SANITIZER_FLAG=--address_sanitizer
|
||||
endif
|
||||
|
||||
# Set STATIC_FLAG for static builds
|
||||
ifeq (${ENABLE_STATIC_BUILDS},true)
|
||||
STATIC_FLAG=-s
|
||||
endif
|
||||
|
||||
export INFRA_REPO:=ROCm/tools/rocm-build
|
||||
|
||||
# # commannds to be run at makefile read time
|
||||
# # should only output $$OUT_DIR to stdout
|
||||
# # In an ideal world this would be another target
|
||||
# define INITBUILD
|
||||
# source ${INFRA_REPO}/envsetup.sh >/dev/null 2>&1;
|
||||
# [ -w "$${ROCM_INSTALL_PATH}" ] || sudo mkdir -p -m 775 "$${ROCM_INSTALL_PATH}" ;
|
||||
# sudo chown "$$(id -u):$$(id -g)" "$${ROCM_INSTALL_PATH}" "/home/$$(id -un)" ;
|
||||
# mkdir -p ${HOME}/.ccache ;
|
||||
# echo $${OUT_DIR} ;
|
||||
# endef
|
||||
|
||||
OUT_DIR:=$(shell . ${INFRA_REPO}/envsetup.sh >/dev/null 2>&1 ; echo $${OUT_DIR})
|
||||
ROCM_INSTALL_PATH:=$(shell . ${INFRA_REPO}/envsetup.sh >/dev/null 2>&1 ; echo $${ROCM_INSTALL_PATH})
|
||||
|
||||
$(info OUT_DIR=${OUT_DIR})
|
||||
$(info ROCM_INSTALL_PATH=${ROCM_INSTALL_PATH})
|
||||
|
||||
# define SILENT to be empty to see the runner invocation
|
||||
SILENT?= @
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Internal stuff. Could be put in a different file to hide it.
|
||||
# Internal macros, they need to be defined before being used.
|
||||
@@ -47,7 +66,8 @@ endif
|
||||
# It should not be needed.
|
||||
define adddep =
|
||||
$(strip $(call peval,components+= $(1) $(2))
|
||||
$(foreach comp,$(strip $2),$(call peval,${OUT_DIR}/logs/${1}: ${OUT_DIR}/logs/${comp}))
|
||||
$(call peval,$(1)_DEPS += $(2))
|
||||
$(foreach comp,$(strip $2),$(call peval,${OUT_DIR}/logs/${1}.txt: ${OUT_DIR}/logs/${comp}.txt))
|
||||
)
|
||||
endef
|
||||
# End of internal stuff that is needed at the start of the file
|
||||
@@ -67,6 +87,7 @@ endef
|
||||
|
||||
$(call adddep,amd_smi_lib,${ASAN_DEP})
|
||||
$(call adddep,aqlprofile,${ASAN_DEP} rocr)
|
||||
$(call adddep,aqlprofiletest,lightning rocminfo aqlprofile opencl_on_rocclr hip_on_rocclr)
|
||||
$(call adddep,comgr,lightning devicelibs)
|
||||
$(call adddep,dbgapi,rocr comgr)
|
||||
$(call adddep,devicelibs,lightning)
|
||||
@@ -83,27 +104,29 @@ $(call adddep,rocm-core,${ASAN_DEP})
|
||||
$(call adddep,rocm-gdb,dbgapi)
|
||||
$(call adddep,rocminfo,${ASAN_DEP} rocr)
|
||||
$(call adddep,rocprofiler-register,${ASAN_DEP})
|
||||
$(call adddep,rocprofiler-sdk,${ASAN_DEP} rocr aqlprofile opencl_on_rocclr hip_on_rocclr comgr)
|
||||
$(call adddep,rocprofiler-sdk,${ASAN_DEP} rocr aqlprofile opencl_on_rocclr hip_on_rocclr comgr rccl rocdecode)
|
||||
$(call adddep,rocprofiler-systems,${ASAN_DEP} hipcc rocr hip_on_rocclr rocm_smi_lib rocprofiler roctracer rocprofiler-sdk)
|
||||
$(call adddep,rocprofiler,${ASAN_DEP} rocr roctracer aqlprofile opencl_on_rocclr hip_on_rocclr comgr)
|
||||
$(call adddep,rocprofiler-compute,${ASAN_DEP})
|
||||
$(call adddep,rocr,${ASAN_DEP} lightning rocm_smi_lib devicelibs rocprofiler-register)
|
||||
$(call adddep,rocr_debug_agent,${ASAN_DEP} hip_on_rocclr rocr dbgapi)
|
||||
$(call adddep,rocrsamples,lightning devicelibs rocr )
|
||||
$(call adddep,roctracer,${ASAN_DEP} rocr hip_on_rocclr)
|
||||
|
||||
|
||||
# rocm-dev points to all possible last finish components of Stage1 build.
|
||||
rocm-dev-components :=amd_smi_lib aqlprofile comgr dbgapi devicelibs hip_on_rocclr hipcc hipify_clang \
|
||||
rocm-dev-components :=amd_smi_lib aqlprofile aqlprofiletest comgr dbgapi devicelibs hip_on_rocclr hipcc hipify_clang \
|
||||
lightning rocprofiler-compute opencl_on_rocclr openmp_extras rocm_bandwidth_test rocm_smi_lib \
|
||||
rocm-cmake rocm-core rocm-gdb rocminfo rocprofiler-register rocprofiler-sdk rocprofiler-systems \
|
||||
rocprofiler rocr rocr_debug_agent roctracer
|
||||
$(call adddep,rocm-dev,$(filter-out ${NOBUILD} kernel_ubuntu,${rocm-dev-components}))
|
||||
rocprofiler rocr rocr_debug_agent rocrsamples roctracer
|
||||
$(call adddep,rocm-dev,$(filter-out ${NOBUILD},${rocm-dev-components}))
|
||||
|
||||
$(call adddep,amdmigraphx,hip_on_rocclr half rocblas miopen-hip lightning hipcc hiptensor)
|
||||
$(call adddep,composable_kernel,lightning hipcc hip_on_rocclr rocm-cmake)
|
||||
$(call adddep,half,rocm-cmake)
|
||||
$(call adddep,hipblas-common,lightning)
|
||||
$(call adddep,hipblas,hip_on_rocclr rocblas rocsolver lightning hipcc)
|
||||
$(call adddep,hipblaslt,hip_on_rocclr openmp_extras lightning hipcc hipblas-common rocm-dev)
|
||||
$(call adddep,hipblaslt,hip_on_rocclr openmp_extras lightning hipcc hipblas-common roctracer)
|
||||
$(call adddep,hipcub,hip_on_rocclr rocprim lightning hipcc)
|
||||
$(call adddep,hipfft,hip_on_rocclr openmp_extras rocfft rocrand hiprand lightning hipcc)
|
||||
$(call adddep,hipfort,rocblas hipblas rocsparse hipsparse rocfft hipfft rocrand hiprand rocsolver hipsolver lightning hipcc)
|
||||
@@ -113,26 +136,34 @@ $(call adddep,hipsparse,hip_on_rocclr rocsparse lightning hipcc)
|
||||
$(call adddep,hipsparselt,hip_on_rocclr hipsparse lightning hipcc openmp_extras)
|
||||
$(call adddep,hiptensor,hip_on_rocclr composable_kernel lightning hipcc)
|
||||
$(call adddep,miopen-deps,lightning hipcc)
|
||||
$(call adddep,miopen-hip,composable_kernel half hip_on_rocclr miopen-deps hipblas hipblaslt rocrand roctracer lightning hipcc)
|
||||
$(call adddep,miopen-hip,rocm-core composable_kernel half hip_on_rocclr miopen-deps hipblas hipblaslt rocrand roctracer lightning hipcc)
|
||||
$(call adddep,mivisionx,amdmigraphx miopen-hip rpp lightning hipcc)
|
||||
$(call adddep,rccl,rocm-core hip_on_rocclr rocr lightning hipcc rocm_smi_lib hipify_clang)
|
||||
$(call adddep,rdc,rocm_smi_lib rocprofiler rocmvalidationsuite)
|
||||
$(call adddep,rdc,amd_smi_lib rocprofiler-sdk rocm_smi_lib rocprofiler rocmvalidationsuite)
|
||||
$(call adddep,rocalution,rocblas rocsparse rocrand lightning hipcc)
|
||||
$(call adddep,rocblas,hip_on_rocclr openmp_extras lightning hipcc hipblaslt)
|
||||
$(call adddep,rocblas,rocminfo hip_on_rocclr openmp_extras lightning hipcc hipblaslt)
|
||||
$(call adddep,rocal,mivisionx)
|
||||
$(call adddep,rocdecode,hip_on_rocclr lightning hipcc amdmigraphx)
|
||||
$(call adddep,rocdecode,hip_on_rocclr lightning hipcc)
|
||||
$(call adddep,rocfft,hip_on_rocclr rocrand hiprand lightning hipcc openmp_extras)
|
||||
$(call adddep,rocjpeg,hip_on_rocclr lightning hipcc rocm-dev)
|
||||
$(call adddep,rocjpeg,hip_on_rocclr lightning hipcc)
|
||||
$(call adddep,rocmvalidationsuite,hip_on_rocclr rocr hipblas hiprand hipblaslt rocm-core lightning hipcc rocm_smi_lib)
|
||||
$(call adddep,rocprim,hip_on_rocclr lightning hipcc)
|
||||
$(call adddep,rocrand,hip_on_rocclr lightning hipcc)
|
||||
$(call adddep,rocshmem,rccl )
|
||||
$(call adddep,rocsolver,hip_on_rocclr rocblas rocsparse rocprim lightning hipcc)
|
||||
$(call adddep,rocsparse,hip_on_rocclr rocprim lightning hipcc)
|
||||
$(call adddep,rocthrust,hip_on_rocclr rocprim lightning hipcc)
|
||||
$(call adddep,rocwmma,hip_on_rocclr rocblas lightning hipcc rocm-cmake rocm_smi_lib)
|
||||
$(call adddep,rpp,half lightning hipcc openmp_extras)
|
||||
$(call adddep,transferbench,hip_on_rocclr lightning hipcc)
|
||||
ifneq ($(filter rocm-dev upload-rocm-dev, ${MAKECMDGOALS}),)
|
||||
components = $(rocm-dev-components)
|
||||
endif
|
||||
$(call adddep,rocm,$(filter-out ${NOBUILD} rocm,${components}))
|
||||
|
||||
ifeq ($(DISTRO_NAME),rhel)
|
||||
WHL_GEN :=
|
||||
endif
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# The rest of the file is internal
|
||||
@@ -165,7 +196,7 @@ ifeq (${toplevel},)
|
||||
define toplevel =
|
||||
|
||||
# The "target" make, this builds the package if it is out of date
|
||||
T_$1: ${OUT_DIR}/logs/$1 FRC
|
||||
T_$1: ${OUT_DIR}/logs/$1.txt FRC
|
||||
: $1 built
|
||||
|
||||
# The "upload" for $1, it uploads the packages for $1 to the central storage
|
||||
@@ -176,25 +207,26 @@ U_$1: T_$1 FRC
|
||||
# The "clean" for $1, it just marks the target as not existing so it will be built
|
||||
# in the future.
|
||||
C_$1: FRC
|
||||
rm -f ${OUT_DIR}/logs/$1 ${OUT_DIR}/logs/$1.repackaged
|
||||
rm -f ${OUT_DIR}/logs/$1.txt ${OUT_DIR}/logs/$1.repackaged
|
||||
|
||||
# parallel build {
|
||||
${OUT_DIR}/logs/$1: | ${OUT_DIR}/logs
|
||||
ifneq ($(wildcard ${OUT_DIR}/logs/$1.repackaged),)
|
||||
# parallel build
|
||||
${OUT_DIR}/logs/$1.txt: | ${OUT_DIR}/logs
|
||||
ifneq ($(wildcard ${OUT_DIR}/logs/$1.repackaged),) # {
|
||||
@echo Skipping build of $1 as it has already been repackaged
|
||||
cat $$@.repackaged > $$@
|
||||
rm -f $$@.repackaged
|
||||
cat $${@:.txt=.repackaged} > $$@
|
||||
rm -f $${@:.txt=.repackaged}
|
||||
else # } {
|
||||
@echo $1 started due to $$? | sed "s:${OUT_DIR}/logs/::g"
|
||||
# Build in a subshell so we get the time output
|
||||
# Pass in jobserver info using the RMAKE variable
|
||||
${RMAKE}@( if set -x && source $${INFRA_REPO}/envsetup.sh && \
|
||||
rm -f $$@.errors $$@ $$@.repackaged && \
|
||||
$${INFRA_REPO}/build_$1.sh -c && \
|
||||
time bash -x $${INFRA_REPO}/build_$1.sh $${RELEASE_FLAG} $${SANITIZER_FLAG} && $${INFRA_REPO}/post_inst_pkg.sh "$1" ; \
|
||||
then mv $$@.inprogress $$@ ; \
|
||||
else mv $$@.inprogress $$@.errors ; echo Error in $1 >&2 ; exit 1 ;\
|
||||
fi ) > $$@.inprogress 2>&1
|
||||
# Allow project specific flags e..g. ROCMBUILD_lightning.
|
||||
${RMAKE}${SILENT}( if set -x && source $${INFRA_REPO}/envsetup.sh && \
|
||||
rm -f $${@:$1.txt=1.Errors.$1.txt} $$@ $${@:.txt=.repackaged} && \
|
||||
$${INFRA_REPO}/runner $1 $${RELEASE_FLAG} $${SANITIZER_FLAG} $${STATIC_FLAG} ${ROCMBUILD_$1}; \
|
||||
then mv $${@:$1.txt=2.Inprogress.$1.txt} $$@ ; \
|
||||
else mv $${@:$1.txt=2.Inprogress.$1.txt} $${@:$1.txt=1.Errors.$1.txt} ;\
|
||||
echo Error in $1 >&2 ; exit 1 ;\
|
||||
fi ) > $${@:$1.txt=2.Inprogress.$1.txt} 2>&1
|
||||
endif # }
|
||||
|
||||
# end of toplevel macro
|
||||
@@ -227,22 +259,45 @@ upload-rocm-dev: $(addprefix U_,$(filter-out ${NOBUILD},${components}))
|
||||
rocm-dev: $(addprefix T_,$(filter-out ${NOBUILD},${components}))
|
||||
@echo rocm-dev built
|
||||
|
||||
ifeq ($(DISTRO_NAME),almalinux)
|
||||
@sudo chmod -R 777 "/home/builder"
|
||||
endif
|
||||
|
||||
# This code is broken. It stops us exiting a container and
|
||||
# starting a new one and continueing the build. The attempt
|
||||
# is to have run-once code.
|
||||
${OUT_DIR}/logs:
|
||||
sudo mkdir -p -m 775 "${ROCM_INSTALL_PATH}" && \
|
||||
sudo chown -R "$(shell id -u):$(shell id -g)" "/opt"
|
||||
sudo chown -R "$(shell id -u):$(shell id -g)" "/home/$(shell id -un)"
|
||||
sudo chown "$(shell id -u):$(shell id -g)" "/home/$(shell id -un)"
|
||||
mkdir -p "${@}"
|
||||
mkdir -p ${HOME}/.ccache
|
||||
|
||||
##help clean: remove the output directory and recreate it
|
||||
clean:
|
||||
[ -n "${OUT_DIR}" ] && rm -rf "${OUT_DIR}"
|
||||
mkdir -p ${OUT_DIR}/logs
|
||||
# mkdir -p ${OUT_DIR}/logs
|
||||
|
||||
.SECONDARY: ${components:%=${OUT_DIR}/logs/%}
|
||||
|
||||
.PHONY: all clean repack help list_components
|
||||
|
||||
# get_all_deps: Recursively get all dependencies for a given component.
|
||||
# Usage: $(call get_all_deps,component_name,)
|
||||
# - component_name: The name of the component to get dependencies for.
|
||||
# - The second parameter is an internal parameter used to track already
|
||||
# processed components to avoid circular dependencies.
|
||||
define get_all_deps
|
||||
$(if $(filter $(1),$(2)),,\
|
||||
$(sort $(1) $(foreach d,$($(1)_DEPS),$(call get_all_deps,$d,$(1) $(2))))
|
||||
)
|
||||
endef
|
||||
|
||||
##help deps_<component>: output the dependencies for <component>
|
||||
deps_%:
|
||||
@echo "=== Dependencies for [$*] ==="
|
||||
@echo "$(filter-out $*,$(call get_all_deps,$*,))"
|
||||
|
||||
##help list_components: output the list of components
|
||||
##help : Hint make list_components | paste - - - | column -t
|
||||
list_components:
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
#!/bin/bash
|
||||
|
||||
source "$(dirname "${BASH_SOURCE}")/compute_utils.sh"
|
||||
|
||||
@@ -10,7 +9,9 @@ printUsage() {
|
||||
echo " -c, --clean Removes all amd_smi build artifacts"
|
||||
echo " -r, --release Build non-debug version amd_smi (default is debug)"
|
||||
echo " -a, --address_sanitizer Enable address sanitizer"
|
||||
echo " -s, --static Build static lib (.a). build instead of dynamic/shared(.so) "
|
||||
echo " -s, --static Component/Build does not support static builds just accepting this param & ignore. No effect of the param on this build"
|
||||
echo " -w, --wheel Creates python wheel package of amd-smi.
|
||||
It needs to be used along with -r option"
|
||||
echo " -o, --outdir <pkg_type> Print path of output directory containing packages of type referred to by pkg_type"
|
||||
echo " -p, --package <type> Specify packaging format"
|
||||
echo " -h, --help Prints this help"
|
||||
@@ -25,7 +26,6 @@ printUsage() {
|
||||
PROJ_NAME="amdsmi"
|
||||
PACKAGE_ROOT="$(getPackageRoot)"
|
||||
TARGET="build"
|
||||
|
||||
PACKAGE_LIB=$(getLibPath)
|
||||
PACKAGE_INCLUDE="$(getIncludePath)"
|
||||
AMDSMI_BUILD_DIR=$(getBuildPath $PROJ_NAME)
|
||||
@@ -42,7 +42,7 @@ SHARED_LIBS="ON"
|
||||
CLEAN_OR_OUT=0;
|
||||
PKGTYPE="deb"
|
||||
|
||||
VALID_STR=`getopt -o hcraso:p: --long help,clean,release,static,address_sanitizer,outdir:,package: -- "$@"`
|
||||
VALID_STR=`getopt -o hcraswo:p: --long help,clean,release,static,wheel,address_sanitizer,outdir:,package: -- "$@"`
|
||||
eval set -- "$VALID_STR"
|
||||
|
||||
while true ;
|
||||
@@ -60,6 +60,8 @@ do
|
||||
ADDRESS_SANITIZER=true ; shift ;;
|
||||
(-s | --static)
|
||||
ack_and_skip_static ;;
|
||||
(-w | --wheel)
|
||||
WHEEL_PACKAGE=true ; shift ;;
|
||||
(-o | --outdir)
|
||||
TARGET="outdir"; PKGTYPE=$2 ; OUT_DIR_SPECIFIED=1 ; ((CLEAN_OR_OUT|=2)) ; shift 2 ;;
|
||||
(-p | --package)
|
||||
@@ -135,7 +137,7 @@ verifyEnvSetup
|
||||
|
||||
case $TARGET in
|
||||
(clean) clean_amdsmi ;;
|
||||
(build) build_amdsmi ;;
|
||||
(build) build_amdsmi; build_wheel "$AMDSMI_BUILD_DIR" "$PROJ_NAME" ;;
|
||||
(outdir) print_output_directory ;;
|
||||
(*) die "Invalid target $TARGET" ;;
|
||||
esac
|
||||
|
||||
@@ -1,41 +1,42 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/compute_helper.sh"
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/compute_utils.sh"
|
||||
|
||||
set_component_src AMDMIGraphX
|
||||
|
||||
build_amdmigraphx() {
|
||||
echo "Start build"
|
||||
|
||||
if [ "${ENABLE_STATIC_BUILDS}" == "true" ]; then
|
||||
ack_and_skip_static
|
||||
fi
|
||||
|
||||
cd $COMPONENT_SRC
|
||||
|
||||
if ! command -v rbuild &> /dev/null; then
|
||||
pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
|
||||
fi
|
||||
|
||||
# Remove CK
|
||||
xargs -d '\n' -a ${OUT_DIR}/ck.files rm -- || true
|
||||
|
||||
if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
|
||||
set_asan_env_vars
|
||||
set_address_sanitizer_on
|
||||
fi
|
||||
|
||||
if [ -n "$GPU_ARCHS" ]; then
|
||||
GPU_TARGETS="$GPU_ARCHS"
|
||||
else
|
||||
GPU_TARGETS="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx1102;gfx942;gfx1200;gfx1201"
|
||||
fi
|
||||
init_rocm_common_cmake_params
|
||||
|
||||
mkdir -p ${BUILD_DIR} && rm -rf ${BUILD_DIR}/* && mkdir -p ${HOME}/amdmigraphx && rm -rf ${HOME}/amdmigraphx/*
|
||||
rbuild package -d "${HOME}/amdmigraphx" -B "${BUILD_DIR}" \
|
||||
--cxx="${ROCM_PATH}/llvm/bin/clang++" \
|
||||
--cc="${ROCM_PATH}/llvm/bin/clang" \
|
||||
--cxx="$(set_build_variables __CLANG++__)" \
|
||||
--cc="$(set_build_variables __CLANG__)" \
|
||||
"${rocm_math_common_cmake_params[@]}" \
|
||||
-DCMAKE_MODULE_LINKER_FLAGS="-Wl,--enable-new-dtags,--build-id=sha1,--rpath,$ROCM_LIB_RPATH" \
|
||||
-DGPU_TARGETS="${GPU_TARGETS}" \
|
||||
-DCMAKE_INSTALL_RPATH=""
|
||||
|
||||
mkdir -p $PACKAGE_DIR && cp ${BUILD_DIR}/*.${PKGTYPE} $PACKAGE_DIR
|
||||
copy_if "${PKGTYPE}" "${CPACKGEN:-"DEB;RPM"}" "${PACKAGE_DIR}" "${BUILD_DIR}"/*."${PKGTYPE}"
|
||||
cd $BUILD_DIR && cmake --build . -- install -j${PROC}
|
||||
|
||||
show_build_cache_stats
|
||||
@@ -50,7 +51,7 @@ clean_amdmigraphx() {
|
||||
stage2_command_args "$@"
|
||||
|
||||
case $TARGET in
|
||||
build) build_amdmigraphx ;;
|
||||
build) build_amdmigraphx; build_wheel ;;
|
||||
outdir) print_output_directory ;;
|
||||
clean) clean_amdmigraphx ;;
|
||||
*) die "Invalid target $TARGET" ;;
|
||||
|
||||
136
tools/rocm-build/build_clang-ocl.sh
Executable file
136
tools/rocm-build/build_clang-ocl.sh
Executable file
@@ -0,0 +1,136 @@
|
||||
#!/bin/bash
|
||||
|
||||
source "$(dirname "${BASH_SOURCE}")/compute_utils.sh"
|
||||
|
||||
printUsage() {
|
||||
echo
|
||||
echo "Usage: $(basename "${BASH_SOURCE}") [-c|-r|-h] [makeopts]"
|
||||
echo
|
||||
echo "Options:"
|
||||
echo " -c, --clean Removes all clang-ocl build artifacts"
|
||||
echo " -r, --release Build non-debug version clang-ocl (default is debug)"
|
||||
echo " -a, --address_sanitizer Enable address sanitizer"
|
||||
echo " -o, --outdir <pkg_type> Print path of output directory containing packages of
|
||||
type referred to by pkg_type"
|
||||
echo " -h, --help Prints this help"
|
||||
echo " -s, --static Supports static CI by accepting this param & not bailing out. No effect of the param though"
|
||||
echo
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
|
||||
TARGET="build"
|
||||
CLANG_OCL_DEST="$(getBinPath)"
|
||||
CLANG_OCL_SRC_ROOT="$CLANG_OCL_ROOT"
|
||||
CLANG_OCL_BUILD_DIR="$(getBuildPath clang-ocl)"
|
||||
|
||||
MAKEARG="$DASH_JAY"
|
||||
PACKAGE_ROOT="$(getPackageRoot)"
|
||||
PACKAGE_UTILS="$(getUtilsPath)"
|
||||
CLANG_OCL_PACKAGE_DEB="$PACKAGE_ROOT/deb/clang-ocl"
|
||||
CLANG_OCL_PACKAGE_RPM="$PACKAGE_ROOT/rpm/clang-ocl"
|
||||
BUILD_TYPE="Debug"
|
||||
SHARED_LIBS="ON"
|
||||
CLEAN_OR_OUT=0;
|
||||
MAKETARGET="deb"
|
||||
PKGTYPE="deb"
|
||||
|
||||
|
||||
VALID_STR=`getopt -o hcraso:g: --long help,clean,release,clean,static,address_sanitizer,outdir:,gpu_list: -- "$@"`
|
||||
eval set -- "$VALID_STR"
|
||||
|
||||
while true ;
|
||||
do
|
||||
case "$1" in
|
||||
(-h | --help)
|
||||
printUsage ; exit 0;;
|
||||
(-c | --clean)
|
||||
TARGET="clean" ; ((CLEAN_OR_OUT|=1)) ; shift ;;
|
||||
(-r | --release)
|
||||
MAKEARG="$MAKEARG BUILD_TYPE=rel" ; BUILD_TYPE="Release" ; shift ;;
|
||||
(-a | --address_sanitizer)
|
||||
set_asan_env_vars
|
||||
set_address_sanitizer_on ; shift ;;
|
||||
(-s | --static)
|
||||
SHARED_LIBS="OFF" ; shift ;;
|
||||
(-o | --outdir)
|
||||
TARGET="outdir"; PKGTYPE=$2 ; OUT_DIR_SPECIFIED=1 ; ((CLEAN_OR_OUT|=2)) ; shift 2 ;;
|
||||
(-g | --gpu_list )
|
||||
GPU_LIST=$2; shift 2 ;;
|
||||
--) shift; break;;
|
||||
(*)
|
||||
echo " This should never come but just incase : UNEXPECTED ERROR Parm : [$1] ">&2 ; exit 20;;
|
||||
esac
|
||||
|
||||
done
|
||||
|
||||
RET_CONFLICT=1
|
||||
check_conflicting_options $CLEAN_OR_OUT $PKGTYPE $MAKETARGET
|
||||
if [ $RET_CONFLICT -ge 30 ]; then
|
||||
print_vars $API_NAME $TARGET $BUILD_TYPE $SHARED_LIBS $CLEAN_OR_OUT $PKGTYPE $MAKETARGET
|
||||
exit $RET_CONFLICT
|
||||
fi
|
||||
|
||||
clean_clang-ocl() {
|
||||
echo "Removing clang-ocl"
|
||||
rm -rf $CLANG_OCL_DEST/clang-ocl
|
||||
rm -rf $CLANG_OCL_BUILD_DIR
|
||||
rm -rf $CLANG_OCL_PACKAGE_DEB
|
||||
rm -rf $CLANG_OCL_PACKAGE_RPM
|
||||
}
|
||||
|
||||
build_clang-ocl() {
|
||||
if [ ! -d "$CLANG_OCL_BUILD_DIR" ]; then
|
||||
mkdir -p $CLANG_OCL_BUILD_DIR
|
||||
pushd $CLANG_OCL_BUILD_DIR
|
||||
|
||||
if [ -e $PACKAGE_ROOT/lib/bitcode/opencl.amdgcn.bc ]; then
|
||||
BC_DIR="$ROCM_INSTALL_PATH/lib"
|
||||
else
|
||||
BC_DIR="$ROCM_INSTALL_PATH/amdgcn/bitcode"
|
||||
fi
|
||||
|
||||
cmake \
|
||||
$(rocm_cmake_params) \
|
||||
-DDISABLE_CHECKS="ON" \
|
||||
-DCLANG_BIN="$ROCM_INSTALL_PATH/llvm/bin" \
|
||||
-DBITCODE_DIR="$BC_DIR" \
|
||||
$(rocm_common_cmake_params) \
|
||||
-DCPACK_SET_DESTDIR="OFF" \
|
||||
$CLANG_OCL_SRC_ROOT
|
||||
|
||||
echo "Making clang-ocl:"
|
||||
cmake --build . -- $MAKEARG
|
||||
cmake --build . -- $MAKEARG install
|
||||
cmake --build . -- $MAKEARG package
|
||||
popd
|
||||
fi
|
||||
|
||||
copy_if DEB "${CPACKGEN:-"DEB;RPM"}" "$CLANG_OCL_PACKAGE_DEB" $CLANG_OCL_BUILD_DIR/rocm-clang-ocl*.deb
|
||||
copy_if RPM "${CPACKGEN:-"DEB;RPM"}" "$CLANG_OCL_PACKAGE_RPM" $CLANG_OCL_BUILD_DIR/rocm-clang-ocl*.rpm
|
||||
}
|
||||
|
||||
|
||||
print_output_directory() {
|
||||
case ${PKGTYPE} in
|
||||
("deb")
|
||||
echo ${CLANG_OCL_PACKAGE_DEB};;
|
||||
("rpm")
|
||||
echo ${CLANG_OCL_PACKAGE_RPM};;
|
||||
(*)
|
||||
echo "Invalid package type \"${PKGTYPE}\" provided for -o" >&2; exit 1;;
|
||||
esac
|
||||
exit
|
||||
}
|
||||
|
||||
case $TARGET in
|
||||
(clean) clean_clang-ocl ;;
|
||||
(build) build_clang-ocl ;;
|
||||
(outdir) print_output_directory ;;
|
||||
(*) die "Invalid target $TARGET" ;;
|
||||
esac
|
||||
|
||||
echo "Operation complete"
|
||||
exit 0
|
||||
|
||||
@@ -14,6 +14,7 @@ printUsage() {
|
||||
type referred to by pkg_type"
|
||||
echo " -h, --help Prints this help"
|
||||
echo " -s, --static Build static lib (.a). build instead of dynamic/shared(.so) "
|
||||
echo " -w, --wheel Creates python wheel package of comgr. It needs to be used along with -r option"
|
||||
echo " -l, --link_llvm_static Link to LLVM statically. Default is to dynamically link to LLVM; requires that LLVM dylibs are created."
|
||||
echo
|
||||
echo "Possible values for <type>:"
|
||||
@@ -24,6 +25,7 @@ printUsage() {
|
||||
return 0
|
||||
}
|
||||
|
||||
## Build environment variables
|
||||
API_NAME=amd_comgr
|
||||
PROJ_NAME=$API_NAME
|
||||
LIB_NAME=lib${API_NAME}
|
||||
@@ -33,8 +35,8 @@ PACKAGE_ROOT=$(getPackageRoot)
|
||||
PACKAGE_LIB=$(getLibPath)
|
||||
PACKAGE_INCLUDE=$(getIncludePath)
|
||||
BUILD_DIR=$(getBuildPath $API_NAME)
|
||||
PACKAGE_DEB=$(getPackageRoot)/deb/$API_NAME
|
||||
PACKAGE_RPM=$(getPackageRoot)/rpm/$API_NAME
|
||||
PACKAGE_DEB=$PACKAGE_ROOT/deb/$PROJ_NAME
|
||||
PACKAGE_RPM=$PACKAGE_ROOT/rpm/$PROJ_NAME
|
||||
PACKAGE_PREFIX=$ROCM_INSTALL_PATH
|
||||
BUILD_TYPE=Debug
|
||||
MAKE_OPTS="$DASH_JAY CTEST_OUTPUT_ON_FAILURE=1 -C $BUILD_DIR"
|
||||
@@ -44,13 +46,17 @@ CLEAN_OR_OUT=0;
|
||||
PKGTYPE="deb"
|
||||
MAKETARGET="deb"
|
||||
|
||||
# link to LLVM dynamicaly. Default is to link to llvm dynamically.
|
||||
# temporarily set this default to OFF until we resolve Issues
|
||||
LINK_LLVM_DYLIB="OFF"
|
||||
|
||||
VALID_STR=`getopt -o hcraslo:p: --long help,clean,release,address_sanitizer,static,link_llvm_static,outdir:,package: -- "$@"`
|
||||
#parse the arguments
|
||||
VALID_STR=`getopt -o hcraswlo:p: --long help,clean,release,address_sanitizer,static,wheel,link_llvm_static,outdir:,package: -- "$@"`
|
||||
eval set -- "$VALID_STR"
|
||||
|
||||
while true ;
|
||||
do
|
||||
#echo "parocessing $1"
|
||||
case "$1" in
|
||||
(-h | --help)
|
||||
printUsage ; exit 0;;
|
||||
@@ -63,6 +69,8 @@ do
|
||||
set_address_sanitizer_on ; shift ;;
|
||||
(-s | --static)
|
||||
SHARED_LIBS="OFF" ; shift ;;
|
||||
(-w | --wheel)
|
||||
WHEEL_PACKAGE=true ; shift ;;
|
||||
(-l | --link_llvm_static)
|
||||
LINK_LLVM_DYLIB="OFF"; shift ;;
|
||||
(-o | --outdir)
|
||||
@@ -105,6 +113,7 @@ build() {
|
||||
echo " Building Shared Object "
|
||||
fi
|
||||
|
||||
# Remove CTEST var once SWDEV-381396 is fixed
|
||||
cmake \
|
||||
$(rocm_cmake_params) \
|
||||
-DBUILD_SHARED_LIBS=$SHARED_LIBS \
|
||||
@@ -143,7 +152,7 @@ verifyEnvSetup
|
||||
|
||||
case $TARGET in
|
||||
(clean) clean ;;
|
||||
(build) build ;;
|
||||
(build) build; build_wheel "$BUILD_DIR" "$PROJ_NAME" ;;
|
||||
(outdir) print_output_directory ;;
|
||||
(*) die "Invalid target $TARGET" ;;
|
||||
esac
|
||||
|
||||
@@ -2,10 +2,14 @@
|
||||
|
||||
set -ex
|
||||
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/compute_helper.sh"
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/compute_utils.sh"
|
||||
|
||||
# Temporarily disable Address Sanitizer
|
||||
ENABLE_ADDRESS_SANITIZER=false
|
||||
|
||||
set_component_src composable_kernel
|
||||
|
||||
disable_debug_package_generation
|
||||
# Set the GPU_ARCH_LIST to the supported GPUs needed after https://github.com/ROCm/composable_kernel/pull/1536/
|
||||
GPU_ARCH_LIST="gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||
|
||||
build_miopen_ck() {
|
||||
@@ -23,10 +27,17 @@ build_miopen_ck() {
|
||||
GPU_ARCH_LIST="gfx942"
|
||||
ack_and_skip_static
|
||||
fi
|
||||
|
||||
# if ENABLE_GPU_ARCH is set in env by Job parameter ENABLE_GPU_ARCH, then set GPU_ARCH_LIST to that value
|
||||
# This needs to be removed when CK aligns with other component and uses -DAMDGPU_TARGET or _DGPO_TARGET
|
||||
# then we can use set_gpu_arch from compure_helper.sh and get rid of all if clauses
|
||||
if [ -n "$ENABLE_GPU_ARCH" ]; then
|
||||
GPU_ARCH_LIST="$ENABLE_GPU_ARCH"
|
||||
fi
|
||||
# Latest CK requiring Python 3.8 as the minimum.
|
||||
# Point CMake to that explicit location and adjust LD_LIBRARY_PATH.
|
||||
PYTHON_VERSION_WORKAROUND=''
|
||||
echo "DISTRO_ID: ${DISTRO_ID}"
|
||||
if [ "$DISTRO_ID" = "rhel-8.8" ] || [ "$DISTRO_ID" = "sles-15.5" ] ; then
|
||||
if [ "$DISTRO_ID" = "rhel-8.8" ] || [ "$DISTRO_NAME" == "sles" ] || [ "$DISTRO_ID" = "debian-10" ]; then
|
||||
EXTRA_PYTHON_PATH=/opt/Python-3.8.13
|
||||
PYTHON_VERSION_WORKAROUND="-DCK_USE_ALTERNATIVE_PYTHON=${EXTRA_PYTHON_PATH}/bin/python3.8"
|
||||
# For the python interpreter we need to export LD_LIBRARY_PATH.
|
||||
@@ -42,19 +53,20 @@ build_miopen_ck() {
|
||||
"${rocm_math_common_cmake_params[@]}" \
|
||||
${PYTHON_VERSION_WORKAROUND} \
|
||||
-DCPACK_GENERATOR="${PKGTYPE^^}" \
|
||||
-DCMAKE_CXX_COMPILER="${ROCM_PATH}/llvm/bin/clang++" \
|
||||
-DCMAKE_C_COMPILER="${ROCM_PATH}/llvm/bin/clang" \
|
||||
-DCMAKE_CXX_COMPILER=$(set_build_variables __CLANG++__) \
|
||||
-DCMAKE_C_COMPILER=$(set_build_variables __CLANG__) \
|
||||
${LAUNCHER_FLAGS} \
|
||||
-DGPU_ARCHS="${GPU_ARCH_LIST}" \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_CXX_FLAGS=" -O3 " \
|
||||
"$COMPONENT_SRC"
|
||||
|
||||
cmake --build . -- -j${PROC} package
|
||||
cmake --build "$BUILD_DIR" -- install
|
||||
mkdir -p $PACKAGE_DIR && cp ./*.${PKGTYPE} $PACKAGE_DIR
|
||||
|
||||
copy_if "${PKGTYPE}" "${CPACKGEN:-"DEB;RPM"}" "${PACKAGE_DIR}" "${BUILD_DIR}"/*."${PKGTYPE}"
|
||||
}
|
||||
|
||||
# Use the function to unset the LDFLAGS and CXXFLAGS
|
||||
# specifically set for ASAN
|
||||
unset_asan_env_vars() {
|
||||
ASAN_CMAKE_PARAMS="false"
|
||||
export ADDRESS_SANITIZER="OFF"
|
||||
@@ -68,85 +80,6 @@ set_address_sanitizer_off() {
|
||||
export LDFLAGS=""
|
||||
}
|
||||
|
||||
build_miopen_ckProf() {
|
||||
ENABLE_ADDRESS_SANITIZER=false
|
||||
echo "Start Building Composable Kernel Profiler"
|
||||
if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
|
||||
set_asan_env_vars
|
||||
set_address_sanitizer_on
|
||||
else
|
||||
unset_asan_env_vars
|
||||
set_address_sanitizer_off
|
||||
fi
|
||||
|
||||
cd $COMPONENT_SRC
|
||||
cd "$BUILD_DIR"
|
||||
rm -rf *
|
||||
|
||||
architectures='gfx10 gfx11 gfx90 gfx94'
|
||||
if [ -n "$GPU_ARCHS" ]; then
|
||||
architectures=$(echo ${GPU_ARCHS} | awk -F';' '{for(i=1;i<=NF;i++) a[substr($i,1,5)]} END{for(i in a) printf i" "}')
|
||||
fi
|
||||
|
||||
for arch in ${architectures}
|
||||
do
|
||||
if [ "${ASAN_CMAKE_PARAMS}" == "true" ] ; then
|
||||
cmake -DBUILD_DEV=OFF \
|
||||
-DCMAKE_PREFIX_PATH="${ROCM_PATH%-*}/lib/cmake;${ROCM_PATH%-*}/$ASAN_LIBDIR;${ROCM_PATH%-*}/llvm;${ROCM_PATH%-*}" \
|
||||
-DCMAKE_BUILD_TYPE=${BUILD_TYPE:-'RelWithDebInfo'} \
|
||||
-DCMAKE_SHARED_LINKER_FLAGS_INIT="-Wl,--enable-new-dtags,--rpath,$ROCM_ASAN_LIB_RPATH" \
|
||||
-DCMAKE_EXE_LINKER_FLAGS_INIT="-Wl,--enable-new-dtags,--rpath,$ROCM_ASAN_EXE_RPATH" \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=1 \
|
||||
-DCMAKE_INSTALL_RPATH_USE_LINK_PATH=FALSE \
|
||||
-DCMAKE_INSTALL_PREFIX="${ROCM_PATH}" \
|
||||
-DCMAKE_PACKAGING_INSTALL_PREFIX="${ROCM_PATH}" \
|
||||
-DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
|
||||
-DROCM_SYMLINK_LIBS=OFF \
|
||||
-DCPACK_PACKAGING_INSTALL_PREFIX="${ROCM_PATH}" \
|
||||
-DROCM_DISABLE_LDCONFIG=ON \
|
||||
-DROCM_PATH="${ROCM_PATH}" \
|
||||
-DCPACK_GENERATOR="${PKGTYPE^^}" \
|
||||
-DCMAKE_CXX_COMPILER="${ROCM_PATH}/llvm/bin/clang++" \
|
||||
-DCMAKE_C_COMPILER="${ROCM_PATH}/llvm/bin/clang" \
|
||||
${LAUNCHER_FLAGS} \
|
||||
-DPROFILER_ONLY=ON \
|
||||
-DENABLE_ASAN_PACKAGING=true \
|
||||
-DGPU_ARCH="${arch}" \
|
||||
"$COMPONENT_SRC"
|
||||
else
|
||||
cmake -DBUILD_DEV=OFF \
|
||||
-DCMAKE_PREFIX_PATH="${ROCM_PATH%-*}" \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_SHARED_LINKER_FLAGS_INIT='-Wl,--enable-new-dtags,--rpath,$ORIGIN' \
|
||||
-DCMAKE_EXE_LINKER_FLAGS_INIT='-Wl,--enable-new-dtags,--rpath,$ORIGIN/../lib' \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=1 \
|
||||
-DCMAKE_INSTALL_RPATH_USE_LINK_PATH=FALSE \
|
||||
-DCMAKE_INSTALL_PREFIX="${ROCM_PATH}" \
|
||||
-DCMAKE_PACKAGING_INSTALL_PREFIX="${ROCM_PATH}" \
|
||||
-DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
|
||||
-DROCM_SYMLINK_LIBS=OFF \
|
||||
-DCPACK_PACKAGING_INSTALL_PREFIX="${ROCM_PATH}" \
|
||||
-DROCM_DISABLE_LDCONFIG=ON \
|
||||
-DROCM_PATH="${ROCM_PATH}" \
|
||||
-DCPACK_GENERATOR="${PKGTYPE^^}" \
|
||||
-DCMAKE_CXX_COMPILER="${ROCM_PATH}/llvm/bin/clang++" \
|
||||
-DCMAKE_C_COMPILER="${ROCM_PATH}/llvm/bin/clang" \
|
||||
${LAUNCHER_FLAGS} \
|
||||
-DPROFILER_ONLY=ON \
|
||||
-DGPU_ARCH="${arch}" \
|
||||
"$COMPONENT_SRC"
|
||||
fi
|
||||
|
||||
cmake --build . -- -j${PROC} package
|
||||
cp ./*ckprofiler*.${PKGTYPE} $PACKAGE_DIR
|
||||
rm -rf *
|
||||
done
|
||||
rm -rf _CPack_Packages/ && find -name '*.o' -delete
|
||||
|
||||
echo "Finished building Composable Kernel"
|
||||
show_build_cache_stats
|
||||
}
|
||||
|
||||
clean_miopen_ck() {
|
||||
echo "Cleaning MIOpen-CK build directory: ${BUILD_DIR} ${PACKAGE_DIR}"
|
||||
rm -rf "$BUILD_DIR" "$PACKAGE_DIR"
|
||||
@@ -156,7 +89,7 @@ clean_miopen_ck() {
|
||||
stage2_command_args "$@"
|
||||
|
||||
case $TARGET in
|
||||
build) build_miopen_ck ;;
|
||||
build) build_miopen_ck; build_wheel ;;
|
||||
outdir) print_output_directory ;;
|
||||
clean) clean_miopen_ck ;;
|
||||
*) die "Invalid target $TARGET" ;;
|
||||
|
||||
@@ -11,6 +11,8 @@ printUsage() {
|
||||
echo " -r, --release Make a release build instead of a debug build"
|
||||
echo " --enable-assertions Enable assertions"
|
||||
echo " -a, --address_sanitizer Enable address sanitizer"
|
||||
echo " -w, --wheel Creates python wheel package of dbgapi.
|
||||
It needs to be used along with -r option"
|
||||
echo " -o, --outdir <pkg_type> Print path of output directory containing packages of
|
||||
type referred to by pkg_type"
|
||||
echo " -h, --help Prints this help"
|
||||
@@ -25,6 +27,7 @@ printUsage() {
|
||||
return 0
|
||||
}
|
||||
|
||||
## Build environment variables
|
||||
API_NAME=rocm-dbgapi
|
||||
AMD_DBGAPI_NAME=amd-dbgapi
|
||||
MAKEINSTALL_MANIFEST=makeinstall_manifest.txt
|
||||
@@ -36,21 +39,24 @@ PACKAGE_ROOT=$(getPackageRoot)
|
||||
PACKAGE_LIB=$(getLibPath)
|
||||
PACKAGE_INCLUDE=$(getIncludePath)
|
||||
BUILD_DIR=$(getBuildPath $API_NAME)
|
||||
PACKAGE_DEB=$(getPackageRoot)/deb/$API_NAME
|
||||
PACKAGE_RPM=$(getPackageRoot)/rpm/$API_NAME
|
||||
PACKAGE_DEB=$(getPackageRoot)/deb/$PROJ_NAME
|
||||
PACKAGE_RPM=$(getPackageRoot)/rpm/$PROJ_NAME
|
||||
#PACKAGE_PREFIX=$ROCM_INSTALL_PATH
|
||||
BUILD_TYPE=Debug
|
||||
MAKE_OPTS=($DASH_JAY -C "$BUILD_DIR")
|
||||
MAKE_OPTS=($DASH_JAY -C "$BUILD_DIR") # Note that DASH_JAY might have a space after the -j
|
||||
SHARED_LIBS="ON"
|
||||
CLEAN_OR_OUT=0;
|
||||
MAKETARGET="deb"
|
||||
PKGTYPE="deb"
|
||||
DODOCSBUILD=true
|
||||
|
||||
VALID_STR=$(getopt -o hcraso:p:M --long help,clean,release,enable-assertions,static,address_sanitizer,outdir:,package:skip_man_pages -- "$@")
|
||||
#parse the arguments
|
||||
VALID_STR=$(getopt -o hcraswo:p:M --long help,clean,release,enable-assertions,static,wheel,address_sanitizer,outdir:,package:skip_man_pages -- "$@")
|
||||
eval set -- "$VALID_STR"
|
||||
|
||||
while true ;
|
||||
do
|
||||
#echo "parocessing $1"
|
||||
case "$1" in
|
||||
(-h | --help)
|
||||
printUsage ; exit 0;;
|
||||
@@ -66,12 +72,14 @@ do
|
||||
set_address_sanitizer_on ;;
|
||||
(-s | --static)
|
||||
ack_and_skip_static ;;
|
||||
(-w | --wheel)
|
||||
WHEEL_PACKAGE=true ;;
|
||||
(-o | --outdir)
|
||||
TARGET="outdir"; PKGTYPE=$2 ; ((CLEAN_OR_OUT|=2)) ; shift 1 ;;
|
||||
(-M | --skip_man_pages) DODOCSBUILD=false;;
|
||||
(-p | --package)
|
||||
MAKETARGET="$2" ; shift 1;;
|
||||
--) shift; break;;
|
||||
--) shift; break;; # end delimiter
|
||||
(*)
|
||||
echo " This should never come but just incase : UNEXPECTED ERROR Parm : [$1] ">&2 ; exit 20;;
|
||||
esac
|
||||
@@ -104,7 +112,7 @@ build() {
|
||||
then
|
||||
echo " No $ROCM_DBGAPI_ROOT/CMakeLists.txt file, skipping rocm-dbgapi" >&2
|
||||
echo " No $ROCM_DBGAPI_ROOT/CMakeLists.txt file, skipping rocm-dbgapi"
|
||||
exit 0
|
||||
exit 0 # THis is not an error
|
||||
fi
|
||||
echo "Building $PROJ_NAME"
|
||||
|
||||
@@ -121,11 +129,14 @@ build() {
|
||||
cmake --build "$BUILD_DIR" -- "${MAKE_OPTS[@]}"
|
||||
"$DODOCSBUILD" && cmake --build "$BUILD_DIR" -- "${MAKE_OPTS[@]}" doc
|
||||
cmake --build "$BUILD_DIR" -- "${MAKE_OPTS[@]}" install
|
||||
|
||||
#install_manifest.txt is created by make install and make package with same name unless
|
||||
#component packaging is enabled. To avoid overwriting by make package,move the manifest file
|
||||
#to a different name and can be used for build clean up
|
||||
mv "$BUILD_DIR/install_manifest.txt" "$BUILD_DIR/$MAKEINSTALL_MANIFEST"
|
||||
cmake --build "$BUILD_DIR" -- "${MAKE_OPTS[@]}" package
|
||||
|
||||
mkdir -p "$PACKAGE_LIB"
|
||||
# handle the library being in more than one place to avoid breaking the build.
|
||||
(
|
||||
shopt -s nullglob
|
||||
cp -R "$BUILD_DIR/lib/${LIB_NAME}"* "$BUILD_DIR/${LIB_NAME}"* "$PACKAGE_LIB"
|
||||
@@ -151,7 +162,7 @@ verifyEnvSetup
|
||||
|
||||
case $TARGET in
|
||||
(clean) clean ;;
|
||||
(build) build ;;
|
||||
(build) build; build_wheel "$BUILD_DIR" "$PROJ_NAME";;
|
||||
(outdir) print_output_directory ;;
|
||||
(*) die "Invalid target $TARGET" ;;
|
||||
esac
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user