Compare commits

1 commit

Author: David Dixon
SHA1: a796f337c1
Message: add catch2
Date: 2025-09-05 00:43:38 +00:00

230 changed files with 6025 additions and 35229 deletions

View File

@@ -128,9 +128,6 @@ jobs:
  parameters:
  aptPackages: ${{ parameters.aptPackages }}
  pipModules: ${{ parameters.pipModules }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
- parameters:
- cmakeVersion: '3.28.6'
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
@@ -155,7 +152,6 @@ jobs:
  -DCMAKE_BUILD_TYPE=Release
  -DGPU_TARGETS=${{ job.target }}
  -DAMDGPU_TARGETS=${{ job.target }}
- -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
  -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
  -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
  -DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
@@ -196,9 +192,6 @@ jobs:
  parameters:
  aptPackages: ${{ parameters.aptPackages }}
  pipModules: ${{ parameters.pipModules }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
- parameters:
- cmakeVersion: '3.28.6'
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
@@ -224,7 +217,6 @@ jobs:
  -DCMAKE_BUILD_TYPE=Release
  -DGPU_TARGETS=${{ job.target }}
  -DAMDGPU_TARGETS=${{ job.target }}
- -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
  -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
  -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
  -DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include

View File

@@ -34,7 +34,6 @@ parameters:
  default:
  - cmake
  - libnuma-dev
- - libsimde-dev
  - mesa-common-dev
  - ninja-build
  - ocl-icd-libopencl1

View File

@@ -79,7 +79,7 @@ jobs:
  aptPackages: ${{ parameters.aptPackages }}
  pipModules: ${{ parameters.pipModules }}
  packageManager: ${{ job.packageManager }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
  - task: Bash@3
  displayName: Add lit to PATH
  inputs:

View File

@@ -131,7 +131,7 @@ jobs:
  parameters:
  aptPackages: ${{ parameters.aptPackages }}
  pipModules: ${{ parameters.pipModules }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
@@ -212,7 +212,7 @@ jobs:
  parameters:
  aptPackages: ${{ parameters.aptPackages }}
  pipModules: ${{ parameters.pipModules }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:

View File

@@ -1,29 +1,10 @@
  parameters:
- - name: componentName
- type: string
- default: ROCR-Runtime
  - name: checkoutRepo
  type: string
  default: 'self'
  - name: checkoutRef
  type: string
  default: ''
- # monorepo related parameters
- - name: sparseCheckoutDir
- type: string
- default: ''
- - name: triggerDownstreamJobs
- type: boolean
- default: false
- - name: downstreamAggregateNames
- type: string
- default: ''
- - name: buildDependsOn
- type: object
- default: null
- - name: unifiedBuild
- type: boolean
- default: false
  # set to true if doing full build of ROCm stack
  # and dependencies are pulled from same pipeline
  - name: aggregatePipeline
@@ -37,7 +18,6 @@ parameters:
  - libdrm-dev
  - libelf-dev
  - libnuma-dev
- - libsimde-dev
  - ninja-build
  - pkg-config
  - name: rocmDependencies
@@ -65,10 +45,6 @@ parameters:
  jobs:
  - ${{ each job in parameters.jobMatrix.buildJobs }}:
  - job: ROCR_Runtime_build_${{ job.os }}
- ${{ if parameters.buildDependsOn }}:
- dependsOn:
- - ${{ each build in parameters.buildDependsOn }}:
- - ${{ build }}_${{ job.os }}
  pool:
  vmImage: 'ubuntu-22.04'
  ${{ if eq(job.os, 'almalinux8') }}:
@@ -89,18 +65,14 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
  checkoutRepo: ${{ parameters.checkoutRepo }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
  parameters:
  checkoutRef: ${{ parameters.checkoutRef }}
  dependencyList: ${{ parameters.rocmDependencies }}
  aggregatePipeline: ${{ parameters.aggregatePipeline }}
  os: ${{ job.os }}
- ${{ if parameters.triggerDownstreamJobs }}:
- downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
  parameters:
- componentName: ${{ parameters.componentName }}
  os: ${{ job.os }}
  useAmdclang: false
  extraBuildFlags: >-
@@ -110,112 +82,105 @@ jobs:
  -GNinja
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
  parameters:
- componentName: ${{ parameters.componentName }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
  os: ${{ job.os }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
  parameters:
- componentName: ${{ parameters.componentName }}
  os: ${{ job.os }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
  # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
  # parameters:
  # aptPackages: ${{ parameters.aptPackages }}
- - ${{ if eq(parameters.unifiedBuild, False) }}:
- - ${{ each job in parameters.jobMatrix.testJobs }}:
- - job: ROCR_Runtime_test_${{ job.os }}_${{ job.target }}
- dependsOn: ROCR_Runtime_build_${{ job.os }}
- condition:
- and(succeeded(),
- eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
- not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
- eq(${{ parameters.aggregatePipeline }}, False)
- )
- variables:
- - group: common
- - template: /.azuredevops/variables-global.yml
- pool: ${{ job.target }}_test_pool
- workspace:
- clean: all
- steps:
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- parameters:
- aptPackages: ${{ parameters.aptPackages }}
- packageManager: ${{ job.packageManager }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- parameters:
- os: ${{ job.os }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
- parameters:
- checkoutRef: ${{ parameters.checkoutRef }}
- dependencyList: ${{ parameters.rocmTestDependencies }}
- gpuTarget: ${{ job.target }}
- os: ${{ job.os }}
- ${{ if parameters.triggerDownstreamJobs }}:
- downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
- parameters:
- checkoutRepo: ${{ parameters.checkoutRepo }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- parameters:
- runRocminfo: false
- - task: Bash@3
- displayName: Build kfdtest
- inputs:
- targetType: 'inline'
- workingDirectory: $(Agent.BuildDirectory)/s/libhsakmt/tests/kfdtest
- script: |
- if [ -e /opt/rh/gcc-toolset-14/enable ]; then
- source /opt/rh/gcc-toolset-14/enable
- fi
- mkdir build && cd build
- cmake -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm ..
- make
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
- parameters:
- componentName: kfdtest
- testExecutable: BIN_DIR=$(Agent.BuildDirectory)/s/libhsakmt/tests/kfdtest/build ./run_kfdtest.sh
- testParameters: '-p core --gtest_output=xml:./test_output.xml --gtest_color=yes'
- testDir: $(Agent.BuildDirectory)/s/libhsakmt/tests/kfdtest/scripts
- os: ${{ job.os }}
- - task: Bash@3
- displayName: Build rocrtst
- inputs:
- targetType: 'inline'
- workingDirectory: $(Agent.BuildDirectory)/s/rocrtst/suites/test_common
- script: |
- echo $(Agent.BuildDirectory)/s/rocrtst/thirdparty/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
- sudo cat /etc/ld.so.conf.d/rocm-ci.conf
- sudo ldconfig -v
- ldconfig -p
- if [ -e /opt/rh/gcc-toolset-14/enable ]; then
- source /opt/rh/gcc-toolset-14/enable
- fi
- BASE_CLANG_DIR=$(Agent.BuildDirectory)/rocm/llvm/lib/clang
- export NEWEST_CLANG_VER=$(ls -1 $BASE_CLANG_DIR | sort -V | tail -n 1)
- mkdir build && cd build
- cmake .. \
- -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm \
- -DTARGET_DEVICES=${{ job.target }} \
- -DROCM_DIR=$(Agent.BuildDirectory)/rocm \
- -DLLVM_DIR=$(Agent.BuildDirectory)/rocm/llvm/bin \
- -DOPENCL_INC_DIR=$BASE_CLANG_DIR/$NEWEST_CLANG_VER/include
- make
- make rocrtst_kernels
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
- parameters:
- componentName: rocrtst
- testExecutable: ./rocrtst64
- testParameters: '--gtest_filter="-rocrtstNeg.Memory_Negative_Tests:rocrtstFunc.Memory_Max_Mem" --gtest_output=xml:./test_output.xml --gtest_color=yes'
- testDir: $(Agent.BuildDirectory)/s//rocrtst/suites/test_common/build/${{ job.target }}
- os: ${{ job.os }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
- parameters:
- aptPackages: ${{ parameters.aptPackages }}
- environment: test
- gpuTarget: ${{ job.target }}
- # docker image will be missing libhwloc5
+ - ${{ each job in parameters.jobMatrix.testJobs }}:
+ - job: ROCR_Runtime_test_${{ job.os }}_${{ job.target }}
+ dependsOn: ROCR_Runtime_build_${{ job.os }}
+ condition:
+ and(succeeded(),
+ eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+ not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+ eq(${{ parameters.aggregatePipeline }}, False)
+ )
+ variables:
+ - group: common
+ - template: /.azuredevops/variables-global.yml
+ pool: ${{ job.target }}_test_pool
+ workspace:
+ clean: all
+ steps:
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+ parameters:
+ aptPackages: ${{ parameters.aptPackages }}
+ packageManager: ${{ job.packageManager }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+ parameters:
+ os: ${{ job.os }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+ parameters:
+ checkoutRef: ${{ parameters.checkoutRef }}
+ dependencyList: ${{ parameters.rocmTestDependencies }}
+ gpuTarget: ${{ job.target }}
+ os: ${{ job.os }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+ parameters:
+ checkoutRepo: ${{ parameters.checkoutRepo }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+ parameters:
+ runRocminfo: false
+ - task: Bash@3
+ displayName: Build kfdtest
+ inputs:
+ targetType: 'inline'
+ workingDirectory: $(Build.SourcesDirectory)/libhsakmt/tests/kfdtest
+ script: |
+ if [ -e /opt/rh/gcc-toolset-14/enable ]; then
+ source /opt/rh/gcc-toolset-14/enable
+ fi
+ mkdir build && cd build
+ cmake -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm ..
+ make
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+ parameters:
+ componentName: kfdtest
+ testExecutable: BIN_DIR=$(Build.SourcesDirectory)/libhsakmt/tests/kfdtest/build ./run_kfdtest.sh
+ testParameters: '-p core --gtest_output=xml:./test_output.xml --gtest_color=yes'
+ testDir: $(Build.SourcesDirectory)/libhsakmt/tests/kfdtest/scripts
+ os: ${{ job.os }}
+ - task: Bash@3
+ displayName: Build rocrtst
+ inputs:
+ targetType: 'inline'
+ workingDirectory: $(Build.SourcesDirectory)/rocrtst/suites/test_common
+ script: |
+ echo $(Build.SourcesDirectory)/rocrtst/thirdparty/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
+ sudo cat /etc/ld.so.conf.d/rocm-ci.conf
+ sudo ldconfig -v
+ ldconfig -p
+ if [ -e /opt/rh/gcc-toolset-14/enable ]; then
+ source /opt/rh/gcc-toolset-14/enable
+ fi
+ BASE_CLANG_DIR=$(Agent.BuildDirectory)/rocm/llvm/lib/clang
+ export NEWEST_CLANG_VER=$(ls -1 $BASE_CLANG_DIR | sort -V | tail -n 1)
+ mkdir build && cd build
+ cmake .. \
+ -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm \
+ -DTARGET_DEVICES=${{ job.target }} \
+ -DROCM_DIR=$(Agent.BuildDirectory)/rocm \
+ -DLLVM_DIR=$(Agent.BuildDirectory)/rocm/llvm/bin \
+ -DOPENCL_INC_DIR=$BASE_CLANG_DIR/$NEWEST_CLANG_VER/include
+ make
+ make rocrtst_kernels
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+ parameters:
+ componentName: rocrtst
+ testExecutable: ./rocrtst64
+ testParameters: '--gtest_filter="-rocrtstNeg.Memory_Negative_Tests:rocrtstFunc.Memory_Max_Mem" --gtest_output=xml:./test_output.xml --gtest_color=yes'
+ testDir: $(Build.SourcesDirectory)/rocrtst/suites/test_common/build/${{ job.target }}
+ os: ${{ job.os }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+ parameters:
+ aptPackages: ${{ parameters.aptPackages }}
+ environment: test
+ gpuTarget: ${{ job.target }}
+ # docker image will be missing libhwloc5

View File

@@ -1,29 +1,10 @@
  parameters:
- - name: componentName
- type: string
- default: amdsmi
  - name: checkoutRepo
  type: string
  default: 'self'
  - name: checkoutRef
  type: string
  default: ''
- # monorepo related parameters
- - name: sparseCheckoutDir
- type: string
- default: ''
- - name: triggerDownstreamJobs
- type: boolean
- default: false
- - name: downstreamAggregateNames
- type: string
- default: ''
- - name: buildDependsOn
- type: object
- default: null
- - name: unifiedBuild
- type: boolean
- default: false
  # set to true if doing full build of ROCm stack
  # and dependencies are pulled from same pipeline
  - name: aggregatePipeline
@@ -50,7 +31,7 @@ parameters:
  jobs:
  - ${{ each job in parameters.jobMatrix.buildJobs }}:
- - job: ${{ parameters.componentName }}_build_${{ job.os }}
+ - job: amdsmi_build_${{ job.os }}
  pool:
  ${{ if eq(job.os, 'ubuntu2404') }}:
  vmImage: 'ubuntu-24.04'
@@ -74,7 +55,6 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
  checkoutRepo: ${{ parameters.checkoutRepo }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
  parameters:
  os: ${{ job.os }}
@@ -85,54 +65,50 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
  parameters:
  os: ${{ job.os }}
- componentName: ${{ parameters.componentName }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
  parameters:
  os: ${{ job.os }}
- componentName: ${{ parameters.componentName }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
  # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
  # parameters:
  # aptPackages: ${{ parameters.aptPackages }}
- - ${{ if eq(parameters.unifiedBuild, False) }}:
- - ${{ each job in parameters.jobMatrix.testJobs }}:
- - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
- dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}
- condition:
- and(succeeded(),
- eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
- not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
- eq(${{ parameters.aggregatePipeline }}, False)
- )
- variables:
- - group: common
- - template: /.azuredevops/variables-global.yml
- pool: ${{ job.target }}_test_pool
- workspace:
- clean: all
- steps:
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- parameters:
- aptPackages: ${{ parameters.aptPackages }}
- packageManager: ${{ job.packageManager }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- parameters:
- os: ${{ job.os }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- parameters:
- runRocminfo: false
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
- parameters:
- componentName: ${{ parameters.componentName }}
- testDir: '$(Agent.BuildDirectory)'
- testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst'
- testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
- os: ${{ job.os }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
- parameters:
- aptPackages: ${{ parameters.aptPackages }}
- environment: test
- gpuTarget: ${{ job.target }}
+ - ${{ each job in parameters.jobMatrix.testJobs }}:
+ - job: amdsmi_test_${{ job.os }}_${{ job.target }}
+ dependsOn: amdsmi_build_${{ job.os }}
+ condition:
+ and(succeeded(),
+ eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+ not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+ eq(${{ parameters.aggregatePipeline }}, False)
+ )
+ variables:
+ - group: common
+ - template: /.azuredevops/variables-global.yml
+ pool: ${{ job.target }}_test_pool
+ workspace:
+ clean: all
+ steps:
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+ parameters:
+ aptPackages: ${{ parameters.aptPackages }}
+ packageManager: ${{ job.packageManager }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+ parameters:
+ os: ${{ job.os }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+ parameters:
+ runRocminfo: false
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+ parameters:
+ componentName: amdsmi
+ testDir: '$(Agent.BuildDirectory)'
+ testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst'
+ testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
+ os: ${{ job.os }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+ parameters:
+ aptPackages: ${{ parameters.aptPackages }}
+ environment: test
+ gpuTarget: ${{ job.target }}

View File

@@ -1,174 +0,0 @@
parameters:
- name: componentName
type: string
default: aqlprofile
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
type: boolean
default: false
- name: aptPackages
type: object
default:
- cmake
- git
- ninja-build
- python3-pip
- name: rocmDependencies
type: object
default:
- clr
- llvm-project
- ROCR-Runtime
- name: rocmTestDependencies
type: object
default:
- clr
- llvm-project
- ROCR-Runtime
- rocprofiler-register
- name: jobMatrix
type: object
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ variables.MEDIUM_BUILD_POOL }}
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- gtest
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
os: ${{ job.os }}
consolidateBuildAndInstall: true
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/aqlprofile/cmake_modules
-DAQLPROFILE_BUILD_TESTS=ON
-DGPU_TARGETS=${{ job.target }}
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- ${{ if eq(job.os, 'ubuntu2204') }}:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
gpuTarget: ${{ job.target }}
- ${{ if eq(parameters.unifiedBuild, False) }}:
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
preTargetFilter: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: ${{ parameters.componentName }}
testDir: $(Agent.BuildDirectory)/rocm/share/hsa-amd-aqlprofile/
testExecutable: ./run_tests.sh
testParameters: ''
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
environment: test
gpuTarget: ${{ job.target }}

View File

@@ -1,29 +1,10 @@
  parameters:
- - name: componentName
- type: string
- default: hip-tests
  - name: checkoutRepo
  type: string
  default: 'self'
  - name: checkoutRef
  type: string
  default: ''
- # monorepo related parameters
- - name: sparseCheckoutDir
- type: string
- default: ''
- - name: triggerDownstreamJobs
- type: boolean
- default: false
- - name: downstreamAggregateNames
- type: string
- default: ''
- - name: buildDependsOn
- type: object
- default: null
- - name: unifiedBuild
- type: boolean
- default: false
  # set to true if doing full build of ROCm stack
  # and dependencies are pulled from same pipeline
  - name: aggregatePipeline
@@ -79,10 +60,6 @@ parameters:
  jobs:
  - ${{ each job in parameters.jobMatrix.buildJobs }}:
  - job: hip_tests_build_${{ job.target }}
- ${{ if parameters.buildDependsOn }}:
- dependsOn:
- - ${{ each build in parameters.buildDependsOn }}:
- - ${{ build }}_${{ job.target }}
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
@@ -99,18 +76,15 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
  checkoutRepo: ${{ parameters.checkoutRepo }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
  parameters:
  checkoutRef: ${{ parameters.checkoutRef }}
  dependencyList: ${{ parameters.rocmDependencies }}
  aggregatePipeline: ${{ parameters.aggregatePipeline }}
- ${{ if parameters.triggerDownstreamJobs }}:
- downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
  # compile hip-tests
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
  parameters:
- componentName: ${{ parameters.componentName }}
+ componentName: hip-tests
  cmakeSourceDir: '../catch'
  customBuildTarget: build_tests
  extraBuildFlags: >-
@@ -122,12 +96,9 @@ jobs:
  -GNinja
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
  parameters:
- componentName: ${{ parameters.componentName }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
  gpuTarget: ${{ job.target }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
  parameters:
- componentName: ${{ parameters.componentName }}
  gpuTarget: ${{ job.target }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -137,56 +108,52 @@ jobs:
  extraEnvVars:
  - HIP_ROCCLR_HOME:::/home/user/workspace/rocm
- - ${{ if eq(parameters.unifiedBuild, False) }}:
- - ${{ each job in parameters.jobMatrix.testJobs }}:
- - job: hip_tests_test_${{ job.target }}
- timeoutInMinutes: 240
- dependsOn: hip_tests_build_${{ job.target }}
- condition:
- and(succeeded(),
- eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
- not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
- eq(${{ parameters.aggregatePipeline }}, False)
- )
- variables:
- - group: common
- - template: /.azuredevops/variables-global.yml
- pool: ${{ job.target }}_test_pool
- workspace:
- clean: all
- steps:
- - checkout: none
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- parameters:
- aptPackages: ${{ parameters.aptPackages }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- parameters:
- gpuTarget: ${{ job.target }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
- parameters:
- checkoutRef: ${{ parameters.checkoutRef }}
- dependencyList: ${{ parameters.rocmTestDependencies }}
- gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- - task: Bash@3
- displayName: Symlink rocm_agent_enumerator
- inputs:
- targetType: inline
- script: |
- # Assuming that /opt is no longer persistent across runs, test environments are fully ephemeral
- sudo mkdir -p /opt/rocm/bin
- sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rocm_agent_enumerator /opt/rocm/bin/rocm_agent_enumerator
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
- parameters:
- componentName: ${{ parameters.componentName }}
- testDir: $(Agent.BuildDirectory)/rocm/share/hip
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
- parameters:
- aptPackages: ${{ parameters.aptPackages }}
- environment: test
- gpuTarget: ${{ job.target }}
- optSymLink: true
+ - ${{ each job in parameters.jobMatrix.testJobs }}:
+ - job: hip_tests_test_${{ job.target }}
+ timeoutInMinutes: 240
+ dependsOn: hip_tests_build_${{ job.target }}
+ condition:
+ and(succeeded(),
+ eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+ not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+ eq(${{ parameters.aggregatePipeline }}, False)
+ )
+ variables:
+ - group: common
+ - template: /.azuredevops/variables-global.yml
+ pool: ${{ job.target }}_test_pool
+ workspace:
+ clean: all
+ steps:
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+ parameters:
+ aptPackages: ${{ parameters.aptPackages }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+ parameters:
+ gpuTarget: ${{ job.target }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+ parameters:
+ checkoutRef: ${{ parameters.checkoutRef }}
+ dependencyList: ${{ parameters.rocmTestDependencies }}
+ gpuTarget: ${{ job.target }}
+ - task: Bash@3
+ displayName: Symlink rocm_agent_enumerator
+ inputs:
+ targetType: inline
+ script: |
+ # Assuming that /opt is no longer persistent across runs, test environments are fully ephemeral
+ sudo mkdir -p /opt/rocm/bin
+ sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rocm_agent_enumerator /opt/rocm/bin/rocm_agent_enumerator
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+ parameters:
+ componentName: hip_tests
+ testDir: $(Agent.BuildDirectory)/rocm/share/hip
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+ parameters:
+ aptPackages: ${{ parameters.aptPackages }}
+ environment: test
+ gpuTarget: ${{ job.target }}
+ optSymLink: true

View File

@@ -77,7 +77,6 @@ parameters:
  - clr
  - hipBLAS-common
  - llvm-project
- - rocm-cmake
  - rocminfo
  - rocm_smi_lib
  - rocprofiler-register
@@ -145,7 +144,7 @@ jobs:
  aptPackages: ${{ parameters.aptPackages }}
  pipModules: ${{ parameters.pipModules }}
  packageManager: ${{ job.packageManager }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
@@ -179,7 +178,7 @@ jobs:
  mkdir -p $(Agent.BuildDirectory)/temp-deps
  cd $(Agent.BuildDirectory)/temp-deps
  # position-independent LAPACK is required for almalinux8 builds
- cmake -DBUILD_GTEST=OFF -DBUILD_LAPACK=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON $(Agent.BuildDirectory)/sparse/projects/hipblaslt/deps
+ cmake -DBUILD_GTEST=OFF -DBUILD_LAPACK=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON $(Agent.BuildDirectory)/s/deps
  make -j
  sudo make install
  - script: |
@@ -198,8 +197,6 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
  parameters:
  os: ${{ job.os }}
- cmakeSourceDir: $(Agent.BuildDirectory)/sparse/projects/hipblaslt
- cmakeBuildDir: $(Agent.BuildDirectory)/sparse/projects/hipblaslt/build
  extraBuildFlags: >-
  -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
  -DCMAKE_INCLUDE_PATH=$(Agent.BuildDirectory)/rocm/llvm/include

View File

@@ -40,12 +40,10 @@ parameters:
  - gfortran
  - libgfortran5
  - libopenblas-dev
- - liblapack-dev
  - name: pipModules
  type: object
  default:
  - joblib
- - msgpack
  - name: rocmDependencies
  type: object
  default:
@@ -54,7 +52,6 @@ parameters:
  - hipSPARSE
  - llvm-project
  - rocBLAS
- - rocm-cmake
  - rocm_smi_lib
  - rocminfo
  - rocprofiler-register
@@ -68,7 +65,6 @@ parameters:
  - llvm-project
  - hipBLAS-common
  - hipBLASLt
- - rocm-cmake
  - rocBLAS
  - rocminfo
  - rocprofiler-register
@@ -112,7 +108,7 @@ jobs:
  aptPackages: ${{ parameters.aptPackages }}
  pipModules: ${{ parameters.pipModules }}
  packageManager: ${{ job.packageManager }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
@@ -128,13 +124,10 @@ jobs:
  aggregatePipeline: ${{ parameters.aggregatePipeline }}
  ${{ if parameters.triggerDownstreamJobs }}:
  downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- # NOTE: content between `---` is for transition support between old/new build systems
- # and should be removed once transition is complete.
- # -----------------------------
  # Build and install gtest and lapack
  # $(Pipeline.Workspace)/deps is a temporary folder for the build process
  # $(Pipeline.Workspace)/s/deps is part of the hipSPARSELt repo
- - script: mkdir -p $(Pipeline.Workspace)/deps
+ - script: mkdir $(Pipeline.Workspace)/deps
  displayName: Create temp folder for external dependencies
  # hipSPARSELt already has a CMake script for external deps, so we can just run that
  # https://github.com/ROCm/hipSPARSELt/blob/develop/deps/CMakeLists.txt
@@ -150,35 +143,22 @@ jobs:
  - script: sudo make install
  displayName: Install hipSPARSELt external dependencies
  workingDirectory: $(Pipeline.Workspace)/deps
- # -----------------------------
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
  parameters:
  os: ${{ job.os }}
- # NOTE: the following options are old build only
- # and can be removed after full transition to new build
- # -DAMDGPU_TARGETS=${{ job.target }}
- # -DCMAKE_Fortran_COMPILER=f95
- # -DTensile_LOGIC=
- # -DTensile_CPU_THREADS=
- # -DTensile_LIBRARY_FORMAT=msgpack
- # -DROCM_PATH=$(Agent.BuildDirectory)/rocm
- # -DBUILD_CLIENTS_TESTS=ON
- # -DBUILD_USE_LOCAL_TENSILE=OFF
  extraBuildFlags: >-
  -DCMAKE_BUILD_TYPE=Release
  -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
  -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
- -DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm"
- -DGPU_TARGETS=${{ job.target }}
- -DAMDGPU_TARGETS=${{ job.target }}
  -DCMAKE_Fortran_COMPILER=f95
+ -DAMDGPU_TARGETS=${{ job.target }}
  -DTensile_LOGIC=
  -DTensile_CPU_THREADS=
  -DTensile_LIBRARY_FORMAT=msgpack
+ -DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm"
  -DROCM_PATH=$(Agent.BuildDirectory)/rocm
  -DBUILD_CLIENTS_TESTS=ON
  -DBUILD_USE_LOCAL_TENSILE=OFF
- -DHIPSPARSELT_ENABLE_FETCH=ON
  -GNinja
  ${{ if ne(parameters.sparseCheckoutDir, '') }}:
  cmakeSourceDir: $(Build.SourcesDirectory)/projects/hipsparselt

View File

@@ -1,29 +1,10 @@
  parameters:
- - name: componentName
- type: string
- default: hipTensor
  - name: checkoutRepo
  type: string
  default: 'self'
  - name: checkoutRef
  type: string
  default: ''
- # monorepo related parameters
- - name: sparseCheckoutDir
- type: string
- default: ''
- - name: triggerDownstreamJobs
- type: boolean
- default: false
- - name: downstreamAggregateNames
- type: string
- default: ''
- - name: buildDependsOn
- type: object
- default: null
- - name: unifiedBuild
- type: boolean
- default: false
  # set to true if doing full build of ROCm stack
  # and dependencies are pulled from same pipeline
  - name: aggregatePipeline
@@ -70,7 +51,7 @@ parameters:
  jobs:
  - ${{ each job in parameters.jobMatrix.buildJobs }}:
- - job: ${{ parameters.componentName }}_build_${{ job.target }}
+ - job: hipTensor_build_${{ job.target }}
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
@@ -85,21 +66,17 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
  checkoutRepo: ${{ parameters.checkoutRepo }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
  parameters:
  checkoutRef: ${{ parameters.checkoutRef }}
  dependencyList: ${{ parameters.rocmDependencies }}
  gpuTarget: ${{ job.target }}
  aggregatePipeline: ${{ parameters.aggregatePipeline }}
- ${{ if parameters.triggerDownstreamJobs }}:
- downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
  parameters:
  extraBuildFlags: >-
  -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/rocm/llvm
  -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
- -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
  -DROCM_PATH=$(Agent.BuildDirectory)/rocm
  -DCMAKE_BUILD_TYPE=Release
  -DHIPTENSOR_BUILD_TESTS=ON
@@ -107,12 +84,9 @@ jobs:
  -GNinja
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
  parameters:
- componentName: ${{ parameters.componentName }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
  gpuTarget: ${{ job.target }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
  parameters:
- componentName: ${{ parameters.componentName }}
  gpuTarget: ${{ job.target }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -120,47 +94,44 @@ jobs:
  aptPackages: ${{ parameters.aptPackages }}
  gpuTarget: ${{ job.target }}
- - ${{ if eq(parameters.unifiedBuild, False) }}:
- - ${{ each job in parameters.jobMatrix.testJobs }}:
- - job: ${{ parameters.componentName }}_test_${{ job.target }}
- timeoutInMinutes: 90
- dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
- condition:
- and(succeeded(),
- eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
- not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
- eq(${{ parameters.aggregatePipeline }}, False)
- )
- variables:
- - group: common
- - template: /.azuredevops/variables-global.yml
- pool: ${{ job.target }}_test_pool
- workspace:
- clean: all
- steps:
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- parameters:
- aptPackages: ${{ parameters.aptPackages }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- parameters:
- gpuTarget: ${{ job.target }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
- parameters:
- checkoutRef: ${{ parameters.checkoutRef }}
- dependencyList: ${{ parameters.rocmTestDependencies }}
- gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
- parameters:
- componentName: ${{ parameters.componentName }}
- testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor'
- testParameters: '-E ".*-extended" --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
- parameters:
- aptPackages: ${{ parameters.aptPackages }}
- environment: test
- gpuTarget: ${{ job.target }}
+ - ${{ each job in parameters.jobMatrix.testJobs }}:
+ - job: hipTensor_test_${{ job.target }}
+ timeoutInMinutes: 90
+ dependsOn: hipTensor_build_${{ job.target }}
+ condition:
+ and(succeeded(),
+ eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+ not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+ eq(${{ parameters.aggregatePipeline }}, False)
+ )
+ variables:
+ - group: common
+ - template: /.azuredevops/variables-global.yml
+ pool: ${{ job.target }}_test_pool
+ workspace:
+ clean: all
+ steps:
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+ parameters:
+ aptPackages: ${{ parameters.aptPackages }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+ parameters:
+ gpuTarget: ${{ job.target }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+ parameters:
+ checkoutRef: ${{ parameters.checkoutRef }}
+ dependencyList: ${{ parameters.rocmTestDependencies }}
+ gpuTarget: ${{ job.target }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+ parameters:
+ componentName: hipTensor
+ testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor'
+ testParameters: '-E ".*-extended" --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+ parameters:
+ aptPackages: ${{ parameters.aptPackages }}
+ environment: test
+ gpuTarget: ${{ job.target }}

View File

@@ -71,7 +71,7 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
  parameters:
  aptPackages: ${{ parameters.aptPackages }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:

View File

@@ -1,308 +0,0 @@
parameters:
- name: componentName
type: string
default: origami
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
type: boolean
default: false
- name: aptPackages
type: object
default:
- cmake
- git
- ninja-build
- wget
- python3
- python3-dev
- python3-pip
- python3-venv
- libgtest-dev
- libboost-filesystem-dev
- libboost-program-options-dev
- name: pipModules
type: object
default:
- nanobind>=2.0.0
- pytest
- pytest-cov
- name: rocmDependencies
type: object
default:
- clr
- llvm-project
- rocm-cmake
- rocminfo
- ROCR-Runtime
- rocprofiler-register
- name: rocmTestDependencies
type: object
default:
- clr
- llvm-project
- rocm-cmake
- rocminfo
- ROCR-Runtime
- rocprofiler-register
- name: jobMatrix
type: object
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt }
- { os: almalinux8, packageManager: dnf }
testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
# - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
# - { os: ubuntu2204, packageManager: apt, target: gfx1151 }
# - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
- name: downstreamComponentMatrix
type: object
default:
- hipBLASLt:
name: hipBLASLt
sparseCheckoutDir: projects/hipblaslt
skipUnifiedBuild: 'false'
buildDependsOn:
- origami_build
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: origami_build_${{ job.os }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
- name: ROCM_PATH
value: $(Agent.BuildDirectory)/rocm
pool:
vmImage: ${{ variables.BASE_BUILD_POOL }}
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
endpoint: ContainerService3
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- gtest
- ${{ if ne(job.os, 'almalinux8') }}:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- catch2
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
os: ${{ job.os }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
os: ${{ job.os }}
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DORIGAMI_BUILD_SHARED_LIBS=ON
-DORIGAMI_ENABLE_PYTHON=ON
-DORIGAMI_BUILD_TESTING=ON
-DORIGAMI_ENABLE_FETCH=ON
-GNinja
- ${{ if ne(job.os, 'almalinux8') }}:
- task: PublishPipelineArtifact@1
displayName: 'Publish Build Directory Artifact'
inputs:
targetPath: '$(Agent.BuildDirectory)/s/build'
artifact: '${{ parameters.componentName }}_${{ job.os }}_build_dir'
publishLocation: 'pipeline'
- task: PublishPipelineArtifact@1
displayName: 'Publish Python Source Artifact'
inputs:
targetPath: '$(Agent.BuildDirectory)/s/python'
artifact: '${{ parameters.componentName }}_${{ job.os }}_python_src'
publishLocation: 'pipeline'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
os: ${{ job.os }}
componentName: ${{ parameters.componentName }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- ${{ if eq(parameters.unifiedBuild, False) }}:
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: origami_test_${{ job.os }}_${{ job.target }}
timeoutInMinutes: 120
dependsOn: origami_build_${{ job.os }}
condition:
and(succeeded(),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- gtest
- ${{ if ne(job.os, 'almalinux8') }}:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- catch2
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
preTargetFilter: ${{ parameters.componentName }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- task: CMake@1
displayName: 'Origami Test CMake Configuration'
inputs:
cmakeArgs: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DORIGAMI_BUILD_SHARED_LIBS=ON
-DORIGAMI_ENABLE_PYTHON=ON
-DORIGAMI_BUILD_TESTING=ON
-GNinja
$(Agent.BuildDirectory)/s
- task: Bash@3
displayName: 'Build Origami Tests and Python Bindings'
inputs:
targetType: inline
workingDirectory: build
script: |
cmake --build . --target origami-tests origami_python -- -j$(nproc)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
# Run tests with CTest, which discovers and runs both the C++ and Python test suites
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }}
testDir: 'build'
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml'
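# Assuming test.yml wraps ctest, the step above amounts to running, from the build
# directory:
#   ctest --output-on-failure --force-new-ctest-process --output-junit test_output.xml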
# Test pip install workflow (currently commented out; uncomment the task below to enable)
# - task: Bash@3
# displayName: 'Test Pip Install'
# inputs:
# targetType: inline
# script: |
# set -e
# echo "==================================================================="
# echo "Testing pip install workflow (pip install -e .)"
# echo "==================================================================="
# # Set environment variables for pip install CMake build
# export ROCM_PATH=$(Agent.BuildDirectory)/rocm
# export CMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm:$(Agent.BuildDirectory)/vendor
# export CMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
# echo "ROCM_PATH: $ROCM_PATH"
# echo "CMAKE_PREFIX_PATH: $CMAKE_PREFIX_PATH"
# echo "CMAKE_CXX_COMPILER: $CMAKE_CXX_COMPILER"
# echo ""
# # Install from source directory
# cd "$(Agent.BuildDirectory)/s/python"
# pip install -e .
# # Verify import works
# echo ""
# echo "Verifying origami can be imported..."
# python3 -c "import origami; print('✓ Successfully imported origami')"
# # Run pytest on installed package
# echo ""
# echo "Running pytest tests..."
# python3 -m pytest tests/ -v -m "not slow" --tb=short
# echo ""
# echo "==================================================================="
# echo "Pip install test completed successfully"
# echo "==================================================================="
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test
gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}
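# For reference, a downstreamComponentMatrix entry consumed by the loop above follows
# the same shape as the one shown in the rccl template elsewhere in this diff; the
# values below are illustrative only:
#   - rocprofiler-sdk:
#       name: rocprofiler-sdk
#       sparseCheckoutDir: ''
#       skipUnifiedBuild: 'false'
#       buildDependsOn:
#         - origami_build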

View File

@@ -1,35 +1,10 @@
parameters: parameters:
- name: componentName
type: string
default: rccl
- name: checkoutRepo - name: checkoutRepo
type: string type: string
default: 'self' default: 'self'
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
- name: systemsRepo
type: string
default: systems_repo
- name: systemsSparseCheckoutDir
type: string
default: 'projects/rocprofiler-sdk'
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -82,52 +57,37 @@ parameters:
type: object type: object
default: default:
buildJobs: buildJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 } - gfx942:
- { os: ubuntu2204, packageManager: apt, target: gfx90a } target: gfx942
- gfx90a:
target: gfx90a
testJobs: testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 } - gfx942:
- { os: ubuntu2204, packageManager: apt, target: gfx90a } target: gfx942
- name: downstreamComponentMatrix - gfx90a:
type: object target: gfx90a
default:
- rocprofiler-sdk:
name: rocprofiler-sdk
sparseCheckoutDir: ''
skipUnifiedBuild: 'false'
buildDependsOn:
- rccl_build
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }} - job: rccl_build_${{ job.target }}
${{ if parameters.buildDependsOn }}: timeoutInMinutes: 90
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}_${{ job.target }}
timeoutInMinutes: 120
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
- name: HIP_ROCCLR_HOME - name: HIP_ROCCLR_HOME
value: $(Build.BinariesDirectory)/rocm value: $(Build.BinariesDirectory)/rocm
pool: ${{ variables.MEDIUM_BUILD_POOL }} pool: ${{ variables.MEDIUM_BUILD_POOL }}
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
endpoint: ContainerService3
workspace: workspace:
clean: all clean: all
steps: steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
submoduleBehaviour: recursive submoduleBehaviour: recursive
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters: parameters:
@@ -137,14 +97,10 @@ jobs:
parameters: parameters:
checkoutRef: ${{ parameters.checkoutRef }} checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }} dependencyList: ${{ parameters.rocmDependencies }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }} aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
os: ${{ job.os }}
extraBuildFlags: >- extraBuildFlags: >-
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc
@@ -156,87 +112,58 @@ jobs:
-GNinja -GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- ${{ if eq(job.os, 'ubuntu2204') }}: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml parameters:
parameters: aptPackages: ${{ parameters.aptPackages }}
aptPackages: ${{ parameters.aptPackages }} gpuTarget: ${{ job.target }}
gpuTarget: ${{ job.target }} extraEnvVars:
extraEnvVars: - HIP_ROCCLR_HOME:::/home/user/workspace/rocm
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm installLatestCMake: true
installLatestCMake: true
- ${{ if eq(parameters.unifiedBuild, False) }}: - ${{ each job in parameters.jobMatrix.testJobs }}:
- ${{ each job in parameters.jobMatrix.testJobs }}: - job: rccl_test_${{ job.target }}
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }} timeoutInMinutes: 120
timeoutInMinutes: 120 dependsOn: rccl_build_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }} condition:
condition: and(succeeded(),
and(succeeded(), eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')), eq(${{ parameters.aggregatePipeline }}, False)
eq(${{ parameters.aggregatePipeline }}, False) )
) variables:
variables: - group: common
- group: common - template: /.azuredevops/variables-global.yml
- template: /.azuredevops/variables-global.yml pool: ${{ job.target }}_test_pool
pool: ${{ job.target }}_test_pool workspace:
workspace: clean: all
clean: all steps:
steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters:
parameters: aptPackages: ${{ parameters.aptPackages }}
aptPackages: ${{ parameters.aptPackages }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml parameters:
parameters: gpuTarget: ${{ job.target }}
preTargetFilter: ${{ parameters.componentName }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
os: ${{ job.os }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
gpuTarget: ${{ job.target }} parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml checkoutRef: ${{ parameters.checkoutRef }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml dependencyList: ${{ parameters.rocmTestDependencies }}
parameters: gpuTarget: ${{ job.target }}
checkoutRef: ${{ parameters.checkoutRef }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
dependencyList: ${{ parameters.rocmTestDependencies }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
os: ${{ job.os }} parameters:
gpuTarget: ${{ job.target }} componentName: rccl
${{ if parameters.triggerDownstreamJobs }}: testDir: '$(Agent.BuildDirectory)/rocm/bin'
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }} testExecutable: './rccl-UnitTests'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters: parameters:
componentName: ${{ parameters.componentName }} aptPackages: ${{ parameters.aptPackages }}
os: ${{ job.os }} environment: test
testDir: '$(Agent.BuildDirectory)/rocm/bin' gpuTarget: ${{ job.target }}
testExecutable: './rccl-UnitTests'
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
environment: test
gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.systemsRepo }}
sparseCheckoutDir: ${{ parameters.systemsSparseCheckoutDir }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}
${{ if parameters.unifiedBuild }}:
buildDependsOn: ${{ component.unifiedBuild.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ component.unifiedBuild.downstreamAggregateNames }}
${{ else }}:
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}

View File

@@ -1,29 +1,10 @@
parameters: parameters:
- name: componentName
type: string
default: rdc
- name: checkoutRepo - name: checkoutRepo
type: string type: string
default: 'self' default: 'self'
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -52,7 +33,6 @@ parameters:
- clr - clr
- hipBLAS-common - hipBLAS-common
- hipBLASLt - hipBLASLt
- hipRAND
- llvm-project - llvm-project
- rocBLAS - rocBLAS
- rocm-cmake - rocm-cmake
@@ -63,7 +43,6 @@ parameters:
- rocprofiler - rocprofiler
- rocprofiler-register - rocprofiler-register
- rocprofiler-sdk - rocprofiler-sdk
- rocRAND
- ROCR-Runtime - ROCR-Runtime
- name: rocmTestDependencies - name: rocmTestDependencies
type: object type: object
@@ -95,11 +74,7 @@ parameters:
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.target }} - job: rdc_build_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.target }}
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
@@ -110,22 +85,16 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
parameters:
cmakeVersion: '3.25.0'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters: parameters:
checkoutRef: ${{ parameters.checkoutRef }} checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }} dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }} aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
# Build grpc # Build grpc
- task: Bash@3 - task: Bash@3
displayName: 'git clone grpc' displayName: 'git clone grpc'
@@ -135,7 +104,6 @@ jobs:
workingDirectory: $(Build.SourcesDirectory) workingDirectory: $(Build.SourcesDirectory)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
cmakeBuildDir: $(Build.SourcesDirectory)/grpc/build cmakeBuildDir: $(Build.SourcesDirectory)/grpc/build
cmakeSourceDir: $(Build.SourcesDirectory)/grpc cmakeSourceDir: $(Build.SourcesDirectory)/grpc
installDir: $(Build.SourcesDirectory)/bin installDir: $(Build.SourcesDirectory)/bin
@@ -149,7 +117,6 @@ jobs:
-GNinja -GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
extraBuildFlags: >- extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DGRPC_ROOT="$(Build.SourcesDirectory)/bin" -DGRPC_ROOT="$(Build.SourcesDirectory)/bin"
@@ -159,12 +126,9 @@ jobs:
-DAMDGPU_TARGETS=${{ job.target }} -DAMDGPU_TARGETS=${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -172,64 +136,60 @@ jobs:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- ${{ if eq(parameters.unifiedBuild, False) }}: - ${{ each job in parameters.jobMatrix.testJobs }}:
- ${{ each job in parameters.jobMatrix.testJobs }}: - job: rdc_test_${{ job.target }}
- job: ${{ parameters.componentName }}_test_${{ job.target }} dependsOn: rdc_build_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }} condition:
condition: and(succeeded(),
and(succeeded(), eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')), eq(${{ parameters.aggregatePipeline }}, False)
eq(${{ parameters.aggregatePipeline }}, False) )
) variables:
variables: - group: common
- group: common - template: /.azuredevops/variables-global.yml
- template: /.azuredevops/variables-global.yml - name: ROCM_PATH
- name: ROCM_PATH value: $(Agent.BuildDirectory)/rocm
value: $(Agent.BuildDirectory)/rocm - name: ROCM_DIR
- name: ROCM_DIR value: $(Agent.BuildDirectory)/rocm
value: $(Agent.BuildDirectory)/rocm pool: ${{ job.target }}_test_pool
pool: ${{ job.target }}_test_pool workspace:
workspace: clean: all
clean: all steps:
steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- checkout: none parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml aptPackages: ${{ parameters.aptPackages }}
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
aptPackages: ${{ parameters.aptPackages }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml gpuTarget: ${{ job.target }}
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
gpuTarget: ${{ job.target }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml checkoutRef: ${{ parameters.checkoutRef }}
parameters: dependencyList: ${{ parameters.rocmTestDependencies }}
checkoutRef: ${{ parameters.checkoutRef }} gpuTarget: ${{ job.target }}
dependencyList: ${{ parameters.rocmTestDependencies }} - task: Bash@3
gpuTarget: ${{ job.target }} displayName: Setup test environment
${{ if parameters.triggerDownstreamJobs }}: inputs:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }} targetType: inline
- task: Bash@3 script: |
displayName: Setup test environment sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rdcd /usr/sbin/rdcd
inputs: echo $(Agent.BuildDirectory)/rocm/lib/rdc/grpc/lib | sudo tee /etc/ld.so.conf.d/grpc.conf
targetType: inline sudo ldconfig -v
script: | - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rdcd /usr/sbin/rdcd - task: Bash@3
echo $(Agent.BuildDirectory)/rocm/lib/rdc/grpc/lib | sudo tee /etc/ld.so.conf.d/grpc.conf displayName: Test rdc
sudo ldconfig -v inputs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml targetType: inline
- task: Bash@3 script: >-
displayName: Test rdc $(Agent.BuildDirectory)/rocm/share/rdc/rdctst_tests/rdctst
inputs: --batch_mode
targetType: inline --start_rdcd
script: >- --unauth_comm
$(Agent.BuildDirectory)/rocm/share/rdc/rdctst_tests/rdctst - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
--batch_mode parameters:
--start_rdcd aptPackages: ${{ parameters.aptPackages }}
--unauth_comm environment: test
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml gpuTarget: ${{ job.target }}
parameters: extraPaths: /home/user/workspace/rocm/bin
aptPackages: ${{ parameters.aptPackages }}
environment: test
gpuTarget: ${{ job.target }}
extraPaths: /home/user/workspace/rocm/bin

View File

@@ -70,7 +70,6 @@ parameters:
- hipBLAS-common - hipBLAS-common
- hipBLASLt - hipBLASLt
- llvm-project - llvm-project
- rocm-cmake
- rocminfo - rocminfo
- rocprofiler-register - rocprofiler-register
- rocm_smi_lib - rocm_smi_lib
@@ -155,7 +154,7 @@ jobs:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }} pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }} packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
@@ -180,8 +179,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
os: ${{ job.os }} os: ${{ job.os }}
cmakeSourceDir: $(Agent.BuildDirectory)/sparse/projects/rocblas
cmakeBuildDir: $(Agent.BuildDirectory)/sparse/projects/rocblas/build
extraBuildFlags: >- extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=Release

View File

@@ -8,25 +8,6 @@ parameters:
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
- name: rocPyDecodeRepo
type: string
default: rocpydecode_repo
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -75,23 +56,10 @@ parameters:
testJobs: testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 } - { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a } - { os: ubuntu2204, packageManager: apt, target: gfx90a }
- name: downstreamComponentMatrix
type: object
default:
- rocPyDecode:
name: rocPyDecode
sparseCheckoutDir: ''
skipUnifiedBuild: 'false'
buildDependsOn:
- rocDecode_build
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.os }} - job: ${{ parameters.componentName }}_build_${{ job.os }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
@@ -115,15 +83,12 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters: parameters:
checkoutRef: ${{ parameters.checkoutRef }} checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }} dependencyList: ${{ parameters.rocmDependencies }}
os: ${{ job.os }} os: ${{ job.os }}
aggregatePipeline: ${{ parameters.aggregatePipeline }} aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
os: ${{ job.os }} os: ${{ job.os }}
@@ -204,15 +169,3 @@ jobs:
registerROCmPackages: true registerROCmPackages: true
environment: test environment: test
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.rocPyDecodeRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}

View File

@@ -210,7 +210,7 @@ jobs:
parameters: parameters:
componentName: ${{ parameters.componentName }} componentName: ${{ parameters.componentName }}
testDir: '$(Agent.BuildDirectory)/rocm/bin/rocprim' testDir: '$(Agent.BuildDirectory)/rocm/bin/rocprim'
extraTestParameters: '-I ${{ job.shard }},,${{ job.shardCount }}' extraTestParameters: '-I ${{ job.shard }},,${{ job.shardCount }} -E device_merge_inplace'
os: ${{ job.os }} os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters: parameters:

View File

@@ -5,22 +5,6 @@ parameters:
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -63,19 +47,19 @@ parameters:
type: object type: object
default: default:
buildJobs: buildJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 } - gfx942:
- { os: ubuntu2204, packageManager: apt, target: gfx90a } target: gfx942
- gfx90a:
target: gfx90a
testJobs: testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 } - gfx942:
- { os: ubuntu2204, packageManager: apt, target: gfx90a } target: gfx942
- gfx90a:
target: gfx90a
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: rocPyDecode_build_${{ job.target }} - job: rocPyDecode_build_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
@@ -90,20 +74,16 @@ jobs:
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }} pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters: parameters:
checkoutRef: ${{ parameters.checkoutRef }} checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }} dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }} aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- task: Bash@3 - task: Bash@3
displayName: 'Save Python Package Paths' displayName: 'Save Python Package Paths'
inputs: inputs:

View File

@@ -1,29 +1,10 @@
parameters: parameters:
- name: componentName
type: string
default: rocWMMA
- name: checkoutRepo - name: checkoutRepo
type: string type: string
default: 'self' default: 'self'
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -85,11 +66,7 @@ parameters:
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.target }} - job: rocWMMA_build_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.target }}
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
@@ -104,7 +81,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters: parameters:
checkoutRef: ${{ parameters.checkoutRef }} checkoutRef: ${{ parameters.checkoutRef }}
@@ -126,12 +102,9 @@ jobs:
# gfx1030 not supported in documentation # gfx1030 not supported in documentation
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -139,45 +112,43 @@ jobs:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- ${{ if eq(parameters.unifiedBuild, False) }}: - ${{ each job in parameters.jobMatrix.testJobs }}:
- ${{ each job in parameters.jobMatrix.testJobs }}: - job: rocWMMA_test_${{ job.target }}
- job: ${{ parameters.componentName }}_test_${{ job.target }} timeoutInMinutes: 270
timeoutInMinutes: 350 dependsOn: rocWMMA_build_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }} condition:
condition: and(succeeded(),
and(succeeded(), eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')), eq(${{ parameters.aggregatePipeline }}, False)
eq(${{ parameters.aggregatePipeline }}, False) )
) variables:
variables: - group: common
- group: common - template: /.azuredevops/variables-global.yml
- template: /.azuredevops/variables-global.yml pool: ${{ job.target }}_test_pool
pool: ${{ job.target }}_test_pool workspace:
workspace: clean: all
clean: all steps:
steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters:
parameters: aptPackages: ${{ parameters.aptPackages }}
aptPackages: ${{ parameters.aptPackages }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml parameters:
parameters: gpuTarget: ${{ job.target }}
preTargetFilter: ${{ parameters.componentName }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
gpuTarget: ${{ job.target }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml checkoutRef: ${{ parameters.checkoutRef }}
parameters: dependencyList: ${{ parameters.rocmTestDependencies }}
checkoutRef: ${{ parameters.checkoutRef }} gpuTarget: ${{ job.target }}
dependencyList: ${{ parameters.rocmTestDependencies }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
gpuTarget: ${{ job.target }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml componentName: rocWMMA
parameters: testDir: '$(Agent.BuildDirectory)/rocm/bin/rocwmma'
componentName: ${{ parameters.componentName }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
testDir: '$(Agent.BuildDirectory)/rocm/bin/rocwmma' parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml aptPackages: ${{ parameters.aptPackages }}
parameters: environment: test
aptPackages: ${{ parameters.aptPackages }} gpuTarget: ${{ job.target }}
environment: test
gpuTarget: ${{ job.target }}

View File

@@ -81,7 +81,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters: parameters:
componentName: rocm-cmake componentName: rocm-cmake
testParameters: '-E "pass-version-parent" --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml' testParameters: '-E "pass-version-parent" --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
os: ${{ job.os }} os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:

View File

@@ -1,29 +1,10 @@
parameters: parameters:
- name: componentName
type: string
default: rocm-core
- name: checkoutRepo - name: checkoutRepo
type: string type: string
default: 'self' default: 'self'
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -46,10 +27,6 @@ parameters:
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: rocm_core_${{ job.os }} - job: rocm_core_${{ job.os }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
pool: pool:
${{ if eq(job.os, 'ubuntu2404') }}: ${{ if eq(job.os, 'ubuntu2404') }}:
vmImage: 'ubuntu-24.04' vmImage: 'ubuntu-24.04'
@@ -73,10 +50,8 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }} os: ${{ job.os }}
useAmdclang: false useAmdclang: false
extraBuildFlags: >- extraBuildFlags: >-
@@ -90,12 +65,9 @@ jobs:
-GNinja -GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
os: ${{ job.os }} os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }} os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml

View File

@@ -14,42 +14,16 @@ parameters:
type: object type: object
default: default:
- cmake - cmake
- libdw-dev
- libglfw3-dev - libglfw3-dev
- libmsgpack-dev - libmsgpack-dev
- libopencv-dev
- libtbb-dev - libtbb-dev
- libtiff-dev
- libva-amdgpu-dev
- libva2-amdgpu
- mesa-amdgpu-va-drivers
- libavcodec-dev
- libavformat-dev
- libavutil-dev
- ninja-build - ninja-build
- python3-pip - python3-pip
- protobuf-compiler
- libprotoc-dev
- libopencv-dev
- name: pipModules
type: object
default:
- future==1.0.0
- pytz==2022.1
- numpy==1.23
- google==3.0.0
- protobuf==3.12.4
- onnx==1.12.0
- nnef==1.0.7
- name: rocmDependencies - name: rocmDependencies
type: object type: object
default: default:
- AMDMIGraphX - AMDMIGraphX
- aomp
- aomp-extras
- clr - clr
- half
- composable_kernel
- hipBLAS - hipBLAS
- hipBLAS-common - hipBLAS-common
- hipBLASLt - hipBLASLt
@@ -59,37 +33,21 @@ parameters:
- hipRAND - hipRAND
- hipSOLVER - hipSOLVER
- hipSPARSE - hipSPARSE
- hipTensor
- llvm-project - llvm-project
- MIOpen
- MIVisionX
- rocm_smi_lib
- rccl
- rocAL
- rocALUTION
- rocBLAS - rocBLAS
- rocDecode
- rocFFT - rocFFT
- rocJPEG
- rocPRIM - rocPRIM
- rocprofiler-register - rocprofiler-register
- rocprofiler-sdk
- ROCR-Runtime - ROCR-Runtime
- rocRAND - rocRAND
- rocSOLVER - rocSOLVER
- rocSPARSE - rocSPARSE
- rocThrust - rocThrust
- rocWMMA
- rpp
- name: rocmTestDependencies - name: rocmTestDependencies
type: object type: object
default: default:
- AMDMIGraphX - AMDMIGraphX
- aomp
- aomp-extras
- clr - clr
- half
- composable_kernel
- hipBLAS - hipBLAS
- hipBLAS-common - hipBLAS-common
- hipBLASLt - hipBLASLt
@@ -99,30 +57,18 @@ parameters:
- hipRAND - hipRAND
- hipSOLVER - hipSOLVER
- hipSPARSE - hipSPARSE
- hipTensor
- llvm-project - llvm-project
- MIOpen
- MIVisionX
- rocm_smi_lib
- rccl
- rocAL
- rocALUTION
- rocBLAS - rocBLAS
- rocDecode
- rocFFT - rocFFT
- rocminfo - rocminfo
- rocPRIM - rocPRIM
- rocJPEG
- rocprofiler-register - rocprofiler-register
- rocprofiler-sdk
- ROCR-Runtime - ROCR-Runtime
- rocRAND - rocRAND
- rocSOLVER - rocSOLVER
- rocSPARSE - rocSPARSE
- rocThrust - rocThrust
- roctracer - roctracer
- rocWMMA
- rpp
- name: jobMatrix - name: jobMatrix
type: object type: object
@@ -151,11 +97,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
parameters:
cmakeVersion: '3.25.0'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
@@ -217,10 +158,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
parameters:
cmakeVersion: '3.25.0'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
@@ -251,6 +188,5 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test environment: test
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}

View File

@@ -43,14 +43,9 @@ parameters:
- ninja-build - ninja-build
- python3-pip - python3-pip
- python3-venv - python3-venv
- googletest
- libgtest-dev
- libgmock-dev
- libboost-filesystem-dev
- name: pipModules - name: pipModules
type: object type: object
default: default:
- msgpack
- joblib - joblib
- "packaging>=22.0" - "packaging>=22.0"
- pytest - pytest
@@ -107,7 +102,7 @@ jobs:
workspace: workspace:
clean: all clean: all
steps: steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
@@ -152,13 +147,6 @@ jobs:
echo "##vso[task.prependpath]$USER_BASE/bin" echo "##vso[task.prependpath]$USER_BASE/bin"
echo "##vso[task.setvariable variable=PytestCmakePath]$USER_BASE/share/Pytest/cmake" echo "##vso[task.setvariable variable=PytestCmakePath]$USER_BASE/share/Pytest/cmake"
displayName: Set cmake configure paths displayName: Set cmake configure paths
- task: Bash@3
displayName: Add ROCm binaries to PATH
inputs:
targetType: inline
script: |
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
os: ${{ job.os }} os: ${{ job.os }}

View File

@@ -1,29 +1,10 @@
parameters: parameters:
- name: componentName
type: string
default: rocm_smi_lib
- name: checkoutRepo - name: checkoutRepo
type: string type: string
default: 'self' default: 'self'
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -51,10 +32,6 @@ parameters:
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: rocm_smi_lib_build_${{ job.os }} - job: rocm_smi_lib_build_${{ job.os }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
pool: pool:
${{ if eq(job.os, 'ubuntu2404') }}: ${{ if eq(job.os, 'ubuntu2404') }}:
vmImage: 'ubuntu-24.04' vmImage: 'ubuntu-24.04'
@@ -78,10 +55,8 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }} os: ${{ job.os }}
useAmdclang: false useAmdclang: false
extraBuildFlags: >- extraBuildFlags: >-
@@ -90,56 +65,51 @@ jobs:
-GNinja -GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
os: ${{ job.os }} os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }} os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
# parameters: # parameters:
# aptPackages: ${{ parameters.aptPackages }} # aptPackages: ${{ parameters.aptPackages }}
- ${{ if eq(parameters.unifiedBuild, False) }}: - ${{ each job in parameters.jobMatrix.testJobs }}:
- ${{ each job in parameters.jobMatrix.testJobs }}: - job: rocm_smi_lib_test_${{ job.os }}_${{ job.target }}
- job: rocm_smi_lib_test_${{ job.os }}_${{ job.target }} dependsOn: rocm_smi_lib_build_${{ job.os }}
dependsOn: rocm_smi_lib_build_${{ job.os }} condition:
condition: and(succeeded(),
and(succeeded(), eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')), eq(${{ parameters.aggregatePipeline }}, False)
eq(${{ parameters.aggregatePipeline }}, False) )
) variables:
variables: - group: common
- group: common - template: /.azuredevops/variables-global.yml
- template: /.azuredevops/variables-global.yml pool: ${{ job.target }}_test_pool
pool: ${{ job.target }}_test_pool workspace:
workspace: clean: all
clean: all steps:
steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- checkout: none parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml aptPackages: ${{ parameters.aptPackages }}
parameters: packageManager: ${{ job.packageManager }}
aptPackages: ${{ parameters.aptPackages }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
packageManager: ${{ job.packageManager }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml os: ${{ job.os }}
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
os: ${{ job.os }} parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml runRocminfo: false
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
runRocminfo: false parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml componentName: rocm_smi_lib
parameters: testDir: '$(Agent.BuildDirectory)'
componentName: ${{ parameters.componentName }} testExecutable: 'sudo ./rocm/share/rocm_smi/rsmitst_tests/rsmitst'
testDir: '$(Agent.BuildDirectory)' testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
testExecutable: 'sudo ./rocm/share/rocm_smi/rsmitst_tests/rsmitst' os: ${{ job.os }}
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes' - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
os: ${{ job.os }} parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml aptPackages: ${{ parameters.aptPackages }}
parameters: environment: test
aptPackages: ${{ parameters.aptPackages }} gpuTarget: ${{ job.target }}
environment: test
gpuTarget: ${{ job.target }}

View File

@@ -1,29 +1,10 @@
parameters: parameters:
- name: componentName
type: string
default: rocminfo
- name: checkoutRepo - name: checkoutRepo
type: string type: string
default: 'self' default: 'self'
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -59,11 +40,7 @@ parameters:
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.os }} - job: rocminfo_build_${{ job.os }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
pool: pool:
vmImage: 'ubuntu-22.04' vmImage: 'ubuntu-22.04'
${{ if eq(job.os, 'almalinux8') }}: ${{ if eq(job.os, 'almalinux8') }}:
@@ -85,18 +62,14 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters: parameters:
checkoutRef: ${{ parameters.checkoutRef }} checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }} dependencyList: ${{ parameters.rocmDependencies }}
aggregatePipeline: ${{ parameters.aggregatePipeline }} aggregatePipeline: ${{ parameters.aggregatePipeline }}
os: ${{ job.os }} os: ${{ job.os }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }} os: ${{ job.os }}
useAmdclang: false useAmdclang: false
extraBuildFlags: >- extraBuildFlags: >-
@@ -105,71 +78,65 @@ jobs:
-GNinja -GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
os: ${{ job.os }} os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }} os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- ${{ if eq(parameters.unifiedBuild, False) }}: - ${{ each job in parameters.jobMatrix.testJobs }}:
- ${{ each job in parameters.jobMatrix.testJobs }}: - job: rocminfo_test_${{ job.target }}
- job: rocminfo_test_${{ job.target }} dependsOn: rocminfo_build_${{ job.os }}
dependsOn: rocminfo_build_${{ job.os }} condition:
condition: and(succeeded(),
and(succeeded(), eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')), eq(${{ parameters.aggregatePipeline }}, False)
eq(${{ parameters.aggregatePipeline }}, False) )
) variables:
variables: - group: common
- group: common - template: /.azuredevops/variables-global.yml
- template: /.azuredevops/variables-global.yml pool: ${{ job.target }}_test_pool
pool: ${{ job.target }}_test_pool workspace:
workspace: clean: all
clean: all steps:
steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters:
parameters: aptPackages: ${{ parameters.aptPackages }}
aptPackages: ${{ parameters.aptPackages }} packageManager: ${{ job.packageManager }}
packageManager: ${{ job.packageManager }} registerROCmPackages: true
registerROCmPackages: true - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml parameters:
parameters: os: ${{ job.os }}
os: ${{ job.os }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml parameters:
parameters: checkoutRef: ${{ parameters.checkoutRef }}
checkoutRef: ${{ parameters.checkoutRef }} dependencyList: ${{ parameters.rocmTestDependencies }}
dependencyList: ${{ parameters.rocmTestDependencies }} gpuTarget: ${{ job.target }}
gpuTarget: ${{ job.target }} os: ${{ job.os }}
os: ${{ job.os }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
${{ if parameters.triggerDownstreamJobs }}: parameters:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }} runRocminfo: false
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters: parameters:
runRocminfo: false componentName: rocminfo
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml testDir: '$(Agent.BuildDirectory)'
parameters: testExecutable: './rocm/bin/rocminfo'
componentName: ${{ parameters.componentName }} testParameters: ''
testDir: '$(Agent.BuildDirectory)' testPublishResults: false
testExecutable: './rocm/bin/rocminfo' os: ${{ job.os }}
testParameters: '' - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
testPublishResults: false parameters:
os: ${{ job.os }} componentName: rocm_agent_enumerator
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml testDir: '$(Agent.BuildDirectory)'
parameters: testExecutable: './rocm/bin/rocm_agent_enumerator'
componentName: rocm_agent_enumerator testParameters: ''
testDir: '$(Agent.BuildDirectory)' testPublishResults: false
testExecutable: './rocm/bin/rocm_agent_enumerator' os: ${{ job.os }}
testParameters: '' - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
testPublishResults: false parameters:
os: ${{ job.os }} aptPackages: ${{ parameters.aptPackages }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml registerROCmPackages: true
parameters: environment: test
aptPackages: ${{ parameters.aptPackages }} gpuTarget: ${{ job.target }}
registerROCmPackages: true
environment: test
gpuTarget: ${{ job.target }}
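Both variants of the test-job condition above gate on the same pipeline variables; they differ only in which value is checked against the DISABLED list (the componentName parameter in one, Build.DefinitionName in the other). A sketch with hypothetical variable values showing how the expression evaluates:

# Hypothetical variable-group values for a gfx942 test agent:
variables:
  ENABLE_GFX942_TESTS: 'true'
  DISABLED_GFX942_TESTS: 'rocdecode,rocjpeg'   # comma-separated skip list
# For this pipeline ('rocminfo'):
#   eq(variables['ENABLE_GFX942_TESTS'], 'true')                -> true
#   split(variables['DISABLED_GFX942_TESTS'], ',')              -> rocdecode, rocjpeg
#   not(containsValue(<that list>, 'rocminfo'))                 -> true
#   eq(aggregatePipeline, False)                                -> true when not an aggregate build
# so the test job runs; adding 'rocminfo' to DISABLED_GFX942_TESTS skips it.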
@@ -55,7 +55,6 @@ parameters:
- pymongo - pymongo
- pyyaml - pyyaml
- setuptools - setuptools
- sqlalchemy
- tabulate - tabulate
- textual - textual
- textual_plotext - textual_plotext
@@ -65,13 +64,6 @@ parameters:
- pytest - pytest
- pytest-cov - pytest-cov
- pytest-xdist - pytest-xdist
- name: rocmDependencies
type: object
default:
- clr
- llvm-project
- ROCR-Runtime
- rocprofiler-sdk
- name: rocmTestDependencies - name: rocmTestDependencies
type: object type: object
default: default:
@@ -108,12 +100,10 @@ jobs:
${{ if parameters.buildDependsOn }}: ${{ if parameters.buildDependsOn }}:
dependsOn: dependsOn:
- ${{ each build in parameters.buildDependsOn }}: - ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.target }} - ${{ build }}_${{ job.os }}_${{ job.target }}
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
- name: ROCM_PATH
value: $(Agent.BuildDirectory)/rocm
pool: pool:
vmImage: ${{ variables.BASE_BUILD_POOL }} vmImage: ${{ variables.BASE_BUILD_POOL }}
workspace: workspace:
@@ -128,14 +118,6 @@ jobs:
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }} sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
extraBuildFlags: >- extraBuildFlags: >-
@@ -1,29 +1,10 @@
parameters: parameters:
- name: componentName
type: string
default: rocprofiler-sdk
- name: checkoutRepo - name: checkoutRepo
type: string type: string
default: 'self' default: 'self'
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -79,27 +60,23 @@ parameters:
type: object type: object
default: default:
buildJobs: buildJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 } - gfx942:
- { os: ubuntu2204, packageManager: apt, target: gfx90a } target: gfx942
- gfx90a:
target: gfx90a
testJobs: testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 } - gfx942:
- { os: ubuntu2204, packageManager: apt, target: gfx90a } target: gfx942
- gfx90a:
target: gfx90a
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: rocprofiler_sdk_build_${{ job.os }}_${{ job.target }} - job: rocprofiler_sdk_build_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os}}_${{ job.target }}
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
pool: ${{ variables.MEDIUM_BUILD_POOL }} pool: ${{ variables.MEDIUM_BUILD_POOL }}
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
endpoint: ContainerService3
workspace: workspace:
clean: all clean: all
steps: steps:
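The flow-style jobMatrix entries ({ os: ..., packageManager: ..., target: ... }) let each expanded job read job.os and job.packageManager in addition to job.target. A minimal, self-contained sketch (hypothetical job name) of how the compile-time ${{ each }} loop consumes such an entry:

parameters:
- name: jobMatrix
  type: object
  default:
    buildJobs:
    - { os: ubuntu2204, packageManager: apt, target: gfx942 }
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
  - job: example_build_${{ job.os }}_${{ job.target }}   # expands to example_build_ubuntu2204_gfx942
    steps:
    - script: echo "installing with ${{ job.packageManager }} for ${{ job.target }}"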
@@ -107,23 +84,18 @@ jobs:
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }} pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }}
registerROCmPackages: true registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters: parameters:
checkoutRef: ${{ parameters.checkoutRef }} checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }} dependencyList: ${{ parameters.rocmDependencies }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }} aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- task: Bash@3 - task: Bash@3
displayName: Add Python site-packages binaries to path displayName: Add Python site-packages binaries to path
inputs: inputs:
@@ -133,8 +105,6 @@ jobs:
echo "##vso[task.prependpath]$USER_BASE/bin" echo "##vso[task.prependpath]$USER_BASE/bin"
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }}
extraBuildFlags: >- extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DROCPROFILER_BUILD_TESTS=ON -DROCPROFILER_BUILD_TESTS=ON
@@ -144,13 +114,9 @@ jobs:
-GNinja -GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -160,72 +126,62 @@ jobs:
# gpuTarget: ${{ job.target }} # gpuTarget: ${{ job.target }}
# registerROCmPackages: true # registerROCmPackages: true
- ${{ if eq(parameters.unifiedBuild, False) }}: - ${{ each job in parameters.jobMatrix.testJobs }}:
- ${{ each job in parameters.jobMatrix.testJobs }}: - job: rocprofiler_sdk_test_${{ job.target }}
- job: rocprofiler_sdk_test_${{ job.os }}_${{ job.target }} dependsOn: rocprofiler_sdk_build_${{ job.target }}
dependsOn: rocprofiler_sdk_build_${{ job.os }}_${{ job.target }} condition:
condition: and(succeeded(),
and(succeeded(), eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')), eq(${{ parameters.aggregatePipeline }}, False)
eq(${{ parameters.aggregatePipeline }}, False) )
) variables:
variables: - group: common
- group: common - template: /.azuredevops/variables-global.yml
- template: /.azuredevops/variables-global.yml pool: ${{ job.target }}_test_pool
pool: ${{ job.target }}_test_pool workspace:
workspace: clean: all
clean: all steps:
steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters:
parameters: aptPackages: ${{ parameters.aptPackages }}
aptPackages: ${{ parameters.aptPackages }} pipModules: ${{ parameters.pipModules }}
pipModules: ${{ parameters.pipModules }} registerROCmPackages: true
packageManager: ${{ job.packageManager }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
registerROCmPackages: true - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml checkoutRepo: ${{ parameters.checkoutRepo }}
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
checkoutRepo: ${{ parameters.checkoutRepo }} parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml checkoutRef: ${{ parameters.checkoutRef }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml dependencyList: ${{ parameters.rocmDependencies }}
parameters: gpuTarget: ${{ job.target }}
checkoutRef: ${{ parameters.checkoutRef }} - task: Bash@3
dependencyList: ${{ parameters.rocmDependencies }} displayName: Add Python and ROCm binaries to path
os: ${{ job.os }} inputs:
gpuTarget: ${{ job.target }} targetType: inline
${{ if parameters.triggerDownstreamJobs }}: script: |
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }} USER_BASE=$(python3 -m site --user-base)
- task: Bash@3 echo "##vso[task.prependpath]$USER_BASE/bin"
displayName: Add Python and ROCm binaries to path echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
inputs: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
targetType: inline parameters:
script: | extraBuildFlags: >-
USER_BASE=$(python3 -m site --user-base) -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
echo "##vso[task.prependpath]$USER_BASE/bin" -DROCPROFILER_BUILD_TESTS=ON
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin" -DROCPROFILER_BUILD_SAMPLES=ON
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml -DROCPROFILER_BUILD_RELEASE=ON
parameters: -DGPU_TARGETS=${{ job.target }}
componentName: ${{ parameters.componentName }} -GNinja
os: ${{ job.os }} - template: ${{ variables.CI_TEMPLATE_PATH}}/steps/gpu-diagnostics.yml
extraBuildFlags: >- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm parameters:
-DROCPROFILER_BUILD_TESTS=ON componentName: rocprofiler-sdk
-DROCPROFILER_BUILD_SAMPLES=ON - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-DROCPROFILER_BUILD_RELEASE=ON parameters:
-DGPU_TARGETS=${{ job.target }} aptPackages: ${{ parameters.aptPackages }}
-GNinja pipModules: ${{ parameters.pipModules }}
- template: ${{ variables.CI_TEMPLATE_PATH}}/steps/gpu-diagnostics.yml environment: test
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml gpuTarget: ${{ job.target }}
parameters: registerROCmPackages: true
componentName: ${{ parameters.componentName }}
os: ${{ job.os }}
testDir: $(Agent.BuildDirectory)/build
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test
gpuTarget: ${{ job.target }}
registerROCmPackages: true
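The extraBuildFlags listed above presumably land on the cmake configure line assembled by build-cmake.yml (that template is not part of this diff). A hedged sketch of the equivalent local configure and build, with gfx942 standing in for ${{ job.target }} and the directory layout assumed:

- task: Bash@3
  displayName: Configure and build rocprofiler-sdk (sketch)
  inputs:
    targetType: inline
    workingDirectory: $(Agent.BuildDirectory)
    script: |
      # flags mirror extraBuildFlags above; the -S/-B paths are assumptions
      cmake -S s -B build -GNinja \
        -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm \
        -DROCPROFILER_BUILD_TESTS=ON \
        -DROCPROFILER_BUILD_SAMPLES=ON \
        -DROCPROFILER_BUILD_RELEASE=ON \
        -DGPU_TARGETS=gfx942
      cmake --build build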
@@ -6,25 +6,6 @@ parameters:
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: componentName
type: string
default: rocprofiler-systems
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -106,10 +87,6 @@ parameters:
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: rocprofiler_systems_build_${{ job.target }} - job: rocprofiler_systems_build_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
@@ -128,7 +105,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters: parameters:
@@ -160,16 +136,12 @@ jobs:
-DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/include/rocjpeg -DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/include/rocjpeg
-DGPU_TARGETS=${{ job.target }} -DGPU_TARGETS=${{ job.target }}
-GNinja -GNinja
componentName: ${{ parameters.componentName }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
componentName: ${{ parameters.componentName }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters: parameters:
@@ -179,93 +151,85 @@ jobs:
registerROCmPackages: true registerROCmPackages: true
extraPaths: /home/user/workspace/rocm/bin:/home/user/workspace/rocm/llvm/bin extraPaths: /home/user/workspace/rocm/bin:/home/user/workspace/rocm/llvm/bin
- ${{ if eq(parameters.unifiedBuild, False) }}: - ${{ each job in parameters.jobMatrix.testJobs }}:
- ${{ each job in parameters.jobMatrix.testJobs }}: - job: rocprofiler_systems_test_${{ job.target }}
- job: rocprofiler_systems_test_${{ job.target }} dependsOn: rocprofiler_systems_build_${{ job.target }}
dependsOn: rocprofiler_systems_build_${{ job.target }} condition:
condition: and(succeeded(),
and(succeeded(), eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')), eq(${{ parameters.aggregatePipeline }}, False)
eq(${{ parameters.aggregatePipeline }}, False) )
) timeoutInMinutes: 180
timeoutInMinutes: 180 variables:
variables: - group: common
- group: common - template: /.azuredevops/variables-global.yml
- template: /.azuredevops/variables-global.yml - name: ROCM_PATH
- name: ROCM_PATH value: $(Agent.BuildDirectory)/rocm
value: $(Agent.BuildDirectory)/rocm pool:
pool: name: ${{ job.target }}_test_pool
name: ${{ job.target }}_test_pool workspace:
workspace: clean: all
clean: all steps:
steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters:
parameters: aptPackages: ${{ parameters.aptPackages }}
aptPackages: ${{ parameters.aptPackages }} pipModules: ${{ parameters.pipModules }}
pipModules: ${{ parameters.pipModules }} registerROCmPackages: true
registerROCmPackages: true - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml parameters:
parameters: checkoutRepo: ${{ parameters.checkoutRepo }}
checkoutRepo: ${{ parameters.checkoutRepo }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml parameters:
parameters: checkoutRef: ${{ parameters.checkoutRef }}
checkoutRef: ${{ parameters.checkoutRef }} dependencyList: ${{ parameters.rocmDependencies }}
dependencyList: ${{ parameters.rocmDependencies }} gpuTarget: ${{ job.target }}
gpuTarget: ${{ job.target }} - task: Bash@3
${{ if parameters.triggerDownstreamJobs }}: displayName: Add ROCm binaries to PATH
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }} inputs:
- task: Bash@3 targetType: inline
displayName: Add ROCm binaries to PATH script: |
inputs: echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
targetType: inline echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
script: | - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin" parameters:
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin" # build flags reference: https://rocm.docs.amd.com/projects/omnitrace/en/latest/install/install.html
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml extraBuildFlags: >-
parameters: -DROCPROFSYS_BUILD_TESTING=ON
cmakeSourceDir: $(Agent.BuildDirectory)/s/projects/rocprofiler-systems -DROCPROFSYS_BUILD_DYNINST=ON
# build flags reference: https://rocm.docs.amd.com/projects/omnitrace/en/latest/install/install.html -DROCPROFSYS_BUILD_LIBUNWIND=ON
extraBuildFlags: >- -DROCPROFSYS_DISABLE_EXAMPLES="openmp-target"
-DCMAKE_INSTALL_PREFIX=$(Agent.BuildDirectory)/rocprofiler-systems -DDYNINST_BUILD_TBB=ON
-DROCPROFSYS_USE_PYTHON=ON -DDYNINST_BUILD_ELFUTILS=ON
-DROCPROFSYS_BUILD_TESTING=ON -DDYNINST_BUILD_LIBIBERTY=ON
-DROCPROFSYS_BUILD_DYNINST=ON -DDYNINST_BUILD_BOOST=ON
-DROCPROFSYS_BUILD_LIBUNWIND=ON -DROCPROFSYS_USE_PAPI=ON
-DROCPROFSYS_DISABLE_EXAMPLES="openmp-target" -DROCPROFSYS_USE_MPI=ON
-DDYNINST_BUILD_TBB=ON -DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/include/rocjpeg
-DDYNINST_BUILD_ELFUTILS=ON -DGPU_TARGETS=${{ job.target }}
-DDYNINST_BUILD_LIBIBERTY=ON -GNinja
-DDYNINST_BUILD_BOOST=ON - task: Bash@3
-DROCPROFSYS_USE_PAPI=ON displayName: Set up rocprofiler-systems env
-DROCPROFSYS_USE_MPI=ON inputs:
-DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/include/rocjpeg targetType: inline
-DGPU_TARGETS=${{ job.target }} script: source share/rocprofiler-systems/setup-env.sh
-GNinja workingDirectory: build
- task: Bash@3 - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
displayName: Set up rocprofiler-systems env parameters:
inputs: componentName: rocprofiler-systems
targetType: inline - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
script: source $(Agent.BuildDirectory)/rocprofiler-systems/share/rocprofiler-systems/setup-env.sh parameters:
workingDirectory: $(Agent.BuildDirectory)/rocprofiler-systems/share/rocprofiler-systems gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }} gpuTarget: ${{ job.target }}
testDir: $(Agent.BuildDirectory)/s/build/tests/ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
testParameters: '--output-on-failure' parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml aptPackages: ${{ parameters.aptPackages }}
parameters: pipModules: ${{ parameters.pipModules }}
gpuTarget: ${{ job.target }} environment: test
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml registerROCmPackages: true
parameters: gpuTarget: ${{ job.target }}
gpuTarget: ${{ job.target }} extraPaths: /home/user/workspace/rocm/bin:/home/user/workspace/rocm/llvm/bin
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test
registerROCmPackages: true
gpuTarget: ${{ job.target }}
extraPaths: /home/user/workspace/rocm/bin:/home/user/workspace/rocm/llvm/bin
@@ -1,63 +0,0 @@
parameters:
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
- name: cli11Version
type: string
default: ''
- name: aptPackages
type: object
default:
- cmake
- git
- ninja-build
- name: jobMatrix
type: object
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt}
- { os: almalinux8, packageManager: dnf}
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: cli11_${{ job.os }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool:
vmImage: 'ubuntu-22.04'
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
endpoint: ContainerService3
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- task: Bash@3
displayName: Clone cli11 ${{ parameters.cli11Version }}
inputs:
targetType: inline
script: git clone https://github.com/CLIUtils/CLI11.git -b ${{ parameters.cli11Version }}
workingDirectory: $(Agent.BuildDirectory)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
os: ${{ job.os }}
cmakeBuildDir: $(Agent.BuildDirectory)/CLI11/build
cmakeSourceDir: $(Agent.BuildDirectory)/CLI11
useAmdclang: false
extraBuildFlags: >-
-DCMAKE_BUILD_TYPE=Release
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
os: ${{ job.os }}
@@ -1,66 +0,0 @@
parameters:
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
- name: yamlcppVersion
type: string
default: ''
- name: aptPackages
type: object
default:
- cmake
- git
- ninja-build
- name: jobMatrix
type: object
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt}
- { os: almalinux8, packageManager: dnf}
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: yamlcpp_${{ job.os }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool:
vmImage: 'ubuntu-22.04'
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
endpoint: ContainerService3
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- task: Bash@3
displayName: Clone yaml-cpp ${{ parameters.yamlcppVersion }}
inputs:
targetType: inline
script: git clone https://github.com/jbeder/yaml-cpp.git -b ${{ parameters.yamlcppVersion }}
workingDirectory: $(Agent.BuildDirectory)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
os: ${{ job.os }}
cmakeBuildDir: $(Agent.BuildDirectory)/yaml-cpp/build
cmakeSourceDir: $(Agent.BuildDirectory)/yaml-cpp
useAmdclang: false
extraBuildFlags: >-
-DCMAKE_BUILD_TYPE=Release
-DYAML_CPP_BUILD_TOOLS=OFF
-DYAML_BUILD_SHARED_LIBS=OFF
-DYAML_CPP_INSTALL=ON
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
os: ${{ job.os }}
@@ -1,23 +0,0 @@
variables:
- group: common
- template: /.azuredevops/variables-global.yml
parameters:
- name: cli11Version
type: string
default: "main"
resources:
repositories:
- repository: pipelines_repo
type: github
endpoint: ROCm
name: ROCm/ROCm
trigger: none
pr: none
jobs:
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/cli11.yml
parameters:
cli11Version: ${{ parameters.cli11Version }}
@@ -1,24 +0,0 @@
variables:
- group: common
- template: /.azuredevops/variables-global.yml
parameters:
- name: yamlcppVersion
type: string
default: "0.8.0"
resources:
repositories:
- repository: pipelines_repo
type: github
endpoint: ROCm
name: ROCm/ROCm
trigger: none
pr: none
jobs:
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/yamlcpp.yml
parameters:
yamlcppVersion: ${{ parameters.yamlcppVersion }}
@@ -1,15 +1,10 @@
parameters:
- name: cmakeVersion
type: string
default: '3.31.0'
steps: steps:
- task: Bash@3 - task: Bash@3
displayName: Install CMake ${{ parameters.cmakeVersion }} displayName: Install CMake 3.31
inputs: inputs:
targetType: inline targetType: inline
script: | script: |
CMAKE_VERSION=${{ parameters.cmakeVersion }} CMAKE_VERSION=3.31.0
CMAKE_ROOT="$(Pipeline.Workspace)/cmake" CMAKE_ROOT="$(Pipeline.Workspace)/cmake"
echo "Downloading CMake $CMAKE_VERSION..." echo "Downloading CMake $CMAKE_VERSION..."
@@ -63,7 +63,6 @@ parameters:
libopenblas-dev: openblas-devel libopenblas-dev: openblas-devel
libopenmpi-dev: openmpi-devel libopenmpi-dev: openmpi-devel
libpci-dev: libpciaccess-devel libpci-dev: libpciaccess-devel
libsimde-dev: simde-devel
libssl-dev: openssl-devel libssl-dev: openssl-devel
# note: libstdc++-devel is in the base packages list # note: libstdc++-devel is in the base packages list
libsystemd-dev: systemd-devel libsystemd-dev: systemd-devel
@@ -35,8 +35,8 @@ parameters:
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
amdsmi: amdsmi:
pipelineId: 376 pipelineId: 99
developBranch: develop developBranch: amd-staging
hasGpuTarget: false hasGpuTarget: false
aomp-extras: aomp-extras:
pipelineId: 111 pipelineId: 111
@@ -46,10 +46,6 @@ parameters:
pipelineId: 115 pipelineId: 115
developBranch: aomp-dev developBranch: aomp-dev
hasGpuTarget: false hasGpuTarget: false
aqlprofile:
pipelineId: 365
developBranch: develop
hasGpuTarget: false
clr: clr:
pipelineId: 335 pipelineId: 335
developBranch: develop developBranch: develop
@@ -67,8 +63,8 @@ parameters:
developBranch: develop developBranch: develop
hasGpuTarget: false hasGpuTarget: false
hip-tests: hip-tests:
pipelineId: 362 pipelineId: 233
developBranch: develop developBranch: amd-staging
hasGpuTarget: false hasGpuTarget: false
hipBLAS: hipBLAS:
pipelineId: 317 pipelineId: 317
@@ -115,7 +111,7 @@ parameters:
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
hipTensor: hipTensor:
pipelineId: 374 pipelineId: 105
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
llvm-project: llvm-project:
@@ -130,17 +126,13 @@ parameters:
pipelineId: 80 pipelineId: 80
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
origami:
pipelineId: 364
developBranch: develop
hasGpuTarget: true
rccl: rccl:
pipelineId: 107 pipelineId: 107
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
rdc: rdc:
pipelineId: 360 pipelineId: 100
developBranch: develop developBranch: amd-staging
hasGpuTarget: false hasGpuTarget: false
rocAL: rocAL:
pipelineId: 151 pipelineId: 151
@@ -179,16 +171,16 @@ parameters:
developBranch: develop developBranch: develop
hasGpuTarget: false hasGpuTarget: false
rocm-core: rocm-core:
pipelineId: 349 pipelineId: 103
developBranch: develop developBranch: master
hasGpuTarget: false hasGpuTarget: false
rocm-examples: rocm-examples:
pipelineId: 216 pipelineId: 216
developBranch: amd-staging developBranch: amd-staging
hasGpuTarget: true hasGpuTarget: true
rocminfo: rocminfo:
pipelineId: 356 pipelineId: 91
developBranch: develop developBranch: amd-staging
hasGpuTarget: false hasGpuTarget: false
rocMLIR: rocMLIR:
pipelineId: 229 pipelineId: 229
@@ -203,8 +195,8 @@ parameters:
developBranch: master developBranch: master
hasGpuTarget: false hasGpuTarget: false
rocm_smi_lib: rocm_smi_lib:
pipelineId: 358 pipelineId: 96
developBranch: develop developBranch: amd-staging
hasGpuTarget: false hasGpuTarget: false
rocPRIM: rocPRIM:
pipelineId: 273 pipelineId: 273
@@ -215,7 +207,7 @@ parameters:
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
rocprofiler-compute: rocprofiler-compute:
pipelineId: 344 pipelineId: 257
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
rocprofiler-register: rocprofiler-register:
@@ -223,20 +215,20 @@ parameters:
developBranch: develop developBranch: develop
hasGpuTarget: false hasGpuTarget: false
rocprofiler-sdk: rocprofiler-sdk:
pipelineId: 347 pipelineId: 246
developBranch: develop developBranch: amd-staging
hasGpuTarget: true hasGpuTarget: true
rocprofiler-systems: rocprofiler-systems:
pipelineId: 345 pipelineId: 255
developBranch: develop developBranch: amd-staging
hasGpuTarget: true hasGpuTarget: true
rocPyDecode: rocPyDecode:
pipelineId: 239 pipelineId: 239
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
ROCR-Runtime: ROCR-Runtime:
pipelineId: 354 pipelineId: 10
developBranch: develop developBranch: amd-staging
hasGpuTarget: false hasGpuTarget: false
rocRAND: rocRAND:
pipelineId: 274 pipelineId: 274
@@ -259,11 +251,11 @@ parameters:
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
roctracer: roctracer:
pipelineId: 331 pipelineId: 141
developBranch: develop developBranch: amd-staging
hasGpuTarget: true hasGpuTarget: true
rocWMMA: rocWMMA:
pipelineId: 370 pipelineId: 109
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
rpp: rpp:
@@ -13,7 +13,7 @@ parameters:
default: ctest default: ctest
- name: testParameters - name: testParameters
type: string type: string
default: --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml default: --output-on-failure --force-new-ctest-process --output-junit test_output.xml
- name: extraTestParameters - name: extraTestParameters
type: string type: string
default: '' default: ''
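The default testParameters are standard CTest flags (one side additionally passes --extra-verbose). Assuming the template runs testExecutable (ctest by default) from the configured test directory, the default invocation is roughly:

- task: Bash@3
  displayName: Run ctest with the template defaults (sketch)
  inputs:
    targetType: inline
    workingDirectory: $(Agent.BuildDirectory)/build   # hypothetical testDir
    script: |
      # --output-junit requires CTest 3.21 or newer
      ctest --output-on-failure --force-new-ctest-process --output-junit test_output.xml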
.gitignore
@@ -1,7 +1,6 @@
.venv .venv
.vscode .vscode
build build
__pycache__
# documentation artifacts # documentation artifacts
_build/ _build/
@@ -27,7 +27,6 @@ ASICs
ASan ASan
ASAN ASAN
ASm ASm
Async
ATI ATI
atomicRMW atomicRMW
AddressSanitizer AddressSanitizer
@@ -35,7 +34,6 @@ AlexNet
Andrej Andrej
Arb Arb
Autocast Autocast
autograd
BARs BARs
BatchNorm BatchNorm
BLAS BLAS
@@ -45,7 +43,6 @@ Blit
Blockwise Blockwise
Bluefield Bluefield
Bootloader Bootloader
Broadcom
CAS CAS
CCD CCD
CDNA CDNA
@@ -65,7 +62,6 @@ CPU
CPUs CPUs
Cron Cron
CSC CSC
CSDATA
CSE CSE
CSV CSV
CSn CSn
@@ -75,11 +71,9 @@ CU
CUDA CUDA
CUs CUs
CXX CXX
CX
Cavium Cavium
CentOS CentOS
ChatGPT ChatGPT
Cholesky
CoRR CoRR
Codespaces Codespaces
Commitizen Commitizen
@@ -87,13 +81,10 @@ CommonMark
Concretized Concretized
Conda Conda
ConnectX ConnectX
CountOnes
CuPy CuPy
customizable
da da
Dashboarding Dashboarding
Dataloading Dataloading
dataflows
DBRX DBRX
DDR DDR
DF DF
@@ -106,7 +97,6 @@ DIMM
DKMS DKMS
DL DL
DMA DMA
DOMContentLoaded
DNN DNN
DNNL DNNL
DPM DPM
@@ -125,8 +115,6 @@ Dependabot
Deprecations Deprecations
DevCap DevCap
DirectX DirectX
Disaggregated
disaggregated
Dockerfile Dockerfile
Dockerized Dockerized
Doxygen Doxygen
@@ -135,14 +123,9 @@ ELMo
ENDPGM ENDPGM
EPYC EPYC
ESXi ESXi
EP
EoS EoS
etcd
equalto
fas fas
FBGEMM FBGEMM
FiLM
FIFOs
FFT FFT
FFTs FFTs
FFmpeg FFmpeg
@@ -155,19 +138,15 @@ Filesystem
FindDb FindDb
Flang Flang
FlashAttention FlashAttention
FlashInfers
FlashInfer
FluxBenchmark FluxBenchmark
Fortran Fortran
Fuyu Fuyu
GALB GALB
GAT GAT
GATNE
GCC GCC
GCD GCD
GCDs GCDs
GCN GCN
GCNN
GDB GDB
GDDR GDDR
GDR GDR
@@ -177,7 +156,6 @@ GEMMs
GFLOPS GFLOPS
GFortran GFortran
GFXIP GFXIP
GGUF
Gemma Gemma
GiB GiB
GIM GIM
@@ -186,19 +164,15 @@ Glibc
GLXT GLXT
Gloo Gloo
GMI GMI
GNN
GNNs
GPG GPG
GPR GPR
GPT GPT
GPU GPU
GPU's GPU's
GPUDirect
GPUs GPUs
GraphBolt Graphbolt
GraphSage GraphSage
GRBM GRBM
GRE
GenAI GenAI
GenZ GenZ
GitHub GitHub
@@ -225,11 +199,7 @@ Haswell
Higgs Higgs
href href
Hyperparameters Hyperparameters
HybridEngine
Huggingface Huggingface
Hunyuan
HunyuanVideo
IB
ICD ICD
ICT ICT
ICV ICV
@@ -238,11 +208,8 @@ IDEs
IFWI IFWI
IMDb IMDb
IncDec IncDec
instrSize
interpolators
IOMMU IOMMU
IOP IOP
IOPS
IOPM IOPM
IOV IOV
IRQ IRQ
@@ -259,9 +226,7 @@ Intersphinx
Intra Intra
Ioffe Ioffe
JAX's JAX's
JAXLIB
Jinja Jinja
js
JSON JSON
Jupyter Jupyter
KFD KFD
@@ -281,16 +246,12 @@ LLM
LLMs LLMs
LLVM LLVM
LM LM
logsumexp
LRU
LSAN LSAN
LSan LSan
LTS LTS
LSTMs LSTMs
LteAll
LanguageCrossEntropy LanguageCrossEntropy
LoRA LoRA
MECO
MEM MEM
MERCHANTABILITY MERCHANTABILITY
MFMA MFMA
@@ -309,7 +270,6 @@ MNIST
MPI MPI
MPT MPT
MSVC MSVC
mul
MVAPICH MVAPICH
MVFFR MVFFR
Makefile Makefile
@@ -317,7 +277,6 @@ Makefiles
Matplotlib Matplotlib
Matrox Matrox
MaxText MaxText
MBT
Megablocks Megablocks
Megatrends Megatrends
Megatron Megatron
@@ -327,18 +286,13 @@ Meta's
Miniconda Miniconda
MirroredStrategy MirroredStrategy
Mixtral Mixtral
MLA
MosaicML MosaicML
MoEs MoEs
Mooncake
Mpops Mpops
Multicore Multicore
Multithreaded Multithreaded
mx
MXFP
MyEnvironment MyEnvironment
MyST MyST
NANOO
NBIO NBIO
NBIOs NBIOs
NCCL NCCL
@@ -371,7 +325,6 @@ OFED
OMM OMM
OMP OMP
OMPI OMPI
OOM
OMPT OMPT
OMPX OMPX
ONNX ONNX
@@ -394,11 +347,9 @@ PCC
PCI PCI
PCIe PCIe
PEFT PEFT
perf
PEQT PEQT
PIL PIL
PILImage PILImage
PJRT
POR POR
PRNG PRNG
PRs PRs
@@ -418,7 +369,6 @@ Profiler's
PyPi PyPi
Pytest Pytest
PyTorch PyTorch
QPS
Qcycles Qcycles
Qwen Qwen
RAII RAII
@@ -480,9 +430,7 @@ SKU
SKUs SKUs
SLES SLES
SLURM SLURM
Slurm
SMEM SMEM
SMFMA
SMI SMI
SMT SMT
SPI SPI
@@ -494,24 +442,18 @@ SWE
SerDes SerDes
ShareGPT ShareGPT
Shlens Shlens
simd
Skylake Skylake
Softmax Softmax
Spack Spack
SplitK SplitK
Supermicro Supermicro
Szegedy Szegedy
TagRAM
TCA TCA
TCC TCC
TCCs
TCI TCI
TCIU TCIU
TCP TCP
TCR TCR
TVM
THREADGROUPS
threadgroups
TensorRT TensorRT
TensorFloat TensorFloat
TF TF
@@ -521,12 +463,13 @@ TPS
TPU TPU
TPUs TPUs
TSME TSME
Taichi
Taichi's
Tagram Tagram
TensileLite TensileLite
TensorBoard TensorBoard
TensorFlow TensorFlow
TensorParallel TensorParallel
TheRock
ToC ToC
TorchAudio TorchAudio
torchaudio torchaudio
@@ -544,7 +487,6 @@ UAC
UC UC
UCC UCC
UCX UCX
ud
UE UE
UIF UIF
UMC UMC
@@ -555,11 +497,9 @@ UltraChat
Uncached Uncached
Unittests Unittests
Unhandled Unhandled
unwindowed
VALU VALU
VBIOS VBIOS
VCN VCN
verl's
VGPR VGPR
VGPRs VGPRs
VM VM
@@ -572,13 +512,11 @@ Vanhoucke
Vulkan Vulkan
WGP WGP
WGPs WGPs
WR
WX WX
WikiText WikiText
Wojna Wojna
Workgroups Workgroups
Writebacks Writebacks
xcc
XCD XCD
XCDs XCDs
XGBoost XGBoost
@@ -599,7 +537,6 @@ ZenDNN
accuracies accuracies
activations activations
addr addr
addEventListener
ade ade
ai ai
alloc alloc
@@ -615,7 +552,6 @@ autogenerated
autotune autotune
avx avx
awk awk
az
backend backend
backends backends
bb bb
@@ -633,7 +569,6 @@ boson
bosons bosons
br br
BrainFloat BrainFloat
btn
buildable buildable
bursty bursty
bzip bzip
@@ -645,21 +580,18 @@ centric
changelog changelog
checkpointing checkpointing
chiplet chiplet
classList
cmake cmake
cmd cmd
coalescable coalescable
codename codename
collater collater
comgr comgr
compat
completers completers
composable composable
concretization concretization
config config
configs configs
conformant conformant
const
constructible constructible
convolutional convolutional
convolves convolves
@@ -694,14 +626,12 @@ denoised
denoises denoises
denormalize denormalize
dequantization dequantization
dequantized
dequantizes dequantizes
deserializers deserializers
detections detections
dev dev
devicelibs devicelibs
devsel devsel
dgl
dimensionality dimensionality
disambiguates disambiguates
distro distro
@@ -725,7 +655,6 @@ exascale
executables executables
ffmpeg ffmpeg
filesystem filesystem
forEach
fortran fortran
fp fp
framebuffer framebuffer
@@ -734,16 +663,13 @@ galb
gcc gcc
gdb gdb
gemm gemm
getAttribute
gfortran gfortran
gfx gfx
githooks githooks
github github
globals globals
gnupg gnupg
gpu
grayscale grayscale
gx
gzip gzip
heterogenous heterogenous
hipBLAS hipBLAS
@@ -796,7 +722,6 @@ invariants
invocating invocating
ipo ipo
jax jax
json
kdb kdb
kfd kfd
kv kv
@@ -810,7 +735,6 @@ linalg
linearized linearized
linter linter
linux linux
llm
llvm llvm
lm lm
localscratch localscratch
@@ -818,8 +742,6 @@ logits
lossy lossy
macOS macOS
matchers matchers
maxtext
megablocks
megatron megatron
microarchitecture microarchitecture
migraphx migraphx
@@ -848,7 +770,6 @@ opencv
openmp openmp
openssl openssl
optimizers optimizers
ol
os os
oversubscription oversubscription
pageable pageable
@@ -856,13 +777,10 @@ pallas
parallelization parallelization
parallelizing parallelizing
param param
params
parameterization parameterization
passthrough passthrough
pe
perfcounter perfcounter
performant performant
piecewise
perl perl
pragma pragma
pre pre
@@ -890,7 +808,6 @@ profiler
profilers profilers
protobuf protobuf
pseudorandom pseudorandom
px
py py
pytorch pytorch
recommender recommender
@@ -898,12 +815,9 @@ recommenders
quantile quantile
quantizer quantizer
quasirandom quasirandom
querySelector
querySelectorAll
queueing queueing
qwen qwen
radeon radeon
rc
rccl rccl
rdc rdc
rdma rdma
@@ -919,8 +833,6 @@ req
resampling resampling
rescaling rescaling
reusability reusability
rhel
rl
RLHF RLHF
roadmap roadmap
roc roc
@@ -965,24 +877,19 @@ scalability
scalable scalable
scipy scipy
seealso seealso
selectattr
selectedTag
sendmsg sendmsg
seqs seqs
serializers serializers
setAttribute
sglang sglang
shader shader
sharding sharding
sigmoid sigmoid
sles
sm sm
smi smi
softmax softmax
spack spack
spmm spmm
src src
stanford
stochastically stochastically
strided strided
subcommand subcommand
@@ -999,10 +906,8 @@ symlink
symlinks symlinks
sys sys
tabindex tabindex
targetContainer
td td
tensorfloat tensorfloat
tf
th th
tokenization tokenization
tokenize tokenize
@@ -1011,12 +916,9 @@ tokenizer
tokenizes tokenizes
toolchain toolchain
toolchains toolchains
topk
toolset toolset
toolsets toolsets
torchtitan
torchvision torchvision
tp
tqdm tqdm
tracebacks tracebacks
txt txt
@@ -1039,7 +941,6 @@ USM
UTCL UTCL
UTIL UTIL
utils utils
UX
vL vL
variational variational
vdi vdi
@@ -1069,8 +970,6 @@ writebacks
wrreq wrreq
wzo wzo
xargs xargs
xdit
xDiT
xGMI xGMI
xPacked xPacked
xz xz
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,17 +1,32 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<manifest> <manifest>
<remote name="rocm-org" fetch="https://github.com/ROCm/" /> <remote name="rocm-org" fetch="https://github.com/ROCm/" />
<default revision="refs/tags/rocm-7.1.1" <default revision="refs/tags/rocm-6.4.3"
remote="rocm-org" remote="rocm-org"
sync-c="true" sync-c="true"
sync-j="4" /> sync-j="4" />
<!--list of projects for ROCm--> <!--list of projects for ROCm-->
<project name="ROCK-Kernel-Driver" /> <project name="ROCK-Kernel-Driver" />
<project name="ROCR-Runtime" />
<project name="amdsmi" /> <project name="amdsmi" />
<project name="rdc" />
<project name="rocm_bandwidth_test" /> <project name="rocm_bandwidth_test" />
<project name="rocm_smi_lib" />
<project name="rocm-core" />
<project name="rocm-examples" /> <project name="rocm-examples" />
<project name="rocminfo" />
<project name="rocprofiler" />
<project name="rocprofiler-register" />
<project name="rocprofiler-sdk" />
<project name="rocprofiler-compute" />
<project name="rocprofiler-systems" />
<project name="roctracer" />
<!--HIP Projects--> <!--HIP Projects-->
<project name="HIP" />
<project name="hip-tests" />
<project name="HIPIFY" /> <project name="HIPIFY" />
<project name="clr" />
<project name="hipother" />
<!-- The following projects are all associated with the AMDGPU LLVM compiler --> <!-- The following projects are all associated with the AMDGPU LLVM compiler -->
<project name="half" /> <project name="half" />
<project name="llvm-project" /> <project name="llvm-project" />
@@ -22,32 +37,36 @@
<project name="rocr_debug_agent" /> <project name="rocr_debug_agent" />
<!-- ROCm Libraries --> <!-- ROCm Libraries -->
<project groups="mathlibs" name="AMDMIGraphX" /> <project groups="mathlibs" name="AMDMIGraphX" />
<project groups="mathlibs" name="MIOpen" />
<project groups="mathlibs" name="MIVisionX" /> <project groups="mathlibs" name="MIVisionX" />
<project groups="mathlibs" name="ROCmValidationSuite" /> <project groups="mathlibs" name="ROCmValidationSuite" />
<project groups="mathlibs" name="Tensile" />
<project groups="mathlibs" name="composable_kernel" /> <project groups="mathlibs" name="composable_kernel" />
<project groups="mathlibs" name="hipBLAS-common" />
<project groups="mathlibs" name="hipBLAS" />
<project groups="mathlibs" name="hipBLASLt" />
<project groups="mathlibs" name="hipCUB" />
<project groups="mathlibs" name="hipFFT" />
<project groups="mathlibs" name="hipRAND" />
<project groups="mathlibs" name="hipSOLVER" /> <project groups="mathlibs" name="hipSOLVER" />
<project groups="mathlibs" name="hipSPARSE" />
<project groups="mathlibs" name="hipSPARSELt" />
<project groups="mathlibs" name="hipTensor" /> <project groups="mathlibs" name="hipTensor" />
<project groups="mathlibs" name="hipfort" /> <project groups="mathlibs" name="hipfort" />
<project groups="mathlibs" name="rccl" /> <project groups="mathlibs" name="rccl" />
<project groups="mathlibs" name="rocAL" /> <project groups="mathlibs" name="rocAL" />
<project groups="mathlibs" name="rocALUTION" /> <project groups="mathlibs" name="rocALUTION" />
<project groups="mathlibs" name="rocBLAS" />
<project groups="mathlibs" name="rocDecode" /> <project groups="mathlibs" name="rocDecode" />
<project groups="mathlibs" name="rocJPEG" /> <project groups="mathlibs" name="rocJPEG" />
<!-- The following components have been migrated to rocm-libraries:
hipBLAS-common hipBLAS hipBLASLt hipCUB
hipFFT hipRAND hipSPARSE hipSPARSELt
MIOpen rocBLAS rocFFT rocPRIM rocRAND
rocSPARSE rocThrust Tensile -->
<project groups="mathlibs" name="rocm-libraries" />
<!-- The following components have been migrated to rocm-systems:
aqlprofile clr hip hip-tests hipother
rdc rocm-core rocm_smi_lib rocminfo rocprofiler-compute
rocprofiler-register rocprofiler-sdk rocprofiler-systems
rocprofiler rocr-runtime roctracer -->
<project groups="mathlibs" name="rocm-systems" />
<project groups="mathlibs" name="rocPyDecode" /> <project groups="mathlibs" name="rocPyDecode" />
<project groups="mathlibs" name="rocSOLVER" /> <project groups="mathlibs" name="rocFFT" />
<project groups="mathlibs" name="rocPRIM" />
<project groups="mathlibs" name="rocRAND" />
<project groups="mathlibs" name="rocSHMEM" /> <project groups="mathlibs" name="rocSHMEM" />
<project groups="mathlibs" name="rocSOLVER" />
<project groups="mathlibs" name="rocSPARSE" />
<project groups="mathlibs" name="rocThrust" />
<project groups="mathlibs" name="rocWMMA" /> <project groups="mathlibs" name="rocWMMA" />
<project groups="mathlibs" name="rocm-cmake" /> <project groups="mathlibs" name="rocm-cmake" />
<project groups="mathlibs" name="rpp" /> <project groups="mathlibs" name="rpp" />
@@ -25,69 +25,69 @@ additional licenses. Please review individual repositories for more information.
<!-- spellcheck-disable --> <!-- spellcheck-disable -->
| Component | License | | Component | License |
|:---------------------|:-------------------------| |:---------------------|:-------------------------|
| [AMD Compute Language Runtime (CLR)](https://github.com/ROCm/rocm-systems/tree/develop/projects/clr) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/clr/LICENSE.md) | | [AMD Compute Language Runtime (CLR)](https://github.com/ROCm/clr) | [MIT](https://github.com/ROCm/clr/blob/amd-staging/LICENSE.txt) |
| [AMD SMI](https://github.com/ROCm/amdsmi) | [MIT](https://github.com/ROCm/amdsmi/blob/amd-staging/LICENSE) | | [AMD SMI](https://github.com/ROCm/amdsmi) | [MIT](https://github.com/ROCm/amdsmi/blob/amd-staging/LICENSE) |
| [aomp](https://github.com/ROCm/aomp/) | [Apache 2.0](https://github.com/ROCm/aomp/blob/aomp-dev/LICENSE) | | [aomp](https://github.com/ROCm/aomp/) | [Apache 2.0](https://github.com/ROCm/aomp/blob/aomp-dev/LICENSE) |
| [aomp-extras](https://github.com/ROCm/aomp-extras/) | [MIT](https://github.com/ROCm/aomp-extras/blob/aomp-dev/LICENSE) | | [aomp-extras](https://github.com/ROCm/aomp-extras/) | [MIT](https://github.com/ROCm/aomp-extras/blob/aomp-dev/LICENSE) |
| [AQLprofile](https://github.com/ROCm/rocm-systems/tree/develop/projects/aqlprofile/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/aqlprofile/LICENSE.md) | | [AQLprofile] | [MIT](https://github.com/ROCm/aqlprofile/blob/amd-staging/LICENSE) |
| [Code Object Manager (Comgr)](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/comgr) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/comgr/LICENSE.txt) | | [Code Object Manager (Comgr)](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/comgr) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/comgr/LICENSE.txt) |
| [Composable Kernel](https://github.com/ROCm/composable_kernel) | [MIT](https://github.com/ROCm/composable_kernel/blob/develop/LICENSE) | | [Composable Kernel](https://github.com/ROCm/composable_kernel) | [MIT](https://github.com/ROCm/composable_kernel/blob/develop/LICENSE) |
| [half](https://github.com/ROCm/half/) | [MIT](https://github.com/ROCm/half/blob/rocm/LICENSE.txt) | | [half](https://github.com/ROCm/half/) | [MIT](https://github.com/ROCm/half/blob/rocm/LICENSE.txt) |
| [HIP](https://github.com/ROCm/rocm-systems/tree/develop/projects/hip/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/hip/LICENSE.md) | | [HIP](https://github.com/ROCm/HIP/) | [MIT](https://github.com/ROCm/HIP/blob/amd-staging/LICENSE.txt) |
| [hipamd](https://github.com/ROCm/rocm-systems/tree/develop/projects/clr/hipamd/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/clr/hipamd/LICENSE.md) | | [hipamd](https://github.com/ROCm/clr/tree/amd-staging/hipamd) | [MIT](https://github.com/ROCm/clr/blob/amd-staging/hipamd/LICENSE.txt) |
| [hipBLAS](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipblas/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipblas/LICENSE.md) | | [hipBLAS](https://github.com/ROCm/hipBLAS/) | [MIT](https://github.com/ROCm/hipBLAS/blob/develop/LICENSE.md) |
| [hipBLASLt](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipblaslt/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipblaslt/LICENSE.md) | | [hipBLASLt](https://github.com/ROCm/hipBLASLt/) | [MIT](https://github.com/ROCm/hipBLASLt/blob/develop/LICENSE.md) |
| [HIPCC](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/hipcc) | [MIT](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/hipcc/LICENSE.txt) |
| [hipCUB](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipcub/) | [Custom](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipcub/LICENSE.txt) |
| [hipFFT](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipfft/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipfft/LICENSE.md) |
| [hipfort](https://github.com/ROCm/hipfort/) | [MIT](https://github.com/ROCm/hipfort/blob/develop/LICENSE) |
| [HIPIFY](https://github.com/ROCm/HIPIFY/) | [MIT](https://github.com/ROCm/HIPIFY/blob/amd-staging/LICENSE.txt) |
| [hipRAND](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hiprand/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hiprand/LICENSE.md) |
| [hipSOLVER](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipsolver/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipsolver/LICENSE.md) |
| [hipSPARSE](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipsparse/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipsparse/LICENSE.md) |
| [hipSPARSELt](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipsparselt/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipsparselt/LICENSE.md) |
| [hipTensor](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hiptensor/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hiptensor/LICENSE) |
| [llvm-project](https://github.com/ROCm/llvm-project/) | [Apache](https://github.com/ROCm/llvm-project/blob/amd-staging/LICENSE.TXT) |
| [llvm-project/flang](https://github.com/ROCm/llvm-project/tree/amd-staging/flang) | [Apache 2.0](https://github.com/ROCm/llvm-project/blob/amd-staging/flang/LICENSE.TXT) |
| [MIGraphX](https://github.com/ROCm/AMDMIGraphX/) | [MIT](https://github.com/ROCm/AMDMIGraphX/blob/develop/LICENSE) |
| [MIOpen](https://github.com/ROCm/rocm-libraries/tree/develop/projects/miopen/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/miopen/LICENSE.md) |
| [MIVisionX](https://github.com/ROCm/MIVisionX/) | [MIT](https://github.com/ROCm/MIVisionX/blob/develop/LICENSE.txt) |
| [rocAL](https://github.com/ROCm/rocAL) | [MIT](https://github.com/ROCm/rocAL/blob/develop/LICENSE.txt) |
| [rocALUTION](https://github.com/ROCm/rocALUTION/) | [MIT](https://github.com/ROCm/rocALUTION/blob/develop/LICENSE.md) |
| [rocBLAS](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocblas/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocblas/LICENSE.md) |
| [ROCdbgapi](https://github.com/ROCm/ROCdbgapi/) | [MIT](https://github.com/ROCm/ROCdbgapi/blob/amd-staging/LICENSE.txt) |
| [rocDecode](https://github.com/ROCm/rocDecode) | [MIT](https://github.com/ROCm/rocDecode/blob/develop/LICENSE) |
| [rocFFT](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocfft/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocfft/LICENSE.md) |
| [ROCgdb](https://github.com/ROCm/ROCgdb/) | [GNU General Public License v3.0](https://github.com/ROCm/ROCgdb/blob/amd-staging/COPYING3) |
| [rocJPEG](https://github.com/ROCm/rocJPEG/) | [MIT](https://github.com/ROCm/rocJPEG/blob/develop/LICENSE) |
| [ROCK-Kernel-Driver](https://github.com/ROCm/ROCK-Kernel-Driver/) | [GPL 2.0 WITH Linux-syscall-note](https://github.com/ROCm/ROCK-Kernel-Driver/blob/master/COPYING) |
| [rocminfo](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocminfo/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocminfo/License.txt) |
| [ROCm Bandwidth Test](https://github.com/ROCm/rocm_bandwidth_test/) | [MIT](https://github.com/ROCm/rocm_bandwidth_test/blob/master/LICENSE.txt) |
| [ROCm CMake](https://github.com/ROCm/rocm-cmake/) | [MIT](https://github.com/ROCm/rocm-cmake/blob/develop/LICENSE) |
| [ROCm Communication Collectives Library (RCCL)](https://github.com/ROCm/rccl/) | [Custom](https://github.com/ROCm/rccl/blob/develop/LICENSE.txt) |
| [ROCm-Core](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocm-core/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocm-core/LICENSE.md) |
| [ROCm Compute Profiler](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler-compute/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocprofiler-compute/LICENSE.md) |
| [ROCm Data Center (RDC)](https://github.com/ROCm/rocm-systems/tree/develop/projects/rdc/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rdc/LICENSE.md) |
| [ROCm-Device-Libs](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/device-libs) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/device-libs/LICENSE.TXT) |
| [ROCm-OpenCL-Runtime](https://github.com/ROCm/rocm-systems/tree/develop/projects/clr/opencl/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/clr/opencl/LICENSE.md) |
| [ROCm Performance Primitives (RPP)](https://github.com/ROCm/rpp) | [MIT](https://github.com/ROCm/rpp/blob/develop/LICENSE) |
| [ROCm SMI Lib](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocm-smi-lib/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocm-smi-lib/LICENSE.md) |
| [ROCm Systems Profiler](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler-systems/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocprofiler-systems/LICENSE.md) |
| [ROCm Validation Suite](https://github.com/ROCm/ROCmValidationSuite/) | [MIT](https://github.com/ROCm/ROCmValidationSuite/blob/master/LICENSE) |
| [rocPRIM](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocprim/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocprim/LICENSE.md) |
| [ROCProfiler](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocprofiler/LICENSE.md) |
| [ROCprofiler-SDK](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler-sdk/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocprofiler-sdk/LICENSE.md) |
| [rocPyDecode](https://github.com/ROCm/rocPyDecode) | [MIT](https://github.com/ROCm/rocPyDecode/blob/develop/LICENSE.txt) |
| [rocRAND](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocrand/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocrand/LICENSE.md) |
| [ROCr Debug Agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/amd-staging/LICENSE.txt) |
| [ROCR-Runtime](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocr-runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocr-runtime/LICENSE.txt) |
| [rocSHMEM](https://github.com/ROCm/rocSHMEM/) | [MIT](https://github.com/ROCm/rocSHMEM/blob/develop/LICENSE.md) |
| [rocSOLVER](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocsolver/) | [BSD-2-Clause](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocsolver/LICENSE.md) |
| [rocSPARSE](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocsparse/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocsparse/LICENSE.md) |
| [rocThrust](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocthrust/) | [Apache 2.0](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocthrust/LICENSE) |
| [ROCTracer](https://github.com/ROCm/rocm-systems/tree/develop/projects/roctracer/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/roctracer/LICENSE.md) |
| [rocWMMA](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocwmma/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocwmma/LICENSE.md) |
| [Tensile](https://github.com/ROCm/rocm-libraries/tree/develop/shared/tensile/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/shared/tensile/LICENSE.md) |
| [TransferBench](https://github.com/ROCm/TransferBench) | [MIT](https://github.com/ROCm/TransferBench/blob/develop/LICENSE.md) |
Open sourced ROCm components are released via public GitHub

View File

@@ -1,136 +1,131 @@
ROCm Version,7.1.1,7.1.0,7.0.2,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
:ref:`Operating systems & kernels <OS-kernel-versions>` [#os-compatibility-past-60]_,Ubuntu 24.04.3,Ubuntu 24.04.3,Ubuntu 24.04.3,Ubuntu 24.04.3,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
,,,,,,,,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
,"RHEL 10.1, 10.0, 9.7, 9.6, 9.4","RHEL 10.0, 9.6, 9.4","RHEL 10.0, 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
,SLES 15 SP7,SLES 15 SP7,SLES 15 SP7,SLES 15 SP7,"SLES 15 SP7, SP6","SLES 15 SP7, SP6",SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
,,,,,,,,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
,"Oracle Linux 10, 9, 8","Oracle Linux 10, 9, 8","Oracle Linux 10, 9, 8","Oracle Linux 9, 8","Oracle Linux 9, 8","Oracle Linux 9, 8","Oracle Linux 9, 8","Oracle Linux 9, 8",Oracle Linux 8.10,Oracle Linux 8.10,Oracle Linux 8.10,Oracle Linux 8.10,Oracle Linux 8.9,Oracle Linux 8.9,Oracle Linux 8.9,Oracle Linux 8.9,Oracle Linux 8.9,Oracle Linux 8.9,Oracle Linux 8.9,,,
,"Debian 13, 12","Debian 13, 12","Debian 13, 12",Debian 12,Debian 12,Debian 12,Debian 12,Debian 12,Debian 12,Debian 12,Debian 12,,,,,,,,,,,
,,,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,,,,,,,,,,,,
,Rocky Linux 9,Rocky Linux 9,Rocky Linux 9,Rocky Linux 9,,,,,,,,,,,,,,,,,,
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA4,CDNA4,CDNA4,CDNA4,,,,,,,,,,,,,,,,,,
,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
,RDNA4,RDNA4,RDNA4,RDNA4,RDNA4,RDNA4,RDNA4,,,,,,,,,,,,,,,
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>` [#gpu-compatibility-past-60]_,gfx950,gfx950,gfx950,gfx950,,,,,,,,,,,,,,,,,,
,gfx1201,gfx1201,gfx1201,gfx1201,gfx1201,gfx1201,gfx1201,,,,,,,,,,,,,,,
,gfx1200,gfx1200,gfx1200,gfx1200,gfx1200,gfx1200,gfx1200,,,,,,,,,,,,,,,
,gfx1101,gfx1101,gfx1101,gfx1101,gfx1101,gfx1101,gfx1101,,,,,,,,,,,,,,,
,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942, gfx942, gfx942, gfx942, gfx942, gfx942, gfx942
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
,,,,,,,,,,,,,,,,,,,,,,
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.9, 2.8, 2.7","2.8, 2.7, 2.6","2.8, 2.7, 2.6","2.7, 2.6, 2.5","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.20.0, 2.19.1, 2.18.1","2.20.0, 2.19.1, 2.18.1","2.19.1, 2.18.1, 2.17.1 [#tf-mi350-past-60]_","2.19.1, 2.18.1, 2.17.1 [#tf-mi350-past-60]_","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.7.1,0.7.1,0.6.0,0.6.0,0.4.35,0.4.35,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
:doc:`verl <../compatibility/ml-compatibility/verl-compatibility>` [#verl_compat-past-60]_,N/A,N/A,N/A,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.3.0.post0,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>` [#stanford-megatron-lm_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,85f95ae,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat-past-60]_,N/A,N/A,N/A,2.4.0,2.4.0,N/A,N/A,2.4.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>` [#megablocks_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.7.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`Ray <../compatibility/ml-compatibility/ray-compatibility>` [#ray_compat-past-60]_,N/A,N/A,N/A,2.51.1,N/A,N/A,2.48.0.post0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat-past-60]_,N/A,N/A,N/A,b6652,b6356,b6356,b6356,b5997,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`FlashInfer <../compatibility/ml-compatibility/flashinfer-compatibility>` [#flashinfer_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,v0.2.5,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.23.1,1.22.0,1.22.0,1.22.0,1.20.0,1.20.0,1.20.0,1.20.0,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
`UCC <https://github.com/ROCm/ucc>`_,>=1.4.0,>=1.4.0,>=1.4.0,>=1.4.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
`UCX <https://github.com/ROCm/ucx>`_,>=1.17.0,>=1.17.0,>=1.17.0,>=1.17.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
,,,,,,,,,,,,,,,,,,,,,,
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
Thrust,2.8.5,2.8.5,2.6.0,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
CUB,2.8.5,2.8.5,2.6.0,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
,,,,,,,,,,,,,,,,,,,,,,
DRIVER & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.20.1, 30.20.0 [#mi325x_KVM-past-60]_, 30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x","30.20.0 [#mi325x_KVM-past-60]_, 30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x","30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x","30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x, 6.2.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
,,,,,,,,,,,,,,,,,,,,,,
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
:doc:`MIGraphX <amdmigraphx:index>`,2.14.0,2.14.0,2.13.0,2.13.0,2.12.0,2.12.0,2.12.0,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
:doc:`MIOpen <miopen:index>`,3.5.1,3.5.1,3.5.0,3.5.0,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
:doc:`MIVisionX <mivisionx:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
:doc:`rocAL <rocal:index>`,2.4.0,2.4.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
:doc:`rocDecode <rocdecode:index>`,1.4.0,1.4.0,1.0.0,1.0.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
:doc:`rocJPEG <rocjpeg:index>`,1.2.0,1.2.0,1.1.0,1.1.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`rocPyDecode <rocpydecode:index>`,0.7.0,0.7.0,0.6.0,0.6.0,0.3.1,0.3.1,0.3.1,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`RPP <rpp:index>`,2.1.0,2.1.0,2.0.0,2.0.0,1.9.10,1.9.10,1.9.10,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
,,,,,,,,,,,,,,,,,,,,,,
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`RCCL <rccl:index>`,2.27.7,2.27.7,2.26.6,2.26.6,2.22.3,2.22.3,2.22.3,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
:doc:`rocSHMEM <rocshmem:index>`,3.1.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.0,2.0.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
,,,,,,,,,,,,,,,,,,,,,,
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
:doc:`hipBLAS <hipblas:index>`,3.1.0,3.1.0,3.0.2,3.0.0,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
:doc:`hipBLASLt <hipblaslt:index>`,1.1.0,1.1.0,1.0.0,1.0.0,0.12.1,0.12.1,0.12.1,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
:doc:`hipFFT <hipfft:index>`,1.0.21,1.0.21,1.0.20,1.0.20,1.0.18,1.0.18,1.0.18,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
:doc:`hipfort <hipfort:index>`,0.7.1,0.7.1,0.7.0,0.7.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
:doc:`hipRAND <hiprand:index>`,3.1.0,3.1.0,3.0.0,3.0.0,2.12.0,2.12.0,2.12.0,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
:doc:`hipSOLVER <hipsolver:index>`,3.1.0,3.1.0,3.0.0,3.0.0,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
:doc:`hipSPARSE <hipsparse:index>`,4.1.0,4.1.0,4.0.1,4.0.1,3.2.0,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.5,0.2.5,0.2.4,0.2.4,0.2.3,0.2.3,0.2.3,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
:doc:`rocALUTION <rocalution:index>`,4.0.1,4.0.1,4.0.0,4.0.0,3.2.3,3.2.3,3.2.3,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
:doc:`rocBLAS <rocblas:index>`,5.1.1,5.1.0,5.0.2,5.0.0,4.4.1,4.4.1,4.4.0,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
:doc:`rocFFT <rocfft:index>`,1.0.35,1.0.35,1.0.34,1.0.34,1.0.32,1.0.32,1.0.32,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
:doc:`rocRAND <rocrand:index>`,4.1.0,4.1.0,4.0.0,4.0.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
:doc:`rocSOLVER <rocsolver:index>`,3.31.0,3.31.0,3.30.1,3.30.0,3.28.2,3.28.2,3.28.0,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
:doc:`rocSPARSE <rocsparse:index>`,4.1.0,4.1.0,4.0.2,4.0.2,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
:doc:`rocWMMA <rocwmma:index>`,2.1.0,2.0.0,2.0.0,2.0.0,1.7.0,1.7.0,1.7.0,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
:doc:`Tensile <tensile:src/index>`,4.44.0,4.44.0,4.44.0,4.44.0,4.43.0,4.43.0,4.43.0,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
,,,,,,,,,,,,,,,,,,,,,,
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`hipCUB <hipcub:index>`,4.1.0,4.1.0,4.0.0,4.0.0,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
:doc:`hipTensor <hiptensor:index>`,2.0.0,2.0.0,2.0.0,2.0.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
:doc:`rocPRIM <rocprim:index>`,4.1.0,4.1.0,4.0.1,4.0.0,3.4.1,3.4.1,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
:doc:`rocThrust <rocthrust:index>`,4.1.0,4.1.0,4.0.0,4.0.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
,,,,,,,,,,,,,,,,,,,,,,
SUPPORT LIBS,,,,,,,,,,,,,,,,,,,,,,
`hipother <https://github.com/ROCm/hipother>`_,7.1.52802,7.1.25424,7.0.51831,7.0.51830,6.4.43483,6.4.43483,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
`rocm-core <https://github.com/ROCm/rocm-core>`_,7.1.1,7.1.0,7.0.2,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
,,,,,,,,,,,,,,,,,,,,,,
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`AMD SMI <amdsmi:index>`,26.2.0,26.1.0,26.0.2,26.0.0,25.5.1,25.5.1,25.4.2,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
:doc:`ROCm Data Center Tool <rdc:index>`,1.2.0,1.2.0,1.1.0,1.1.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.8.0,7.8.0,7.8.0,7.8.0,7.7.0,7.5.0,7.5.0,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.3.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
,,,,,,,,,,,,,,,,,,,,,,
PERFORMANCE TOOLS,,,,,,,,,,,,,,,,,,,,,,
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,2.6.0,2.6.0,2.6.0,2.6.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.3.1,3.3.0,3.2.3,3.2.3,3.1.1,3.1.1,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.2.1,1.2.0,1.1.1,1.1.0,1.0.2,1.0.2,1.0.1,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`ROCProfiler <rocprofiler:index>`,2.0.70101,2.0.70100,2.0.70002,2.0.70000,2.0.60403,2.0.60402,2.0.60401,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,1.0.0,1.0.0,1.0.0,1.0.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`ROCTracer <roctracer:index>`,4.1.70101,4.1.70100,4.1.70002,4.1.70000,4.1.60403,4.1.60402,4.1.60401,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
,,,,,,,,,,,,,,,,,,,,,,
DEVELOPMENT TOOLS,,,,,,,,,,,,,,,,,,,,,,
:doc:`HIPIFY <hipify:index>`,20.0.0,20.0.0,20.0.0,20.0.0,19.0.0,19.0.0,19.0.0,19.0.0,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.4,0.77.4,0.77.4,0.77.3,0.77.2,0.77.2,0.77.2,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,16.3.0,16.3.0,16.3.0,16.3.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.1.0,2.1.0,2.1.0,2.1.0,2.0.4,2.0.4,2.0.4,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
,,,,,,,,,,,,,,,,,,,,,,
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
`Flang <https://github.com/ROCm/flang>`_,20.0.025444,20.0.025425,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
:doc:`llvm-project <llvm-project:index>`,20.0.025444,20.0.025425,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,20.0.025444,20.0.025425,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
,,,,,,,,,,,,,,,,,,,,,,
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`AMD CLR <hip:understand/amd_clr>`,7.1.52802,7.1.25424,7.0.51831,7.0.51830,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
:doc:`HIP <hip:index>`,7.1.52802,7.1.25424,7.0.51831,7.0.51830,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
:doc:`ROCr Runtime <rocr-runtime:index>`,1.18.0,1.18.0,1.18.0,1.18.0,1.15.0,1.15.0,1.15.0,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0

View File

@@ -10,9 +10,10 @@ Use this matrix to view the ROCm compatibility and system requirements across su
You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-rocm-compatibility-matrix>`. You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-rocm-compatibility-matrix>`.
GPUs listed in the following table support compute workloads (no display Accelerators and GPUs listed in the following table support compute workloads (no display
information or graphics). If you're using ROCm with AMD Radeon GPUs or Ryzen APUs for graphics information or graphics). If you're using ROCm with AMD Radeon or Radeon Pro GPUs for graphics
workloads, see the :doc:`Use ROCm on Radeon and Ryzen <radeon:index>` to verify workloads, see the `Use ROCm on Radeon GPU documentation
<https://rocm.docs.amd.com/projects/radeon/en/latest/docs/compatibility.html>`_ to verify
compatibility and system requirements. compatibility and system requirements.
.. |br| raw:: html .. |br| raw:: html
@@ -22,31 +23,28 @@ compatibility and system requirements.
.. container:: format-big-table .. container:: format-big-table
.. csv-table:: .. csv-table::
:header: "ROCm Version", "7.1.1", "7.1.0", "6.4.0" :header: "ROCm Version", "6.4.3", "6.4.2", "6.3.0"
:stub-columns: 1 :stub-columns: 1
:ref:`Operating systems & kernels <OS-kernel-versions>` [#os-compatibility]_,Ubuntu 24.04.3,Ubuntu 24.04.3,Ubuntu 24.04.2 :ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5 ,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5
,"RHEL 10.1, 10.0, 9.7, |br| 9.6, 9.4","RHEL 10.0, 9.6, 9.4","RHEL 9.5, 9.4" ,"RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.5, 9.4"
,RHEL 8.10,RHEL 8.10,RHEL 8.10 ,RHEL 8.10,RHEL 8.10,RHEL 8.10
,SLES 15 SP7,SLES 15 SP7,SLES 15 SP6 ,"SLES 15 SP7, SP6","SLES 15 SP7, SP6","SLES 15 SP6, SP5"
,"Oracle Linux 10, 9, 8","Oracle Linux 10, 9, 8","Oracle Linux 9, 8" ,"Oracle Linux 9, 8 [#mi300x]_","Oracle Linux 9, 8 [#mi300x]_",Oracle Linux 8.10 [#mi300x]_
,"Debian 13, 12","Debian 13, 12",Debian 12 ,Debian 12 [#single-node]_,Debian 12 [#single-node]_,
,,,Azure Linux 3.0 ,Azure Linux 3.0 [#mi300x]_,Azure Linux 3.0 [#mi300x]_,
,Rocky Linux 9,Rocky Linux 9,
,.. _architecture-support-compatibility-matrix:,, ,.. _architecture-support-compatibility-matrix:,,
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA4,CDNA4, :doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3
,CDNA3,CDNA3,CDNA3
,CDNA2,CDNA2,CDNA2 ,CDNA2,CDNA2,CDNA2
,CDNA,CDNA,CDNA ,CDNA,CDNA,CDNA
,RDNA4,RDNA4, ,RDNA4,RDNA4,
,RDNA3,RDNA3,RDNA3 ,RDNA3,RDNA3,RDNA3
,RDNA2,RDNA2,RDNA2 ,RDNA2,RDNA2,RDNA2
,.. _gpu-support-compatibility-matrix:,, ,.. _gpu-support-compatibility-matrix:,,
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>` [#gpu-compatibility]_,gfx950,gfx950, :doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1201 [#RDNA-OS]_,gfx1201 [#RDNA-OS]_,
,gfx1201,gfx1201, ,gfx1200 [#RDNA-OS]_,gfx1200 [#RDNA-OS]_,
,gfx1200,gfx1200, ,gfx1101 [#RDNA-OS]_ [#7700XT-OS]_,gfx1101 [#RDNA-OS]_ [#7700XT-OS]_,
,gfx1101,gfx1101,
,gfx1100,gfx1100,gfx1100 ,gfx1100,gfx1100,gfx1100
,gfx1030,gfx1030,gfx1030 ,gfx1030,gfx1030,gfx1030
,gfx942,gfx942,gfx942 ,gfx942,gfx942,gfx942
@@ -54,122 +52,151 @@ compatibility and system requirements.
,gfx908,gfx908,gfx908 ,gfx908,gfx908,gfx908
,,, ,,,
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix:,, FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix:,,
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.9, 2.8, 2.7","2.8, 2.7, 2.6","2.6, 2.5, 2.4, 2.3" :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 2.1, 2.0, 1.13"
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.20.0, 2.19.1, 2.18.1","2.20.0, 2.19.1, 2.18.1","2.18.1, 2.17.1, 2.16.2" :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1"
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.7.1,0.7.1,0.4.35 :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.31
:doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat]_,N/A,N/A,2.4.0 :doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>`,N/A,N/A,85f95ae
:doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat]_,N/A,N/A,b5997 :doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>`,N/A,N/A,0.7.0
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.23.1,1.22.0,1.20.0 `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.17.3
,,, ,,,
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix:,, THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix:,,
`UCC <https://github.com/ROCm/ucc>`_,>=1.4.0,>=1.4.0,>=1.3.0 `UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0
`UCX <https://github.com/ROCm/ucx>`_,>=1.17.0,>=1.17.0,>=1.15.0 `UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0
,,, ,,,
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix:,, THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix:,,
Thrust,2.8.5,2.8.5,2.5.0 Thrust,2.5.0,2.5.0,2.3.2
CUB,2.8.5,2.8.5,2.5.0 CUB,2.5.0,2.5.0,2.3.2
,,, ,,,
DRIVER & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,, KMD & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,,
:doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.20.1, 30.20.0 [#mi325x_KVM]_, |br| 30.10.2, 30.10.1 [#driver_patch]_, |br| 30.10, 6.4.x","30.20.0 [#mi325x_KVM]_, 30.10.2, |br| 30.10.1 [#driver_patch]_, 30.10, 6.4.x","6.4.x, 6.3.x, 6.2.x, 6.1.x" :doc:`KMD versions <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
,,, ,,,
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,, ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,,
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0 :doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0
:doc:`MIGraphX <amdmigraphx:index>`,2.14.0,2.14.0,2.12.0 :doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.11.0
:doc:`MIOpen <miopen:index>`,3.5.1,3.5.1,3.4.0 :doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.3.0
:doc:`MIVisionX <mivisionx:index>`,3.4.0,3.4.0,3.2.0 :doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.1.0
:doc:`rocAL <rocal:index>`,2.4.0,2.4.0,2.2.0 :doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.1.0
:doc:`rocDecode <rocdecode:index>`,1.4.0,1.4.0,0.10.0 :doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.8.0
:doc:`rocJPEG <rocjpeg:index>`,1.2.0,1.2.0,0.8.0 :doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.6.0
:doc:`rocPyDecode <rocpydecode:index>`,0.7.0,0.7.0,0.3.1 :doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.2.0
:doc:`RPP <rpp:index>`,2.1.0,2.1.0,1.9.10 :doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.1
,,, ,,,
COMMUNICATION,.. _commlibs-support-compatibility-matrix:,, COMMUNICATION,.. _commlibs-support-compatibility-matrix:,,
:doc:`RCCL <rccl:index>`,2.27.7,2.27.7,2.22.3 :doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.21.5
:doc:`rocSHMEM <rocshmem:index>`,3.1.0,3.0.0,2.0.0 :doc:`rocSHMEM <rocshmem:index>`,2.0.1,2.0.1,N/A
,,, ,,,
MATH LIBS,.. _mathlibs-support-compatibility-matrix:,, MATH LIBS,.. _mathlibs-support-compatibility-matrix:,,
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0 `half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0
:doc:`hipBLAS <hipblas:index>`,3.1.0,3.1.0,2.4.0 :doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.3.0
:doc:`hipBLASLt <hipblaslt:index>`,1.1.0,1.1.0,0.12.0 :doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.1,0.10.0
:doc:`hipFFT <hipfft:index>`,1.0.21,1.0.21,1.0.18 :doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.17
:doc:`hipfort <hipfort:index>`,0.7.1,0.7.1,0.6.0 :doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.5.0
:doc:`hipRAND <hiprand:index>`,3.1.0,3.1.0,2.12.0 :doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.11.0
:doc:`hipSOLVER <hipsolver:index>`,3.1.0,3.1.0,2.4.0 :doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.3.0
:doc:`hipSPARSE <hipsparse:index>`,4.1.0,4.1.0,3.2.0 :doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.1.2
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.5,0.2.5,0.2.3 :doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.2
:doc:`rocALUTION <rocalution:index>`,4.0.1,4.0.1,3.2.2 :doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.3,3.2.1
:doc:`rocBLAS <rocblas:index>`,5.1.1,5.1.0,4.4.0 :doc:`rocBLAS <rocblas:index>`,4.4.1,4.4.1,4.3.0
:doc:`rocFFT <rocfft:index>`,1.0.35,1.0.35,1.0.32 :doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.31
:doc:`rocRAND <rocrand:index>`,4.1.0,4.1.0,3.3.0 :doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.2.0
:doc:`rocSOLVER <rocsolver:index>`,3.31.0,3.31.0,3.28.0 :doc:`rocSOLVER <rocsolver:index>`,3.28.2,3.28.2,3.27.0
:doc:`rocSPARSE <rocsparse:index>`,4.1.0,4.1.0,3.4.0 :doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.3.0
:doc:`rocWMMA <rocwmma:index>`,2.1.0,2.0.0,1.7.0 :doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.6.0
:doc:`Tensile <tensile:src/index>`,4.44.0,4.44.0,4.43.0 :doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.42.0
,,, ,,,
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix:,, PRIMITIVES,.. _primitivelibs-support-compatibility-matrix:,,
:doc:`hipCUB <hipcub:index>`,4.1.0,4.1.0,3.4.0 :doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.3.0
:doc:`hipTensor <hiptensor:index>`,2.0.0,2.0.0,1.5.0 :doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.4.0
:doc:`rocPRIM <rocprim:index>`,4.1.0,4.1.0,3.4.0 :doc:`rocPRIM <rocprim:index>`,3.4.1,3.4.1,3.3.0
:doc:`rocThrust <rocthrust:index>`,4.1.0,4.1.0,3.3.0 :doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0
,,, ,,,
SUPPORT LIBS,,, SUPPORT LIBS,,,
`hipother <https://github.com/ROCm/hipother>`_,7.1.52802,7.1.25424,6.4.43482 `hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43483,6.3.42131
`rocm-core <https://github.com/ROCm/rocm-core>`_,7.1.1,7.1.0,6.4.0 `rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.3,6.4.2,6.3.0
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_ `ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_
,,, ,,,
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix:,, SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix:,,
:doc:`AMD SMI <amdsmi:index>`,26.2.0,26.1.0,25.3.0 :doc:`AMD SMI <amdsmi:index>`,25.5.1,25.5.1,24.7.1
:doc:`ROCm Data Center Tool <rdc:index>`,1.2.0,1.2.0,0.3.0 :doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0 :doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.8.0,7.8.0,7.5.0 :doc:`ROCm SMI <rocm_smi_lib:index>`,7.7.0,7.5.0,7.4.0
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.3.0,1.2.0,1.1.0 :doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0
,,, ,,,
PERFORMANCE TOOLS,,, PERFORMANCE TOOLS,,,
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,2.6.0,2.6.0,1.4.0 :doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.3.1,3.3.0,3.1.0 :doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.1,3.1.1,3.0.0
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.2.1,1.2.0,1.0.0 :doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.2,1.0.2,0.1.0
:doc:`ROCProfiler <rocprofiler:index>`,2.0.70101,2.0.70100,2.0.60400 :doc:`ROCProfiler <rocprofiler:index>`,2.0.60403,2.0.60402,2.0.60300
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,1.0.0,1.0.0,0.6.0 :doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.5.0
:doc:`ROCTracer <roctracer:index>`,4.1.70101,4.1.70100,4.1.60400 :doc:`ROCTracer <roctracer:index>`,4.1.60403,4.1.60402,4.1.60300
,,, ,,,
DEVELOPMENT TOOLS,,, DEVELOPMENT TOOLS,,,
:doc:`HIPIFY <hipify:index>`,20.0.0,20.0.0,19.0.0 :doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,18.0.0.24455
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0 :doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.4,0.77.4,0.77.2 :doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.0
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,16.3.0,16.3.0,15.2.0 :doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.5.0,0.5.0,0.4.0 `rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.1.0,2.1.0,2.0.4 :doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.3
,,, ,,,
COMPILERS,.. _compilers-support-compatibility-matrix:,, COMPILERS,.. _compilers-support-compatibility-matrix:,,
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A `clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1 :doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1
`Flang <https://github.com/ROCm/flang>`_,20.0.025444,20.0.025425,19.0.0.25133 `Flang <https://github.com/ROCm/flang>`_,19.0.0.25224,19.0.0.25224,18.0.0.24455
:doc:`llvm-project <llvm-project:index>`,20.0.025444,20.0.025425,19.0.0.25133 :doc:`llvm-project <llvm-project:index>`,19.0.0.25224,19.0.0.25224,18.0.0.24491
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,20.0.025444,20.0.025425,19.0.0.25133 `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25224,19.0.0.25224,18.0.0.24491
,,, ,,,
RUNTIMES,.. _runtime-support-compatibility-matrix:,, RUNTIMES,.. _runtime-support-compatibility-matrix:,,
:doc:`AMD CLR <hip:understand/amd_clr>`,7.1.52802,7.1.25424,6.4.43482 :doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43484,6.4.43484,6.3.42131
:doc:`HIP <hip:index>`,7.1.52802,7.1.25424,6.4.43482 :doc:`HIP <hip:index>`,6.4.43484,6.4.43484,6.3.42131
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0 `OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0
:doc:`ROCr Runtime <rocr-runtime:index>`,1.18.0,1.18.0,1.15.0 :doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.14.0
.. rubric:: Footnotes .. rubric:: Footnotes
.. [#os-compatibility] Some operating systems are supported on limited GPUs. For detailed information, see the latest :ref:`supported_distributions`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-operating-systems>`__, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-operating-systems>`__, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-operating-systems>`__. .. [#mi300x] Oracle Linux and Azure Linux are supported only on AMD Instinct MI300X.
.. [#gpu-compatibility] Some GPUs have limited operating system support. For detailed information, see the latest :ref:`supported_GPUs`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-gpus>`__, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-gpus>`__, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-gpus>`__. .. [#single-node] Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
.. [#dgl_compat] DGL is only supported on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0. .. [#RDNA-OS] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
.. [#llama-cpp_compat] llama.cpp is only supported on ROCm 7.0.0 and ROCm 6.4.x. .. [#7700XT-OS] Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
.. [#mi325x_KVM] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0. .. [#kfd_support] As of ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The tested user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
.. [#driver_patch] AMD GPU Driver (amdgpu) 30.10.1 is a quality release that resolves an issue identified in the 30.10 release. There are no other significant changes or feature additions in ROCm 7.0.1 from ROCm 7.0.0. AMD GPU Driver (amdgpu) 30.10.1 is compatible with ROCm 7.0.1 and ROCm 7.0.0.
.. [#kfd_support] As of ROCm 6.4.0, forward and backward compatibility between the AMD GPU Driver (amdgpu) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The supported user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and AMD GPU Driver support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
.. [#ROCT-rocr] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package. .. [#ROCT-rocr] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
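To see which GPU / LLVM target an installed accelerator reports, for comparison against the "GPU / LLVM target" rows and the GPU-specific footnotes above, the following is a minimal Python sketch. It assumes ROCm's ``rocminfo`` package is installed, which ships the ``rocm_agent_enumerator`` helper; treat it as an illustration rather than the documented way to query targets.
.. code-block:: python
   # Minimal sketch: list the gfx targets visible on this host so they can be
   # checked against the "GPU / LLVM target" rows in the matrix above.
   # Assumes ROCm's rocminfo package (which provides rocm_agent_enumerator) is installed.
   import shutil
   import subprocess
   def detected_gfx_targets() -> list[str]:
       tool = shutil.which("rocm_agent_enumerator")
       if tool is None:
           raise RuntimeError("rocm_agent_enumerator not found; is ROCm installed?")
       out = subprocess.run([tool], capture_output=True, text=True, check=True)
       # One target per line; gfx000 denotes the host CPU agent and is skipped.
       return [t.strip() for t in out.stdout.splitlines()
               if t.strip() and t.strip() != "gfx000"]
   if __name__ == "__main__":
       for target in detected_gfx_targets():
           print(target)  # e.g. gfx942, gfx1100, gfx1030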
.. _OS-kernel-versions: .. _OS-kernel-versions:
Operating systems, kernel and Glibc versions Operating systems, kernel and Glibc versions
********************************************* *********************************************
For detailed information on the operating systems supported on ROCm 7.1.1 and the associated kernel and Glibc versions, see the latest :ref:`supported_distributions`. For version specific information, see `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-operating-systems>`__ and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-operating-systems>`__. Use this lookup table to confirm which operating system and kernel versions are supported with ROCm.
.. csv-table::
:header: "OS", "Version", "Kernel", "Glibc"
:widths: 40, 20, 30, 20
:stub-columns: 1
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 24.04.2, "6.8 GA, 6.11 HWE", 2.39
,,
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 22.04.5, "5.15 GA, 6.8 HWE", 2.35
,,
`Red Hat Enterprise Linux (RHEL 9) <https://access.redhat.com/articles/3078#RHEL9>`_, 9.6, 5.14+, 2.34
,9.5, 5.14+, 2.34
,9.4, 5.14+, 2.34
,9.3, 5.14+, 2.34
,,
`Red Hat Enterprise Linux (RHEL 8) <https://access.redhat.com/articles/3078#RHEL8>`_, 8.10, 4.18.0+, 2.28
,8.9, 4.18.0, 2.28
,,
`SUSE Linux Enterprise Server (SLES) <https://www.suse.com/support/kb/doc/?id=000019587#SLE15SP4>`_, 15 SP7, 6.11.0+, 2.38
,15 SP6, "6.5.0+, 6.4.0", 2.38
,15 SP5, 5.14.21, 2.31
,,
`Oracle Linux <https://blogs.oracle.com/scoter/post/oracle-linux-and-unbreakable-enterprise-kernel-uek-releases>`_, 9, 5.15.0 (UEK), 2.35
,8, 5.15.0 (UEK), 2.28
,,
`Debian <https://www.debian.org/download>`_,12, 6.1, 2.36
,,
`Azure Linux <https://techcommunity.microsoft.com/blog/linuxandopensourceblog/azure-linux-3-0-now-in-preview-on-azure-kubernetes-service-v1-31/4287229>`_,3.0, 6.6.60, 2.38
,,
.. note::
@@ -201,18 +228,24 @@ Expand for full historical view of:
.. rubric:: Footnotes
.. [#os-compatibility-past-60] Some operating systems are supported on limited GPUs. For detailed information, see the latest :ref:`supported_distributions`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-operating-systems>`__, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-operating-systems>`__, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-operating-systems>`__.
.. [#mi300x-past-60] Oracle Linux and Azure Linux are supported only on AMD Instinct MI300X.
.. [#gpu-compatibility-past-60] Some GPUs have limited operating system support. For detailed information, see the latest :ref:`supported_GPUs`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-gpus>`__, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-gpus>`__, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-gpus>`__.
.. [#single-node-past-60] Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
.. [#tf-mi350-past-60] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead.
.. [#RDNA-OS-past-60] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
.. [#verl_compat-past-60] verl is only supported on ROCm 7.0.0 and 6.2.0.
.. [#7700XT-OS-past-60] Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
.. [#stanford-megatron-lm_compat-past-60] Stanford Megatron-LM is only supported on ROCm 6.3.0.
.. [#mi300_624-past-60] **For ROCm 6.2.4** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
.. [#dgl_compat-past-60] DGL is only supported on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0.
.. [#mi300_622-past-60] **For ROCm 6.2.2** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
.. [#megablocks_compat-past-60] Megablocks is only supported on ROCm 6.3.0.
.. [#mi300_621-past-60] **For ROCm 6.2.1** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
.. [#ray_compat-past-60] Ray is only supported on ROCm 7.0.0 and 6.4.1.
.. [#mi300_620-past-60] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
.. [#llama-cpp_compat-past-60] llama.cpp is only supported on ROCm 7.0.0 and 6.4.x.
.. [#mi300_612-past-60] **For ROCm 6.1.2** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
.. [#flashinfer_compat-past-60] FlashInfer is only supported on ROCm 6.4.1.
.. [#mi300_611-past-60] **For ROCm 6.1.1** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
.. [#mi325x_KVM-past-60] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0.
.. [#mi300_610-past-60] **For ROCm 6.1.0** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
.. [#driver_patch-past-60] AMD GPU Driver (amdgpu) 30.10.1 is a quality release that resolves an issue identified in the 30.10 release. There are no other significant changes or feature additions in ROCm 7.0.1 from ROCm 7.0.0. AMD GPU Driver (amdgpu) 30.10.1 is compatible with ROCm 7.0.1 and ROCm 7.0.0.
.. [#mi300_602-past-60] **For ROCm 6.0.2** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
.. [#kfd_support-past-60] As of ROCm 6.4.0, forward and backward compatibility between the AMD GPU Driver (amdgpu) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The supported user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and AMD GPU Driver support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
.. [#mi300_600-past-60] **For ROCm 6.0.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
.. [#verl_compat] verl is only supported on ROCm 6.2.0.
.. [#stanford-megatron-lm_compat] Stanford Megatron-LM is only supported on ROCm 6.3.0.
.. [#dgl_compat] DGL is only supported on ROCm 6.4.0.
.. [#megablocks_compat] Megablocks is only supported on ROCm 6.3.0.
.. [#taichi_compat] Taichi is only supported on ROCm 6.3.2.
.. [#ROCT-rocr-past-60] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.

View File

@@ -2,7 +2,7 @@
.. meta::
:description: Deep Graph Library (DGL) compatibility
:keywords: GPU, CPU, deep graph library, DGL, deep learning, framework compatibility
.. version-set:: rocm_version latest
@@ -10,274 +10,215 @@
DGL compatibility
********************************************************************************
Deep Graph Library (`DGL <https://www.dgl.ai/>`__) is an easy-to-use, high-performance, and scalable
Python package for deep learning on graphs. DGL is framework agnostic, meaning
that if a deep graph model is a component in an end-to-end application, the rest of
the logic is implemented using PyTorch.
DGL provides a high-performance graph object that can reside on either CPUs or GPUs.
It bundles structural data features for better control and provides a variety of functions
for computing with graph objects, including efficient and customizable message passing
primitives for Graph Neural Networks.
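As a minimal illustration of these message passing primitives, the following sketch builds a
small graph and runs a single ``GraphConv`` layer through the PyTorch backend. It is only a
sketch and assumes a working DGL + PyTorch installation (for example, one of the ``rocm/dgl``
Docker images listed below); on ROCm builds of PyTorch the GPU is still addressed as ``"cuda"``.

.. code-block:: python

   import dgl
   import torch
   from dgl.nn import GraphConv

   # Build a small directed ring graph with 4 nodes and 4 edges.
   src = torch.tensor([0, 1, 2, 3])
   dst = torch.tensor([1, 2, 3, 0])
   g = dgl.graph((src, dst))

   # Attach an 8-dimensional feature vector to every node.
   feats = torch.randn(g.num_nodes(), 8)

   # One graph-convolution layer: message passing plus aggregation.
   conv = GraphConv(in_feats=8, out_feats=4)

   # Move graph, features, and layer to the GPU if one is visible.
   device = "cuda" if torch.cuda.is_available() else "cpu"
   g, feats, conv = g.to(device), feats.to(device), conv.to(device)

   out = conv(g, feats)   # shape: (4, 4)
   print(out.shape)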
Support overview
================================================================================
- The ROCm-supported version of DGL is maintained in the official `https://github.com/ROCm/dgl
<https://github.com/ROCm/dgl>`__ repository, which differs from the
`https://github.com/dmlc/dgl <https://github.com/dmlc/dgl>`__ upstream repository.
- To get started and install DGL on ROCm, use the prebuilt :ref:`Docker images <dgl-docker-compat>`,
which include ROCm, DGL, and all required dependencies.
- See the :doc:`ROCm DGL installation guide <rocm-install-on-linux:install/3rd-party/dgl-install>`
for installation and setup instructions.
Use cases and recommendations
================================================================================
DGL can be used for Graph Learning, and building popular graph models like
GAT, GCN and GraphSage. Using these we can support a variety of use-cases such as:
- Recommender systems
- Network Optimization and Analysis
- 1D (Temporal) and 2D (Image) Classification
- Drug Discovery
Multiple use cases of DGL have been tested and verified.
However, a recommended example follows a drug discovery pipeline using the ``SE3Transformer``.
Refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`_,
where you can search for DGL examples and best practices to optimize your training workflows on AMD GPUs.
Coverage includes:
- Single-GPU training/inference
- Multi-GPU training
- You can also consult the upstream `Installation guide <https://www.dgl.ai/pages/start.html>`__
for additional context.
.. _dgl-docker-compat:
Compatibility matrix
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `DGL images <https://hub.docker.com/r/rocm/dgl/tags>`__
with ROCm backends on Docker Hub. The following Docker image tags and associated
inventories represent the latest available DGL version from the official Docker Hub.
Click the |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- ROCm
- DGL
- PyTorch
- Ubuntu
- Python
- GPU
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu24.04_py3.12_pytorch_2.8.0/images/sha256-943698ddf54c22a7bcad2e5b4ff467752e29e4ba6d0c926789ae7b242cbd92dd"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.8.0 <https://github.com/pytorch/pytorch/releases/tag/v2.8.0>`__
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
- MI300X, MI250X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu24.04_py3.12_pytorch_2.6.0/images/sha256-b2ec286a035eb7d0a6aab069561914d21a3cac462281e9c024501ba5ccedfbf7"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
- MI300X, MI250X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu22.04_py3.10_pytorch_2.7.1/images/sha256-d27aee16df922ccf0bcd9107bfcb6d20d34235445d456c637e33ca6f19d11a51"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.7.1 <https://github.com/pytorch/pytorch/releases/tag/v2.7.1>`__
- 22.04
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
- MI300X, MI250X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm6.4.3_ubuntu24.04_py3.12_pytorch_2.6.0/images/sha256-f3ba6a3c9ec9f6c1cde28449dc9780e0c4c16c4140f4b23f158565fbfd422d6b"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
- MI300X, MI250X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-8ce2c3bcfaa137ab94a75f9e2ea711894748980f57417739138402a542dd5564"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
- MI300X, MI250X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-cf1683283b8eeda867b690229c8091c5bbf1edb9f52e8fb3da437c49a612ebe4"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.4.1 <https://github.com/pytorch/pytorch/releases/tag/v2.4.1>`__
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
- MI300X, MI250X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-4834f178c3614e2d09e89e32041db8984c456d45dfd20286e377ca8635686554"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.4.1 <https://github.com/pytorch/pytorch/releases/tag/v2.4.1>`__
- 22.04
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
- MI300X, MI250X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-88740a2c8ab4084b42b10c3c6ba984cab33dd3a044f479c6d7618e2b2cb05e69"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.3.0 <https://github.com/pytorch/pytorch/releases/tag/v2.3.0>`__
- 22.04
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
- MI300X, MI250X
.. _dgl-key-rocm-libraries:
Key ROCm libraries for DGL
================================================================================
DGL on ROCm depends on specific libraries that affect its features and performance.
Using the DGL Docker container or building it with the provided Dockerfile or a ROCm base image is recommended.
If you prefer to build it yourself, ensure the following dependencies are installed (a quick sanity check follows the table):
.. list-table::
:header-rows: 1
* - ROCm library
- ROCm 7.0.0 Version
- ROCm 6.4.x Version
- Purpose
* - `Composable Kernel <https://github.com/ROCm/composable_kernel>`_
- 1.1.0
- 1.1.0
- Enables faster execution of core operations like matrix multiplication
(GEMM), convolutions and transformations.
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`_
- 3.0.0
- 2.4.0
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
matrix and vector operations.
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
- 1.0.0
- 0.12.0
- hipBLASLt is an extension of the hipBLAS library, providing additional
features like epilogues fused into the matrix multiplication kernel or
use of integer tensor cores.
* - `hipCUB <https://github.com/ROCm/hipCUB>`_
- 4.0.0
- 3.4.0
- Provides a C++ template library for parallel algorithms for reduction,
scan, sort and select.
* - `hipFFT <https://github.com/ROCm/hipFFT>`_
- 1.0.20
- 1.0.18
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
* - `hipRAND <https://github.com/ROCm/hipRAND>`_
- 3.0.0
- 2.12.0
- Provides fast random number generation for GPUs.
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
- 3.0.0
- 2.4.0
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and
singular value decompositions (SVD).
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_
- 4.0.1
- 3.2.0
- Accelerates operations on sparse matrices, such as sparse matrix-vector
or matrix-matrix products.
* - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`_
- 0.2.4
- 0.2.3
- Accelerates operations on sparse matrices, such as sparse matrix-vector
or matrix-matrix products.
* - `hipTensor <https://github.com/ROCm/hipTensor>`_
- 2.0.0
- 1.5.0
- Optimizes for high-performance tensor operations, such as contractions.
* - `MIOpen <https://github.com/ROCm/MIOpen>`_
- 3.5.0
- 3.4.0
- Optimizes deep learning primitives such as convolutions, pooling,
normalization, and activation functions.
* - `MIGraphX <https://github.com/ROCm/AMDMIGraphX>`_
- 2.13.0
- 2.12.0
- Adds graph-level optimizations, support for ONNX models and mixed precision,
and enables Ahead-of-Time (AOT) compilation.
* - `MIVisionX <https://github.com/ROCm/MIVisionX>`_
- 3.3.0
- 3.2.0
- Accelerates computer vision and AI workloads like
preprocessing, augmentation, and inferencing.
* - `rocAL <https://github.com/ROCm/rocAL>`_
- 3.3.0
- 2.2.0
- Accelerates the data pipeline by offloading intensive preprocessing and
augmentation tasks. rocAL is part of MIVisionX.
* - `RCCL <https://github.com/ROCm/rccl>`_
- 2.26.6
- 2.22.3
- Optimizes multi-GPU communication for operations like AllReduce and
Broadcast.
* - `rocDecode <https://github.com/ROCm/rocDecode>`_
- 1.0.0
- 0.10.0
- Provides hardware-accelerated data decoding capabilities, particularly
for image, video, and other dataset formats.
* - `rocJPEG <https://github.com/ROCm/rocJPEG>`_
- 1.1.0
- 0.8.0
- Provides hardware-accelerated JPEG image decoding and encoding.
* - `RPP <https://github.com/ROCm/RPP>`_
- 2.0.0
- 1.9.10
- Speeds up data augmentation, transformation, and other preprocessing steps.
* - `rocThrust <https://github.com/ROCm/rocThrust>`_
- 4.0.0
- 3.3.0
- Provides a C++ template library for parallel algorithms like sorting,
reduction, and scanning.
* - `rocWMMA <https://github.com/ROCm/rocWMMA>`_
- 2.0.0
- 1.7.0
- Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
multiplication (GEMM) and accumulation operations with mixed precision
support.
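After the stack is installed, the short check below confirms that the ROCm-enabled PyTorch and DGL
builds in your environment actually see the GPU. This is only a hypothetical sanity check, not part
of the DGL build process, and it assumes DGL and a ROCm build of PyTorch are already installed.

.. code-block:: python

   import dgl
   import torch

   # Report the versions and whether a ROCm device is visible.
   print("PyTorch:", torch.__version__)
   print("HIP runtime:", torch.version.hip)        # None on non-ROCm builds
   print("GPU visible:", torch.cuda.is_available())  # ROCm devices surface through the CUDA API
   print("DGL:", dgl.__version__)

   if torch.cuda.is_available():
       print("Device:", torch.cuda.get_device_name(0))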
.. _dgl-supported-features-latest:
Supported features with ROCm 7.0.0
================================================================================
Many functions and methods available upstream are also supported in DGL on ROCm.
Instead of listing them all, support is grouped into the following categories to provide a general overview.
* DGL Base
* DGL Backend
* DGL Data
* DGL Dataloading
* DGL Graph
* DGL Function
* DGL Ops
* DGL Sampling
@@ -289,76 +230,26 @@ Instead of listing them all, support is grouped into the following categories to
* DGL NN
* DGL Optim
* DGL Sparse
* GraphBolt
.. _dgl-unsupported-features-latest:
Unsupported features with ROCm 7.0.0
================================================================================
* TF32 Support (only supported for PyTorch 2.7 and above; see the sketch below)
* Kineto/ROCTracer integration
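For reference, the sketch below shows how TF32 is typically opted into from PyTorch. This is the
standard PyTorch switch rather than a DGL-specific API, and it assumes a ROCm wheel built against
PyTorch 2.7 or newer; older wheels simply keep using plain FP32 math.

.. code-block:: python

   import torch

   if torch.cuda.is_available():
       # TF32 is opt-in; on ROCm these flags are routed to the hipBLASLt/MIOpen backends.
       torch.backends.cuda.matmul.allow_tf32 = True   # matrix multiplications
       torch.backends.cudnn.allow_tf32 = True         # convolution kernels

       a = torch.randn(1024, 1024, device="cuda")
       b = torch.randn(1024, 1024, device="cuda")
       c = a @ b
       print(c.dtype)  # still torch.float32; TF32 only changes the internal math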
.. _dgl-unsupported-functions:
Unsupported functions with ROCm 7.0.0
================================================================================
* ``bfs``
* ``format``
* ``multiprocess_sparse_adam_state_dict``
* ``record_stream_ndarray``
* ``half_spmm``
* ``segment_mm``
* ``gather_mm_idx_b``
* ``pgexplainer``
* ``sample_labors_prob``
* ``sample_labors_noprob``
* ``sparse_admin``
.. _dgl-recommendations:
Use cases and recommendations
================================================================================
DGL can be used for Graph Learning, and building popular graph models like
GAT, GCN, and GraphSage. Using these models, a variety of use cases are supported:
- Recommender systems
- Network Optimization and Analysis
- 1D (Temporal) and 2D (Image) Classification
- Drug Discovery
For use cases and recommendations, refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__,
where you can search for DGL examples and best practices to optimize your workloads on AMD GPUs.
* Although multiple use cases of DGL have been tested and verified, a few have been
outlined in the `DGL in the Real World: Running GNNs on Real Use Cases
<https://rocm.blogs.amd.com/artificial-intelligence/dgl_blog2/README.html>`__ blog
post, which walks through four real-world graph neural network (GNN) workloads
implemented with the Deep Graph Library on ROCm. It covers tasks ranging from
heterogeneous e-commerce graphs and multiplex networks (GATNE) to molecular graph
regression (GNN-FiLM) and EEG-based neurological diagnosis (EEG-GCNN). For each use
case, the authors detail: the dataset and task, how DGL is used, and their experience
porting to ROCm. It is shown that DGL codebases often run without modification, with
seamless integration of graph operations, message passing, sampling, and convolution.
* The `Graph Neural Networks (GNNs) at Scale: DGL with ROCm on AMD Hardware
<https://rocm.blogs.amd.com/artificial-intelligence/why-graph-neural/README.html>`__
blog post introduces the Deep Graph Library (DGL) and its enablement on the AMD ROCm platform,
bringing high-performance graph neural network (GNN) training to AMD GPUs. DGL bridges
the gap between dense tensor frameworks and the irregular nature of graph data through a
graph-first, message-passing abstraction. Its design ensures scalability, flexibility, and
interoperability across frameworks like PyTorch and TensorFlow. AMD's ROCm integration
enables DGL to run efficiently on HIP-based GPUs, supported by prebuilt Docker containers
and open-source repositories. This marks a major step in AMD's mission to advance open,
scalable AI ecosystems beyond traditional architectures.
You can pre-process datasets and begin training on AMD GPUs through the following (a minimal single-GPU sketch is shown after the list):
* Single-GPU training/inference
* Multi-GPU training
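As a minimal illustration of the single-GPU path, the sketch below trains a two-layer GCN on a tiny
synthetic graph. It is not tied to any particular dataset or blog example and assumes the same
DGL + ROCm PyTorch environment as above.

.. code-block:: python

   import dgl
   import torch
   import torch.nn.functional as F
   from dgl.nn import GraphConv

   device = "cuda" if torch.cuda.is_available() else "cpu"

   # Tiny synthetic graph: a ring of 100 nodes with random features and binary labels.
   n = 100
   src = torch.arange(n)
   dst = (src + 1) % n
   g = dgl.graph((src, dst)).to(device)
   feats = torch.randn(n, 16, device=device)
   labels = torch.randint(0, 2, (n,), device=device)

   class GCN(torch.nn.Module):
       def __init__(self):
           super().__init__()
           self.conv1 = GraphConv(16, 32)
           self.conv2 = GraphConv(32, 2)

       def forward(self, graph, x):
           h = F.relu(self.conv1(graph, x))
           return self.conv2(graph, h)

   model = GCN().to(device)
   opt = torch.optim.Adam(model.parameters(), lr=1e-2)

   for epoch in range(5):
       logits = model(g, feats)
       loss = F.cross_entropy(logits, labels)
       opt.zero_grad()
       loss.backward()
       opt.step()
       print(f"epoch {epoch}: loss={loss.item():.4f}")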
Previous versions
===============================================================================
See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/dgl-history` to find documentation for previous releases
of the ``ROCm/dgl`` Docker image.

View File

@@ -1,98 +0,0 @@
:orphan:
.. meta::
:description: FlashInfer compatibility
:keywords: GPU, LLM, FlashInfer, deep learning, framework compatibility
.. version-set:: rocm_version latest
********************************************************************************
FlashInfer compatibility
********************************************************************************
`FlashInfer <https://docs.flashinfer.ai/index.html>`__ is a library and kernel generator
for Large Language Models (LLMs) that provides high-performance graphics
processing unit (GPU) kernels. FlashInfer focuses on LLM serving and inference, as well
as on strong performance across diverse serving scenarios.
FlashInfer features highly efficient attention kernels, load-balanced scheduling, and memory-optimized
techniques, while supporting customized attention variants. It's compatible with ``torch.compile``, and
offers high-performance LLM-specific operators, with easy integration through PyTorch and C++ APIs.
.. note::
The ROCm port of FlashInfer is under active development, and some features are not yet available.
For the latest feature compatibility matrix, refer to the ``README`` of the
`https://github.com/ROCm/flashinfer <https://github.com/ROCm/flashinfer>`__ repository.
Support overview
================================================================================
- The ROCm-supported version of FlashInfer is maintained in the official `https://github.com/ROCm/flashinfer
<https://github.com/ROCm/flashinfer>`__ repository, which differs from the
`https://github.com/flashinfer-ai/flashinfer <https://github.com/flashinfer-ai/flashinfer>`__
upstream repository.
- To get started and install FlashInfer on ROCm, use the prebuilt :ref:`Docker images <flashinfer-docker-compat>`,
which include ROCm, FlashInfer, and all required dependencies.
- See the :doc:`ROCm FlashInfer installation guide <rocm-install-on-linux:install/3rd-party/flashinfer-install>`
for installation and setup instructions.
- You can also consult the upstream `Installation guide <https://docs.flashinfer.ai/installation.html>`__
for additional context.
.. _flashinfer-docker-compat:
Compatibility matrix
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `FlashInfer images <https://hub.docker.com/r/rocm/flashinfer/tags>`__
with ROCm backends on Docker Hub. The following Docker image tag and associated
inventories represent the latest available FlashInfer version from the official Docker Hub.
Click |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- ROCm
- FlashInfer
- PyTorch
- Ubuntu
- Python
- GPU
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/flashinfer/flashinfer-0.2.5_rocm6.4_ubuntu24.04_py3.12_pytorch2.7/images/sha256-558914838821c88c557fb6d42cfbc1bdb67d79d19759f37c764a9ee801f93313"><i class="fab fa-docker fa-lg"></i> rocm/flashinfer</a>
- `6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`__
- `v0.2.5 <https://github.com/flashinfer-ai/flashinfer/releases/tag/v0.2.5>`__
- `2.7.1 <https://github.com/ROCm/pytorch/releases/tag/v2.7.1>`__
- 24.04
- `3.12 <https://www.python.org/downloads/release/python-3129/>`__
- MI300X
.. _flashinfer-recommendations:
Use cases and recommendations
================================================================================
The release of FlashInfer on ROCm provides the decode functionality for LLM inferencing.
In the decode phase, tokens are generated sequentially, with the model predicting each new
token based on the previously generated tokens and the input context.
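To make the decode phase concrete, the toy loop below generates tokens one at a time from a growing
context. It deliberately uses plain PyTorch modules rather than the FlashInfer API, so it only
illustrates the access pattern that FlashInfer's decode kernels accelerate; all names here are
illustrative placeholders.

.. code-block:: python

   import torch

   # Toy stand-in for an LLM decoder: an embedding plus a linear "head".
   vocab_size, hidden = 100, 32
   embed = torch.nn.Embedding(vocab_size, hidden)
   head = torch.nn.Linear(hidden, vocab_size)

   tokens = [1, 5, 7]  # prompt token ids
   with torch.no_grad():
       for _ in range(8):  # decode 8 new tokens, one per step
           ctx = torch.tensor(tokens)
           state = embed(ctx).mean(dim=0)   # crude stand-in for attention over the KV cache
           next_id = int(head(state).argmax())
           tokens.append(next_id)

   print(tokens)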
FlashInfer on ROCm brings over upstream features such as load balancing, sparse and dense
attention optimizations, and batching support, enabling efficient execution on AMD Instinct™ MI300X GPUs.
Because large LLMs often require substantial KV caches or long context windows, FlashInfer on ROCm
also implements cascade attention from upstream to reduce memory usage.
For currently supported use cases and recommendations, refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__,
where you can search for examples and best practices to optimize your workloads on AMD GPUs.

View File

@@ -2,7 +2,7 @@
.. meta::
:description: JAX compatibility
:keywords: GPU, JAX, deep learning, framework compatibility
.. version-set:: rocm_version latest
@@ -10,58 +10,42 @@
JAX compatibility
*******************************************************************************
`JAX <https://docs.jax.dev/en/latest/notebooks/thinking_in_jax.html>`__ is a library
for array-oriented numerical computation (similar to NumPy), with automatic differentiation
and just-in-time (JIT) compilation to enable high-performance machine learning research.
JAX provides an API that combines automatic differentiation and the
Accelerated Linear Algebra (XLA) compiler to achieve high-performance machine
learning at scale. JAX uses composable transformations of Python and NumPy through
JIT compilation, automatic vectorization, and parallelization.
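As a quick illustration of these composable transformations, the sketch below combines ``jit``,
``grad``, and ``vmap`` on a toy loss. It only assumes a working ``jax``/``jaxlib`` installation
(for example, from one of the ROCm wheels or Docker images referenced below) and runs the same way
on CPU or GPU.

.. code-block:: python

   import jax
   import jax.numpy as jnp

   # A scalar loss; grad differentiates it, jit compiles it through XLA.
   def loss(w, x, y):
       pred = jnp.dot(x, w)
       return jnp.mean((pred - y) ** 2)

   grad_fn = jax.jit(jax.grad(loss))  # compiled gradient with respect to w

   # Automatic vectorization over the batch dimension of x.
   batched = jax.vmap(lambda row, w: jnp.dot(row, w), in_axes=(0, None))

   key = jax.random.PRNGKey(0)
   x = jax.random.normal(key, (32, 8))
   y = jnp.ones((32,))
   w = jnp.zeros((8,))

   print(grad_fn(w, x, y).shape)  # (8,)
   print(batched(x, w).shape)     # (32,)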
Support overview
================================================================================
- The ROCm-supported version of JAX is maintained in the official `https://github.com/ROCm/rocm-jax
<https://github.com/ROCm/rocm-jax>`__ repository, which differs from the
`https://github.com/jax-ml/jax <https://github.com/jax-ml/jax>`__ upstream repository.
- To get started and install JAX on ROCm, use the prebuilt :ref:`Docker images <jax-docker-compat>`,
which include ROCm, JAX, and all required dependencies.
- See the :doc:`ROCm JAX installation guide <rocm-install-on-linux:install/3rd-party/jax-install>`
for installation and setup instructions.
- You can also consult the upstream `Installation guide <https://jax.readthedocs.io/en/latest/installation.html#amd-gpu-linux>`__
for additional context.
Version support
--------------------------------------------------------------------------------
AMD releases official `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax/tags>`_
quarterly alongside new ROCm releases. These images undergo full AMD testing.
`Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community/tags>`_
follow upstream JAX releases and use the latest available ROCm version.
JAX Plugin-PJRT with JAX/JAXLIB compatibility
================================================================================
Portable JIT Runtime (PJRT) is an open, stable interface for device runtime and
compiler. The following table details the ROCm version compatibility matrix
between JAX Plugin-PJRT and JAX/JAXLIB.
.. list-table::
:header-rows: 1
* - JAX Plugin-PJRT
- JAX/JAXLIB
- ROCm
* - 0.7.1
- 0.7.1
- 7.1.1, 7.1.0
* - 0.6.0
- 0.6.2, 0.6.0
- 7.0.2, 7.0.1, 7.0.0
Use cases and recommendations
================================================================================
@@ -87,7 +71,7 @@ Use cases and recommendations
* The `Distributed fine-tuning with JAX on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/distributed-sft-jax/README.html>`_
outlines the process of fine-tuning a Bidirectional Encoder Representations
from Transformers (BERT)-based large language model (LLM) using JAX for a text
classification task. The blog post discusses techniques for parallelizing the
fine-tuning across multiple AMD GPUs and assesses the model's performance on a
holdout dataset. During the fine-tuning, a BERT-base-cased transformer model
and the General Language Understanding Evaluation (GLUE) benchmark dataset was
@@ -95,7 +79,7 @@ Use cases and recommendations
* The `MI300X workload optimization guide <https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/workload.html>`_
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
GPU using ROCm. The page is aimed at helping users achieve optimal
performance for deep learning and other high-performance computing tasks on
the MI300X GPU.
@@ -106,15 +90,75 @@ For more use cases and recommendations, see `ROCm JAX blog posts <https://rocm.b
Docker image compatibility
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes ready-made `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax>`_
with ROCm backends on Docker Hub. The following Docker image tags and
associated inventories represent the latest JAX version from the official Docker Hub and are validated for
`ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`_. Click the |docker-icon|
icon to view the image on Docker Hub.
.. list-table:: JAX Docker image components
:header-rows: 1
* - Docker image
- JAX
- Linux
- Python
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.4.2-jax0.4.35-py3.12/images/sha256-8918fa806a172c1a10eb2f57131eb31b5d7c8fa1656b8729fe7d3d736112de83"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
- Ubuntu 24.04
- `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.4.2-jax0.4.35-py3.10/images/sha256-a394be13c67b7fc602216abee51233afd4b6cb7adaa57ca97e688fba82f9ad79"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
- Ubuntu 22.04
- `3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
AMD publishes `Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
with ROCm backends on Docker Hub. The following Docker image tags and
associated inventories are tested for `ROCm 6.3.2 <https://repo.radeon.com/rocm/apt/6.3.2/>`_.
.. list-table:: JAX community Docker image components
:header-rows: 1
* - Docker image
- JAX
- Linux
- Python
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.12.8/images/sha256-25dfaa0183e274bd0a3554a309af3249c6f16a1793226cb5373f418e39d3146a"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
- Ubuntu 22.04
- `3.12.8 <https://www.python.org/downloads/release/python-3128/>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.11.11/images/sha256-ff9baeca9067d13e6c279c911e5a9e5beed0817d24fafd424367cc3d5bd381d7"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
- Ubuntu 22.04
- `3.11.11 <https://www.python.org/downloads/release/python-31111/>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.10.16/images/sha256-8bab484be1713655f74da51a191ed824bb9d03db1104fd63530a1ac3c37cf7b1"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
- Ubuntu 22.04
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
.. _key_rocm_libraries:
@@ -250,7 +294,7 @@ The ROCm supported data types in JAX are collected in the following table.
.. note::
JAX data type support is affected by the :ref:`key_rocm_libraries` and is
collected on the :doc:`ROCm data types and precision support <rocm:reference/precision-support>`
page.
@@ -266,54 +310,5 @@ For a complete and up-to-date list of JAX public modules (for example, ``jax.num
Since version 0.1.56, JAX has full support for ROCm, and the
:ref:`Known issues and important notes <jax_comp_known_issues>` section
contains details about limitations specific to the ROCm backend. The list of
JAX API modules is maintained by the JAX project and is subject to change.
Refer to the official JAX documentation for the most up-to-date information.
Key features and enhancements for ROCm 7.0
===============================================================================
- Upgraded XLA backend: Integrates a newer XLA version, enabling better
optimizations, broader operator support, and potential performance gains.
- RNN support: Native RNN support (including LSTMs via ``jax.experimental.rnn``)
now available on ROCm, aiding sequence model development.
- Comprehensive linear algebra capabilities: Offers robust ``jax.linalg``
operations, essential for scientific and machine learning tasks.
- Expanded AMD GPU architecture support: Provides ongoing support for gfx1101
GPUs and introduces support for gfx950 and gfx12xx GPUs.
- Mixed FP8 precision support: Enables ``lax.dot_general`` operations with mixed FP8
types, offering pathways for memory and compute efficiency.
- Streamlined PyPI packaging: Provides reliable PyPI wheels for JAX on ROCm,
simplifying the installation process.
- Pallas experimental kernel development: Continued Pallas framework
enhancements for custom GPU kernels, including new intrinsics (specific
kernel behaviors under review).
- Improved build system and CI: Enhanced ROCm build system and CI for greater
reliability and maintainability.
- Enhanced distributed computing setup: Improved JAX setup in multi-GPU
distributed environments.
.. _jax_comp_known_issues:
Known issues and notes for ROCm 7.0
===============================================================================
- ``nn.dot_product_attention``: Certain configurations of ``jax.nn.dot_product_attention``
may cause segmentation faults, though the majority of use cases work correctly.
- SVD with dynamic shapes: SVD on inputs with dynamic/symbolic shapes might result in an error.
SVD with static shapes is unaffected.
- QR decomposition with symbolic shapes: QR decomposition operations may fail when using
symbolic/dynamic shapes in shape polymorphic contexts.
- Pallas kernels: Specific advanced Pallas kernels may exhibit variations in
numerical output or resource usage. These are actively reviewed as part of
Pallas's experimental development.

View File

@@ -1,275 +0,0 @@
:orphan:
.. meta::
:description: llama.cpp compatibility
:keywords: GPU, GGML, llama.cpp, deep learning, framework compatibility
.. version-set:: rocm_version latest
********************************************************************************
llama.cpp compatibility
********************************************************************************
`llama.cpp <https://github.com/ggml-org/llama.cpp>`__ is an open-source framework
for Large Language Model (LLM) inference that runs on both central processing units
(CPUs) and graphics processing units (GPUs). It is written in plain C/C++, providing
a simple, dependency-free setup.
The framework supports multiple quantization options, from 1.5-bit to 8-bit integers,
to accelerate inference and reduce memory usage. Originally built as a CPU-first library,
llama.cpp is easy to integrate with other programming environments and is widely
adopted across diverse platforms, including consumer devices.
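As an illustration of how easily llama.cpp integrates with other programming environments, the
sketch below queries a locally running ``llama-server`` through its OpenAI-compatible HTTP endpoint.
It assumes one of the ``*_server`` Docker images listed below (or a local ``llama-server`` build) is
already running with a model loaded and listening on the default port 8080; the port and endpoint
path are the server defaults, not something configured by this snippet.

.. code-block:: python

   import json
   import urllib.request

   payload = {
       "messages": [{"role": "user", "content": "Say hello in one sentence."}],
       "max_tokens": 32,
   }
   req = urllib.request.Request(
       "http://localhost:8080/v1/chat/completions",
       data=json.dumps(payload).encode(),
       headers={"Content-Type": "application/json"},
   )
   with urllib.request.urlopen(req) as resp:
       reply = json.load(resp)

   print(reply["choices"][0]["message"]["content"])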
Support overview
================================================================================
- The ROCm-supported version of llama.cpp is maintained in the official `https://github.com/ROCm/llama.cpp
<https://github.com/ROCm/llama.cpp>`__ repository, which differs from the
`https://github.com/ggml-org/llama.cpp <https://github.com/ggml-org/llama.cpp>`__ upstream repository.
- To get started and install llama.cpp on ROCm, use the prebuilt :ref:`Docker images <llama-cpp-docker-compat>`,
which include ROCm, llama.cpp, and all required dependencies.
- See the :doc:`ROCm llama.cpp installation guide <rocm-install-on-linux:install/3rd-party/llama-cpp-install>`
for installation and setup instructions.
- You can also consult the upstream `Installation guide <https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md>`__
for additional context.
.. _llama-cpp-docker-compat:
Compatibility matrix
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `llama.cpp images <https://hub.docker.com/r/rocm/llama.cpp/tags>`__
with ROCm backends on Docker Hub. The following Docker image tags and associated
inventories represent the latest available llama.cpp versions from the official Docker Hub.
Click |docker-icon| to view the image on Docker Hub.
.. important::
Tag endings of ``_full``, ``_server``, and ``_light`` serve different purposes for entrypoints as follows:
- Full: This image includes both the main executable file and the tools to convert ``LLaMA`` models into ``ggml`` format and quantize them to 4-bit.
- Server: This image only includes the server executable file.
- Light: This image only includes the main executable file.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Full Docker
- Server Docker
- Light Docker
- llama.cpp
- ROCm
- Ubuntu
- GPU
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_full/images/sha256-a94f0c7a598cc6504ff9e8371c016d7a2f93e69bf54a36c870f9522567201f10g"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_server/images/sha256-be175932c3c96e882dfbc7e20e0e834f58c89c2925f48b222837ee929dfc47ee"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_light/images/sha256-d8ba0c70603da502c879b1f8010b439c8e7fa9f6cbdac8bbbbbba97cb41ebc9e"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6652 <https://github.com/ROCm/llama.cpp/tree/release/b6652>`__
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- 24.04
- MI325X, MI300X, MI210
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_full/images/sha256-37582168984f25dce636cc7288298e06d94472ea35f65346b3541e6422b678ee"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_server/images/sha256-7e70578e6c3530c6591cc2c26da24a9ee68a20d318e12241de93c83224f83720"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_light/images/sha256-9a5231acf88b4a229677bc2c636ea3fe78a7a80f558bd80910b919855de93ad5"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6652 <https://github.com/ROCm/llama.cpp/tree/release/b6652>`__
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- 22.04
- MI325X, MI300X, MI210
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu24.04_full/images/sha256-5960fc850024a8a76451f9eaadd89b7e59981ae9f393b407310c1ddf18892577"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu24.04_server/images/sha256-1b79775d9f546065a6aaf9ca426e1dd4ed4de0b8f6ee83687758cc05af6538e6"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu24.04_light/images/sha256-8f863c4c2857ae42bebd64e4f1a0a1e7cc3ec4503f243e32b4a4dcad070ec361"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
- `6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__
- 24.04
- MI325X, MI300X, MI210
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu22.04_full/images/sha256-888879b3ee208f9247076d7984524b8d1701ac72611689e89854a1588bec9867"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu22.04_server/images/sha256-90e4ff99a66743e33fd00728cd71a768588e5f5ef355aaa196669fe65ac70672"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu22.04_light/images/sha256-bd447a049939cb99054f8fbf3f2352870fe906a75e2dc3339c845c08b9c53f9b"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
- `6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__
- 22.04
- MI325X, MI300X, MI210
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu24.04_full/images/sha256-5b3a1bc4889c1fcade434b937fbf9cc1c22ff7dc0317c130339b0c9238bc88c4"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu24.04_server/images/sha256-5228ff99d0f627a9032d668f4381b2e80dc1e301adc3e0821f26d8354b175271"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu24.04_light/images/sha256-b12723b332a826a89b7252dddf868cbe4d1a869562fc4aa4032f59e1a683b968"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
- `6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__
- 24.04
- MI325X, MI300X, MI210
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu22.04_full/images/sha256-cd6e21a6a73f59b35dd5309b09dd77654a94d783bf13a55c14eb8dbf8e9c2615"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu22.04_server/images/sha256-c2b4689ab2c47e6626e8fea22d7a63eb03d47c0fde9f5ef8c9f158d15c423e58"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu22.04_light/images/sha256-1acc28f29ed87db9cbda629cb29e1989b8219884afe05f9105522be929e94da4"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
- `6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__
- 22.04
- MI325X, MI300X, MI210
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu24.04_full/images/sha256-2f8ae8a44510d96d52dea6cb398b224f7edeb7802df7ec488c6f63d206b3cdc9"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu24.04_server/images/sha256-fece497ff9f4a28b12f645de52766941da8ead8471aa1ea84b61d4b4568e51f2"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu24.04_light/images/sha256-3e14352fa6f8c6128b23cf9342531c20dbfb522550b626e09d83b260a1947022"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
- `6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`__
- 24.04
- MI325X, MI300X, MI210
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu22.04_full/images/sha256-80763062ef0bec15038c35fd01267f1fc99a5dd171d4b48583cc668b15efad69"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu22.04_server/images/sha256-db2a6c957555ed83b819bbc54aea884a93192da0fb512dae63d32e0dc4e8ab8f"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu22.04_light/images/sha256-c6dbb07cc655fb079d5216e4b77451cb64a9daa0585d23b6fb8b32cb22021197"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
- `6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`__
- 22.04
- MI325X, MI300X, MI210
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b5997_rocm6.4.0_ubuntu24.04_full/images/sha256-f78f6c81ab2f8e957469415fe2370a1334fe969c381d1fe46050c85effaee9d5"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b5997_rocm6.4.0_ubuntu24.04_server/images/sha256-275ad9e18f292c26a00a2de840c37917e98737a88a3520bdc35fd3fc5c9a6a9b"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b5997_rocm6.4.0_ubuntu24.04_light/images/sha256-cc324e6faeedf0e400011f07b49d2dc41a16bae257b2b7befa0f4e2e97231320"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b5997 <https://github.com/ROCm/llama.cpp/tree/release/b5997>`__
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
- 24.04
- MI300X, MI210
.. _llama-cpp-key-rocm-libraries:
Key ROCm libraries for llama.cpp
================================================================================
llama.cpp functionality on ROCm is determined by its underlying library
dependencies. These ROCm components affect the capabilities, performance, and
feature set available to developers. Ensure you have the required libraries for
your corresponding ROCm version.
.. list-table::
:header-rows: 1
* - ROCm library
- ROCm 7.0.0 version
- ROCm 6.4.x version
- Purpose
- Usage
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`__
- 3.0.0
- 2.4.0
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
matrix and vector operations.
- Supports operations such as matrix multiplication, matrix-vector
products, and tensor contractions. Utilized in both dense and batched
linear algebra operations.
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`__
- 1.0.0
- 0.12.0
- hipBLASLt is an extension of the hipBLAS library, providing additional
features like epilogues fused into the matrix multiplication kernel or
use of integer tensor cores.
- By setting the ``ROCBLAS_USE_HIPBLASLT`` environment variable, you can dispatch
hipBLASLt kernels where possible (see the sketch after this table).
* - `rocWMMA <https://github.com/ROCm/rocWMMA>`__
- 2.0.0
- 1.7.0
- Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
multiplication (GEMM) and accumulation operations with mixed precision
support.
- Can be used to enhance flash attention performance on AMD GPUs by enabling
the corresponding rocWMMA option at compile time.
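As a minimal runtime sketch of the hipBLASLt dispatch mentioned above (assuming a Python launcher script and a ROCm build of llama.cpp; the binary name, flags, and model path are placeholders), the environment variable is set before the process starts:

.. code-block:: python

   import os
   import subprocess

   # Opt in to hipBLASLt dispatch for rocBLAS-backed binaries by setting the
   # environment variable in the process environment before launch.
   env = dict(os.environ, ROCBLAS_USE_HIPBLASLT="1")

   # "llama-server" and its arguments are placeholders; point them at your own
   # ROCm llama.cpp build and GGUF model file.
   subprocess.run(["llama-server", "-m", "model.gguf"], env=env, check=True)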
.. _llama-cpp-uses-recommendations:
Use cases and recommendations
================================================================================
llama.cpp can be applied in a variety of scenarios, particularly when you need to meet one or more of the following requirements:
- Plain C/C++ implementation with no external dependencies
- Support for 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory usage
- Custom HIP (Heterogeneous-compute Interface for Portability) kernels for running large language models (LLMs) on AMD GPUs (graphics processing units)
- CPU (central processing unit) + GPU (graphics processing unit) hybrid inference for partially accelerating models larger than the total available VRAM (video random-access memory)
llama.cpp is also used in a range of real-world applications, including:
- Games such as `Lucy's Labyrinth <https://github.com/MorganRO8/Lucys_Labyrinth>`__:
A simple maze game where AI-controlled agents attempt to trick the player.
- Tools such as `Styled Lines <https://marketplace.unity.com/packages/tools/ai-ml-integration/style-text-webgl-ios-stand-alone-llm-llama-cpp-wrapper-292902>`__:
A proprietary, asynchronous inference wrapper for Unity3D game development, including pre-built mobile and web platform wrappers and a model example.
- Various other AI applications use llama.cpp as their inference engine;
for a detailed list, see the `user interfaces (UIs) section <https://github.com/ggml-org/llama.cpp?tab=readme-ov-file#description>`__.
For more use cases and recommendations, refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__,
where you can search for llama.cpp examples and best practices to optimize your workloads on AMD GPUs.
- The `Llama.cpp Meets Instinct: A New Era of Open-Source AI Acceleration <https://rocm.blogs.amd.com/ecosystems-and-partners/llama-cpp/README.html>`__
blog post outlines how the open-source llama.cpp framework enables efficient LLM inference—including interactive inference with ``llama-cli``,
server deployment with ``llama-server``, GGUF model preparation and quantization, performance benchmarking, and optimizations tailored for
AMD Instinct GPUs within the ROCm ecosystem.
Previous versions
===============================================================================
See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/llama-cpp-history` to find documentation for previous releases
of the ``ROCm/llama.cpp`` Docker image.

View File

@@ -2,7 +2,7 @@
.. meta:: .. meta::
:description: Megablocks compatibility :description: Megablocks compatibility
:keywords: GPU, megablocks, deep learning, framework compatibility :keywords: GPU, megablocks, compatibility
.. version-set:: rocm_version latest .. version-set:: rocm_version latest
@@ -10,41 +10,64 @@
Megablocks compatibility Megablocks compatibility
******************************************************************************** ********************************************************************************
`Megablocks <https://github.com/databricks/megablocks>`__ is a lightweight library Megablocks is a light-weight library for mixture-of-experts (MoE) training.
for mixture-of-experts `(MoE) <https://huggingface.co/blog/moe>`__ training.
The core of the system is efficient "dropless-MoE" and standard MoE layers. The core of the system is efficient "dropless-MoE" and standard MoE layers.
Megablocks is integrated with `https://github.com/stanford-futuredata/Megatron-LM Megablocks is integrated with `https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`_,
<https://github.com/stanford-futuredata/Megatron-LM>`__,
where data and pipeline parallel training of MoEs is supported. where data and pipeline parallel training of MoEs is supported.
Support overview * ROCm support for Megablocks is hosted in the official `https://github.com/ROCm/megablocks <https://github.com/ROCm/megablocks>`_ repository.
* Due to independent compatibility considerations, this location differs from the `https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`_ upstream repository.
* Use the prebuilt :ref:`Docker image <megablocks-docker-compat>` with ROCm, PyTorch, and Megablocks preinstalled.
* See the :doc:`ROCm Megablocks installation guide <rocm-install-on-linux:install/3rd-party/megablocks-install>` to install and get started.
.. note::
Megablocks is supported on ROCm 6.3.0.
Supported devices
================================================================================ ================================================================================
- The ROCm-supported version of Megablocks is maintained in the official `https://github.com/ROCm/megablocks - **Officially Supported**: AMD Instinct MI300X
<https://github.com/ROCm/megablocks>`__ repository, which differs from the - **Partially Supported** (functionality or performance limitations): AMD Instinct MI250X, MI210X
`https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`__ upstream repository.
- To get started and install Megablocks on ROCm, use the prebuilt :ref:`Docker image <megablocks-docker-compat>`, Supported models and features
which includes ROCm, Megablocks, and all required dependencies. ================================================================================
- See the :doc:`ROCm Megablocks installation guide <rocm-install-on-linux:install/3rd-party/megablocks-install>` This section summarizes the Megablocks features supported by ROCm.
for installation and setup instructions.
* Distributed Pre-training
* Activation Checkpointing and Recomputation
* Distributed Optimizer
* Mixture-of-Experts
* dropless-Mixture-of-Experts
.. _megablocks-recommendations:
Use cases and recommendations
================================================================================
The `ROCm Megablocks blog post <https://rocm.blogs.amd.com/artificial-intelligence/megablocks/README.html>`_
guides you through leveraging the ROCm platform for pre-training with the Megablocks framework.
It shows how to pre-process datasets and how to begin pre-training on AMD GPUs through:
* Single-GPU pre-training
* Multi-GPU pre-training
- You can also consult the upstream `Installation guide <https://github.com/databricks/megablocks>`__
for additional context.
.. _megablocks-docker-compat: .. _megablocks-docker-compat:
Compatibility matrix Docker image compatibility
================================================================================ ================================================================================
.. |docker-icon| raw:: html .. |docker-icon| raw:: html
<i class="fab fa-docker"></i> <i class="fab fa-docker"></i>
AMD validates and publishes `Megablocks images <https://hub.docker.com/r/rocm/megablocks/tags>`__ AMD validates and publishes `ROCm Megablocks images <https://hub.docker.com/r/rocm/megablocks/tags>`_
with ROCm backends on Docker Hub. The following Docker image tag and associated with ROCm and Pytorch backends on Docker Hub. The following Docker image tags and associated
inventories represent the latest available Megablocks version from the official Docker Hub. inventories represent the latest Megatron-LM version from the official Docker Hub.
The Docker images have been validated for `ROCm 6.3.0 <https://repo.radeon.com/rocm/apt/6.3/>`_.
Click |docker-icon| to view the image on Docker Hub. Click |docker-icon| to view the image on Docker Hub.
.. list-table:: .. list-table::
@@ -57,7 +80,6 @@ Click |docker-icon| to view the image on Docker Hub.
- PyTorch - PyTorch
- Ubuntu - Ubuntu
- Python - Python
- GPU
* - .. raw:: html * - .. raw:: html
@@ -67,38 +89,5 @@ Click |docker-icon| to view the image on Docker Hub.
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_ - `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
- 24.04 - 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_ - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
- MI300X
Supported models and features with ROCm 6.3.0
================================================================================
This section summarizes the Megablocks features supported by ROCm.
* Distributed Pre-training
* Activation Checkpointing and Recomputation
* Distributed Optimizer
* Mixture-of-Experts
* dropless-Mixture-of-Experts
.. _megablocks-recommendations:
Use cases and recommendations
================================================================================
* The `Efficient MoE training on AMD ROCm: How-to use Megablocks on AMD GPUs
<https://rocm.blogs.amd.com/artificial-intelligence/megablocks/README.html>`__
blog post guides how to leverage the ROCm platform for pre-training using the
Megablocks framework. It introduces a streamlined approach for training Mixture-of-Experts
(MoE) models using the Megablocks library on AMD hardware. Focusing on GPT-2, it
demonstrates how block-sparse computations can enhance scalability and efficiency in MoE
training. The guide provides step-by-step instructions for setting up the environment,
including cloning the repository, building the Docker image, and running the training container.
Additionally, it offers insights into utilizing the ``oscar-1GB.json`` dataset for pre-training
language models. By leveraging Megablocks and the ROCm platform, you can optimize your MoE
training workflows for large-scale transformer models.
It features how to pre-process datasets and how to begin pre-training on AMD GPUs through:
* Single-GPU pre-training
* Multi-GPU pre-training

View File

@@ -2,7 +2,7 @@
.. meta:: .. meta::
:description: PyTorch compatibility :description: PyTorch compatibility
:keywords: GPU, PyTorch, deep learning, framework compatibility :keywords: GPU, PyTorch compatibility
.. version-set:: rocm_version latest .. version-set:: rocm_version latest
@@ -15,42 +15,40 @@ deep learning. PyTorch on ROCm provides mixed-precision and large-scale training
using `MIOpen <https://github.com/ROCm/MIOpen>`__ and using `MIOpen <https://github.com/ROCm/MIOpen>`__ and
`RCCL <https://github.com/ROCm/rccl>`__ libraries. `RCCL <https://github.com/ROCm/rccl>`__ libraries.
PyTorch provides two high-level features: ROCm support for PyTorch is upstreamed into the official PyTorch repository. Due
to independent compatibility considerations, this results in two distinct
release cycles for PyTorch on ROCm:
- Tensor computation (like NumPy) with strong GPU acceleration - ROCm PyTorch release:
- Deep neural networks built on a tape-based autograd system (rapid computation - Provides the latest version of ROCm but might not necessarily support the
of multiple partial derivatives or gradients) latest stable PyTorch version.
Support overview - Offers :ref:`Docker images <pytorch-docker-compat>` with ROCm and PyTorch
================================================================================ preinstalled.
ROCm support for PyTorch is upstreamed into the official PyTorch repository. - ROCm PyTorch repository: `<https://github.com/ROCm/pytorch>`__
ROCm development is aligned with the stable release of PyTorch, while upstream
PyTorch testing uses the stable release of ROCm to maintain consistency:
- The ROCm-supported version of PyTorch is maintained in the official `https://github.com/ROCm/pytorch - See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>`
<https://github.com/ROCm/pytorch>`__ repository, which differs from the to get started.
`https://github.com/pytorch/pytorch <https://github.com/pytorch/pytorch>`__ upstream repository.
- To get started and install PyTorch on ROCm, use the prebuilt :ref:`Docker images <pytorch-docker-compat>`, - Official PyTorch release:
which include ROCm, PyTorch, and all required dependencies.
- See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>` - Provides the latest stable version of PyTorch but might not necessarily
for installation and setup instructions. support the latest ROCm version.
- You can also consult the upstream `Installation guide <https://pytorch.org/get-started/locally/>`__ or - Official PyTorch repository: `<https://github.com/pytorch/pytorch>`__
`Previous versions <https://pytorch.org/get-started/previous-versions/>`__ for additional context.
- See the `Nightly and latest stable version installation guide <https://pytorch.org/get-started/locally/>`__
or `Previous versions <https://pytorch.org/get-started/previous-versions/>`__
to get started.
PyTorch includes tooling that generates HIP source code from the CUDA backend. PyTorch includes tooling that generates HIP source code from the CUDA backend.
This approach allows PyTorch to support ROCm without requiring manual code This approach allows PyTorch to support ROCm without requiring manual code
modifications. For more information, see :doc:`HIPIFY <hipify:index>`. modifications. For more information, see :doc:`HIPIFY <hipify:index>`.
Version support ROCm development is aligned with the stable release of PyTorch, while upstream
-------------------------------------------------------------------------------- PyTorch testing uses the stable release of ROCm to maintain consistency.
AMD releases official `ROCm PyTorch Docker images <https://hub.docker.com/r/rocm/pytorch/tags>`_
quarterly alongside new ROCm releases. These images undergo full AMD testing.
.. _pytorch-recommendations: .. _pytorch-recommendations:
@@ -75,12 +73,12 @@ Use cases and recommendations
* The :doc:`Instinct MI300X workload optimization guide </how-to/rocm-for-ai/inference-optimization/workload>` * The :doc:`Instinct MI300X workload optimization guide </how-to/rocm-for-ai/inference-optimization/workload>`
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
GPU using ROCm. This guide helps users achieve optimal performance for accelerator using ROCm. This guide helps users achieve optimal performance for
deep learning and other high-performance computing tasks on the MI300X deep learning and other high-performance computing tasks on the MI300X
GPU. accelerator.
* The :doc:`Inception with PyTorch documentation </conceptual/ai-pytorch-inception>` * The :doc:`Inception with PyTorch documentation </conceptual/ai-pytorch-inception>`
describes how PyTorch integrates with ROCm for AI workloads. It outlines the describes how PyTorch integrates with ROCm for AI workloads It outlines the
use of PyTorch on the ROCm platform and focuses on efficiently leveraging AMD use of PyTorch on the ROCm platform and focuses on efficiently leveraging AMD
GPU hardware for training and inference tasks in AI applications. GPU hardware for training and inference tasks in AI applications.
@@ -91,12 +89,141 @@ For more use cases and recommendations, see `ROCm PyTorch blog posts <https://ro
Docker image compatibility Docker image compatibility
================================================================================ ================================================================================
AMD validates and publishes `PyTorch images <https://hub.docker.com/r/rocm/pytorch/tags>`__ .. |docker-icon| raw:: html
with ROCm backends on Docker Hub.
To find the right image tag, see the :ref:`PyTorch on ROCm installation <i class="fab fa-docker"></i>
documentation <rocm-install-on-linux:pytorch-docker-support>` for a list of
available ``rocm/pytorch`` images. AMD validates and publishes `PyTorch images <https://hub.docker.com/r/rocm/pytorch>`__
with ROCm backends on Docker Hub. The following Docker image tags and associated
inventories were tested on `ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__.
Click |docker-icon| to view the image on Docker Hub.
.. list-table:: PyTorch Docker image components
:header-rows: 1
:class: docker-image-compatibility
* - Docker
- PyTorch
- Ubuntu
- Python
- Apex
- torchvision
- TensorBoard
- MAGMA
- UCX
- OMPI
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-6a287591500b4048a9556c1ecc92bc411fd3d552f6c8233bc399f18eb803e8d6"><i class="fab fa-docker fa-lg"></i></a>
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`__
- 24.04
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`__
- `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.6.0/images/sha256-06b967629ba6657709f04169832cd769a11e6b491e8b1394c361d42d7a0c8b43"><i class="fab fa-docker fa-lg"></i></a>
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`__
- 22.04
- `3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`__
- `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.5.1/images/sha256-62022414217ef6de33ac5b1341e57db8a48e8573fa2ace12d48aa5edd4b99ef0"><i class="fab fa-docker fa-lg"></i></a>
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
- 24.04
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.11_pytorch_release_2.5.1/images/sha256-469a7f74fc149aff31797e011ee41978f6a190adc69fa423b3c6a718a77bd985"><i class="fab fa-docker fa-lg"></i></a>
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
- 22.04
- `3.11 <https://www.python.org/downloads/release/python-31113/>`__
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.5.1/images/sha256-37f41a1cd94019688669a1b20d33ea74156e0c129ef6b8270076ef214a6a1a2c"><i class="fab fa-docker fa-lg"></i></a>
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
- 22.04
- `3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-60824ba83dc1b9d94164925af1f81c0235c105dd555091ec04c57e05177ead1b"><i class="fab fa-docker fa-lg"></i></a>
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
- 24.04
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`__
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-fe944fe083312f901be6891ab4d3ffebf2eaf2cf4f5f0f435ef0b76ec714fabd"><i class="fab fa-docker fa-lg"></i></a>
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
- 22.04
- `3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`__
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.3.0/images/sha256-1d59251c47170c5b8960d1172a4dbe52f5793d8966edd778f168eaf32d56661a"><i class="fab fa-docker fa-lg"></i></a>
- `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`__
- 24.04
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`__
- `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`__
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
Key ROCm libraries for PyTorch Key ROCm libraries for PyTorch
================================================================================ ================================================================================
@@ -239,8 +366,7 @@ feature set available to developers.
Supported modules and data types Supported modules and data types
================================================================================ ================================================================================
The following section outlines the supported data types, modules, and domain The following section outlines the supported data types, modules, and domain libraries available in PyTorch on ROCm.
libraries available in PyTorch on ROCm.
Supported data types Supported data types
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
@@ -339,7 +465,7 @@ with ROCm.
* - Library * - Library
- Description - Description
* - `torchaudio <https://docs.pytorch.org/audio/stable/index.html>`_ * - `torchaudio <https://docs.pytorch.org/audio/stable/index.html>`_
- Audio and signal processing library for PyTorch. Provides utilities for - Audio and signal processing library for PyTorch. Provides utilities for
audio I/O, signal and data processing functions, datasets, model audio I/O, signal and data processing functions, datasets, model
implementations, and application components for audio and speech implementations, and application components for audio and speech
@@ -349,7 +475,7 @@ with ROCm.
you need to explicitly move audio data (waveform tensor) to GPU using you need to explicitly move audio data (waveform tensor) to GPU using
``.to('cuda')``. ``.to('cuda')``.
* - `torchtune <https://meta-pytorch.org/torchtune/stable/index.html>`_ * - `torchtune <https://docs.pytorch.org/torchtune/stable/index.html>`_
- PyTorch-native library designed for fine-tuning large language models - PyTorch-native library designed for fine-tuning large language models
(LLMs). Provides supports the full fine-tuning workflow and offers (LLMs). Provides supports the full fine-tuning workflow and offers
compatibility with popular production inference systems. compatibility with popular production inference systems.
@@ -361,12 +487,21 @@ with ROCm.
popular datasets, model architectures, and common image transformations popular datasets, model architectures, and common image transformations
for computer vision applications. for computer vision applications.
* - `torchdata <https://meta-pytorch.org/data/beta/index.html#torchdata>`_ * - `torchtext <https://docs.pytorch.org/text/stable/index.html>`_
- Text processing library for PyTorch. Provides data processing utilities
and popular datasets for natural language processing, including
tokenization, vocabulary management, and text embeddings.
**Note:** ``torchtext`` does not implement ROCm-specific kernels.
ROCm acceleration is provided through the underlying PyTorch framework
and ROCm library integration. Only official release exists.
* - `torchdata <https://docs.pytorch.org/data/beta/index.html>`_
- Beta library of common modular data loading primitives for easily - Beta library of common modular data loading primitives for easily
constructing flexible and performant data pipelines, with features still constructing flexible and performant data pipelines, with features still
in prototype stage. in prototype stage.
* - `torchrec <https://meta-pytorch.org/torchrec/>`_ * - `torchrec <https://docs.pytorch.org/torchrec/>`_
- PyTorch domain library for common sparsity and parallelism primitives - PyTorch domain library for common sparsity and parallelism primitives
needed for large-scale recommender systems, enabling authors to train needed for large-scale recommender systems, enabling authors to train
models with large embedding tables shared across many GPUs. models with large embedding tables shared across many GPUs.
@@ -398,101 +533,3 @@ with ROCm.
dispatching. dispatching.
**Note:** Only official release exists. **Note:** Only official release exists.
Key features and enhancements for PyTorch 2.9 with ROCm 7.1.1
================================================================================
- Scaled Dot Product Attention (SDPA) upgraded to use AOTriton version 0.11b.
- Default hipBLASLt support enabled for gfx908 architecture on ROCm 6.3 and later.
- MIOpen now supports channels last memory format for 3D convolutions and batch normalization.
- NHWC convolution operations in MIOpen optimized by eliminating unnecessary transpose operations.
- Improved tensor.item() performance by removing redundant synchronization.
- Enhanced performance for element-wise operations and reduction kernels.
- Added support for grouped GEMM operations through fbgemm_gpu generative AI components.
- Resolved device error in Inductor when using CUDA graph trees with HIP.
- Corrected logsumexp scaling in AOTriton-based SDPA implementation.
- Added stream graph capture status validation in memory copy synchronization functions.
Key features and enhancements for PyTorch 2.8 with ROCm 7.1
================================================================================
- MIOpen deep learning optimizations: Further optimized NHWC BatchNorm feature.
- Added float8 support for the DeepSpeed extension, allowing for decreased
memory footprint and increased throughput in training and inference workloads.
- ``torch.nn.functional.scaled_dot_product_attention`` now calls the optimized
flash attention kernel automatically, as shown in the sketch below.
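The following minimal sketch shows that call path; the tensor shapes and dtypes are illustrative only.

.. code-block:: python

   import torch
   import torch.nn.functional as F

   # Query, key, and value tensors in (batch, heads, sequence, head_dim) layout.
   q = torch.randn(1, 8, 128, 64, device="cuda", dtype=torch.float16)
   k = torch.randn(1, 8, 128, 64, device="cuda", dtype=torch.float16)
   v = torch.randn(1, 8, 128, 64, device="cuda", dtype=torch.float16)

   # Eligible inputs are routed to the optimized flash attention kernel
   # without any code changes.
   out = F.scaled_dot_product_attention(q, k, v, is_causal=True)
   print(out.shape)  # torch.Size([1, 8, 128, 64])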
Key features and enhancements for PyTorch 2.7/2.8 with ROCm 7.0
================================================================================
- Enhanced TunableOp framework: Introduces ``tensorfloat32`` support for
TunableOp operations, improved offline tuning for ScaledGEMM operations,
submatrix offline tuning capabilities, and better logging for BLAS operations
without bias vectors.
- Expanded GPU architecture support: Provides optimized support for newer GPU
architectures, including gfx1200 and gfx1201 with preferred hipBLASLt backend
selection, along with improvements for gfx950 and gfx1100 Series GPUs.
- Advanced Triton Integration: AOTriton 0.10b introduces official support for
gfx950 and gfx1201, along with experimental support for gfx1101, gfx1151,
gfx1150, and gfx1200.
- Improved element-wise kernel performance: Delivers enhanced vectorized
element-wise kernels with better support for heterogeneous tensor types and
optimized input vectorization for tensors with mixed data types.
- MIOpen deep learning optimizations: Enables NHWC BatchNorm by default on
ROCm 7.0+, provides ``maxpool`` forward and backward performance improvements
targeting ResNet scenarios, and includes updated launch configurations for
better performance.
- Enhanced memory and tensor operations: Features fixes for in-place ``aten``
sum operations with specialized templated kernels, improved 3D tensor
performance with NHWC format, and better handling of memory-bound matrix
multiplication operations.
- Robust testing and quality improvements: Includes comprehensive test suite
updates with improved tolerance handling for Navi3x architectures, generalized
ROCm-specific test conditions, and enhanced unit test coverage for Flash
Attention and Memory Efficient operations.
- Composable Kernel (CK) updates: Features updated CK submodule integration with
the latest optimizations and performance improvements for core mathematical
operations.
- Development and debugging enhancements: Includes improved source handling for
dynamic compilation, better error handling for atomic operations, and enhanced
state checking for trace operations.
- Integrate APEX fused layer normalization, which can have a positive impact on
text-to-video models.
- Integrate APEX distributed fused LAMB and distributed fused ADAM, which can
have a positive impact on BERT-L and Llama2-SFT.
- FlashAttention v3 has been integrated for AMD GPUs.
- `PyTorch C++ extensions <https://pytorch.org/tutorials/advanced/cpp_extension.html>`_
provide a mechanism for compiling custom operations that can be used during
network training or inference. For AMD platforms, ``amdclang++`` has been
validated as the supported compiler for building these extensions.
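As a minimal sketch of that C++ extension workflow, the snippet below compiles a trivial host-side operator in-process with ``torch.utils.cpp_extension.load_inline``; the module and function names are illustrative. Extensions that contain device code follow the same pattern through the ``cuda_sources`` argument, which the ROCm toolchain hipifies before compilation.

.. code-block:: python

   import torch
   from torch.utils.cpp_extension import load_inline

   # A trivial host-side operator; load_inline compiles and imports it on the fly.
   cpp_source = """
   #include <torch/extension.h>

   torch::Tensor add_one(torch::Tensor x) {
       return x + 1;
   }
   """

   ext = load_inline(
       name="add_one_ext",        # illustrative module name
       cpp_sources=cpp_source,
       functions=["add_one"],
       verbose=True,
   )

   x = torch.arange(4, device="cuda")
   print(ext.add_one(x))  # tensor([1, 2, 3, 4], device='cuda:0')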
Known issues and notes for PyTorch 2.7/2.8 with ROCm 7.0 and ROCm 7.1
================================================================================
- The ``matmul.allow_fp16_reduced_precision_reduction`` and
``matmul.allow_bf16_reduced_precision_reduction`` options under
``torch.backends.cuda`` are not supported. As a result,
reduced-precision reductions using FP16 or BF16 accumulation types are not
available.
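For portable code, one option is to leave these switches at their defaults or pin them explicitly, since the reduced-precision reduction paths they control are unavailable on ROCm either way. A minimal sketch:

.. code-block:: python

   import torch

   # These attributes exist in the PyTorch API, but on ROCm the reduced-precision
   # FP16/BF16 accumulation paths they control are not available, so pinning them
   # to False keeps behavior consistent across backends.
   torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
   torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False

   print(torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction)  # False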

View File

@@ -1,114 +0,0 @@
:orphan:
.. meta::
:description: Ray compatibility
:keywords: GPU, Ray, deep learning, framework compatibility
.. version-set:: rocm_version latest
*******************************************************************************
Ray compatibility
*******************************************************************************
Ray is a unified framework for scaling AI and Python applications from your laptop
to a full cluster, without changing your code. Ray consists of `a core distributed
runtime <https://docs.ray.io/en/latest/ray-core/walkthrough.html>`__ and a set of
`AI libraries <https://docs.ray.io/en/latest/ray-air/getting-started.html>`__ for
simplifying machine learning computations.
Ray is a general-purpose framework that runs many types of workloads efficiently.
Any Python application can be scaled with Ray, without extra infrastructure.
Support overview
================================================================================
- The ROCm-supported version of Ray is maintained in the official `https://github.com/ROCm/ray
<https://github.com/ROCm/ray>`__ repository, which differs from the
`https://github.com/ray-project/ray <https://github.com/ray-project/ray>`__ upstream repository.
- To get started and install Ray on ROCm, use the prebuilt :ref:`Docker image <ray-docker-compat>`,
which includes ROCm, Ray, and all required dependencies.
- See the :doc:`ROCm Ray installation guide <rocm-install-on-linux:install/3rd-party/ray-install>`
for installation and setup instructions.
- You can also consult the upstream `Installation guide <https://docs.ray.io/en/latest/ray-overview/installation.html>`__
for additional context.
.. _ray-docker-compat:
Compatibility matrix
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `ROCm Ray Docker images <https://hub.docker.com/r/rocm/ray/tags>`__
with ROCm backends on Docker Hub. The following Docker image tags and
associated inventories represent the latest Ray version from the official Docker Hub.
Click |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- ROCm
- Ray
- Pytorch
- Ubuntu
- Python
- GPU
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/ray/ray-2.51.1_rocm7.0.0_ubuntu22.04_py3.12_pytorch2.9.0/images/sha256-a02f6766b4ba406f88fd7e85707ec86c04b569834d869a08043ec9bcbd672168"><i class="fab fa-docker fa-lg"></i> rocm/ray</a>
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- `2.51.1 <https://github.com/ROCm/ray/tree/release/2.51.1>`__
- 2.9.0a0+git1c57644
- 22.04
- `3.12.12 <https://www.python.org/downloads/release/python-31212/>`__
- MI300X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/ray/ray-2.48.0.post0_rocm6.4.1_ubuntu24.04_py3.12_pytorch2.6.0/images/sha256-0d166fe6bdced38338c78eedfb96eff92655fb797da3478a62dd636365133cc0"><i class="fab fa-docker fa-lg"></i> rocm/ray</a>
- `6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`__
- `2.48.0.post0 <https://github.com/ROCm/ray/tree/release/2.48.0.post0>`__
- 2.6.0+git684f6f2
- 24.04
- `3.12.10 <https://www.python.org/downloads/release/python-31210/>`__
- MI300X, MI210
Use cases and recommendations
================================================================================
* The `Reinforcement Learning from Human Feedback on AMD GPUs with verl and ROCm
Integration <https://rocm.blogs.amd.com/artificial-intelligence/verl-large-scale/README.html>`__
blog provides an overview of Volcano Engine Reinforcement Learning (verl)
for large language models (LLMs) and discusses its benefits in large-scale
reinforcement learning from human feedback (RLHF). It uses Ray as part of a
hybrid orchestration engine to schedule and coordinate training and inference
tasks in parallel, enabling optimized resource utilization and potential overlap
between these phases. This dynamic resource allocation strategy significantly
improves overall system efficiency. The blog presents verl's performance results,
focusing on throughput and convergence accuracy achieved on AMD Instinct™ MI300X
GPUs. Follow this guide to get started with verl on AMD Instinct GPUs and
accelerate your RLHF training with ROCm-optimized performance.
* The `Exploring Use Cases for Scalable AI: Implementing Ray with ROCm Support for Efficient ML Workflows
<https://rocm.blogs.amd.com/artificial-intelligence/rocm-ray/README.html>`__
blog post describes key use cases such as training and inference for large language models (LLMs),
model serving, hyperparameter tuning, reinforcement learning, and the orchestration of large-scale
workloads using Ray in the ROCm environment.
For more use cases and recommendations, see the AMD GPU tabs in the `Accelerator Support
topic <https://docs.ray.io/en/latest/ray-core/scheduling/accelerators.html#accelerator-support>`__
of the Ray core documentation and refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__,
where you can search for Ray examples and best practices to optimize your workloads on AMD GPUs.
Previous versions
===============================================================================
See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/ray-history` to find documentation for previous releases
of the ``ROCm/ray`` Docker image.

View File

@@ -2,7 +2,7 @@
.. meta:: .. meta::
:description: Stanford Megatron-LM compatibility :description: Stanford Megatron-LM compatibility
:keywords: Stanford, Megatron-LM, deep learning, framework compatibility :keywords: Stanford, Megatron-LM, compatibility
.. version-set:: rocm_version latest .. version-set:: rocm_version latest
@@ -10,76 +10,34 @@
Stanford Megatron-LM compatibility Stanford Megatron-LM compatibility
******************************************************************************** ********************************************************************************
Stanford Megatron-LM is a large-scale language model training framework developed Stanford Megatron-LM is a large-scale language model training framework developed by NVIDIA `https://github.com/NVIDIA/Megatron-LM <https://github.com/NVIDIA/Megatron-LM>`_. It is
by NVIDIA at `https://github.com/NVIDIA/Megatron-LM <https://github.com/NVIDIA/Megatron-LM>`_. designed to train massive transformer-based language models efficiently by model and data parallelism.
It is designed to train massive transformer-based language models efficiently by model
and data parallelism.
It provides efficient tensor, pipeline, and sequence-based model parallelism for * ROCm support for Stanford Megatron-LM is hosted in the official `https://github.com/ROCm/Stanford-Megatron-LM <https://github.com/ROCm/Stanford-Megatron-LM>`_ repository.
pre-training transformer-based language models such as GPT (Decoder Only), BERT * Due to independent compatibility considerations, this location differs from the `https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`_ upstream repository.
(Encoder Only), and T5 (Encoder-Decoder). * Use the prebuilt :ref:`Docker image <megatron-lm-docker-compat>` with ROCm, PyTorch, and Megatron-LM preinstalled.
* See the :doc:`ROCm Stanford Megatron-LM installation guide <rocm-install-on-linux:install/3rd-party/stanford-megatron-lm-install>` to install and get started.
Support overview .. note::
Stanford Megatron-LM is supported on ROCm 6.3.0.
Supported Devices
================================================================================ ================================================================================
- The ROCm-supported version of Stanford Megatron-LM is maintained in the official `https://github.com/ROCm/Stanford-Megatron-LM - **Officially Supported**: AMD Instinct MI300X
<https://github.com/ROCm/Stanford-Megatron-LM>`__ repository, which differs from the - **Partially Supported** (functionality or performance limitations): AMD Instinct MI250X, MI210X
`https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`__ upstream repository.
- To get started and install Stanford Megatron-LM on ROCm, use the prebuilt :ref:`Docker image <megatron-lm-docker-compat>`,
which includes ROCm, Stanford Megatron-LM, and all required dependencies.
- See the :doc:`ROCm Stanford Megatron-LM installation guide <rocm-install-on-linux:install/3rd-party/stanford-megatron-lm-install>` Supported models and features
for installation and setup instructions.
- You can also consult the upstream `Installation guide <https://github.com/NVIDIA/Megatron-LM>`__
for additional context.
.. _megatron-lm-docker-compat:
Compatibility matrix
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `Stanford Megatron-LM images <https://hub.docker.com/r/rocm/stanford-megatron-lm/tags>`_
with ROCm and Pytorch backends on Docker Hub. The following Docker image tags and associated
inventories represent the latest Stanford Megatron-LM version from the official Docker Hub.
Click |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- ROCm
- Stanford Megatron-LM
- PyTorch
- Ubuntu
- Python
- GPU
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/stanford-megatron-lm/stanford-megatron-lm85f95ae_rocm6.3.0_ubuntu24.04_py3.12_pytorch2.4.0/images/sha256-070556f078be10888a1421a2cb4f48c29f28b02bfeddae02588d1f7fc02a96a6"><i class="fab fa-docker fa-lg"></i> rocm/stanford-megatron-lm</a>
- `6.3.0 <https://repo.radeon.com/rocm/apt/6.3/>`_
- `85f95ae <https://github.com/stanford-futuredata/Megatron-LM/commit/85f95aef3b648075fe6f291c86714fdcbd9cd1f5>`_
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
- MI300X
Supported models and features with ROCm 6.3.0
================================================================================ ================================================================================
This section details models & features that are supported by the ROCm version on Stanford Megatron-LM. This section details models & features that are supported by the ROCm version on Stanford Megatron-LM.
Models: Models:
* BERT * Bert
* GPT * GPT
* T5 * T5
* ICT * ICT
@@ -96,21 +54,47 @@ Features:
Use cases and recommendations Use cases and recommendations
================================================================================ ================================================================================
The following blog post mentions Megablocks, but you can run Stanford Megatron-LM with the same steps to pre-process datasets on AMD GPUs: See the `Efficient MoE training on AMD ROCm: How-to use Megablocks on AMD GPUs blog <https://rocm.blogs.amd.com/artificial-intelligence/megablocks/README.html>`_ post
to leverage the ROCm platform for pre-training by using the Stanford Megatron-LM framework of pre-processing datasets on AMD GPUs.
Coverage includes:
* The `Efficient MoE training on AMD ROCm: How-to use Megablocks on AMD GPUs * Single-GPU pre-training
<https://rocm.blogs.amd.com/artificial-intelligence/megablocks/README.html>`__ * Multi-GPU pre-training
blog post guides how to leverage the ROCm platform for pre-training using the
Megablocks framework. It introduces a streamlined approach for training Mixture-of-Experts
(MoE) models using the Megablocks library on AMD hardware. Focusing on GPT-2, it
demonstrates how block-sparse computations can enhance scalability and efficiency in MoE
training. The guide provides step-by-step instructions for setting up the environment,
including cloning the repository, building the Docker image, and running the training container.
Additionally, it offers insights into utilizing the ``oscar-1GB.json`` dataset for pre-training
language models. By leveraging Megablocks and the ROCm platform, you can optimize your MoE
training workflows for large-scale transformer models.
It features how to pre-process datasets and how to begin pre-training on AMD GPUs through:
* Single-GPU pre-training .. _megatron-lm-docker-compat:
* Multi-GPU pre-training
Docker image compatibility
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `Stanford Megatron-LM images <https://hub.docker.com/r/rocm/megatron-lm>`_
with ROCm and Pytorch backends on Docker Hub. The following Docker image tags and associated
inventories represent the latest Megatron-LM version from the official Docker Hub.
The Docker images have been validated for `ROCm 6.3.0 <https://repo.radeon.com/rocm/apt/6.3/>`_.
Click |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- Stanford Megatron-LM
- PyTorch
- Ubuntu
- Python
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/stanford-megatron-lm/stanford-megatron-lm85f95ae_rocm6.3.0_ubuntu24.04_py3.12_pytorch2.4.0/images/sha256-070556f078be10888a1421a2cb4f48c29f28b02bfeddae02588d1f7fc02a96a6"><i class="fab fa-docker fa-lg"></i></a>
- `85f95ae <https://github.com/stanford-futuredata/Megatron-LM/commit/85f95aef3b648075fe6f291c86714fdcbd9cd1f5>`_
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_

View File

@@ -0,0 +1,76 @@
:orphan:
.. meta::
:description: Taichi compatibility
:keywords: GPU, Taichi compatibility
.. version-set:: rocm_version latest
*******************************************************************************
Taichi compatibility
*******************************************************************************
`Taichi <https://www.taichi-lang.org/>`_ is an open-source, imperative, and parallel
programming language designed for high-performance numerical computation.
Embedded in Python, it leverages just-in-time (JIT) compilation frameworks such as LLVM to accelerate
compute-intensive Python code by compiling it to native GPU or CPU instructions.
Taichi is widely used across various domains, including real-time physical simulation,
numerical computing, augmented reality, artificial intelligence, computer vision, robotics,
visual effects in film and gaming, and general-purpose computing.
* ROCm support for Taichi is hosted in the official `https://github.com/ROCm/taichi <https://github.com/ROCm/taichi>`_ repository.
* Due to independent compatibility considerations, this location differs from the `https://github.com/taichi-dev <https://github.com/taichi-dev>`_ upstream repository.
* Use the prebuilt :ref:`Docker image <taichi-docker-compat>` with ROCm, PyTorch, and Taichi preinstalled.
* See the :doc:`ROCm Taichi installation guide <rocm-install-on-linux:install/3rd-party/taichi-install>` to install and get started.
.. note::
Taichi is supported on ROCm 6.3.2.
Supported devices and features
===============================================================================
There is support through the ROCm software stack for all Taichi GPU features on AMD Instinct MI250X and MI210X series GPUs, with the exception of Taichi's GPU rendering system, CGUI.
AMD Instinct MI300X series GPUs will be supported by November.
.. _taichi-recommendations:
Use cases and recommendations
================================================================================
To fully leverage Taichi's performance capabilities in compute-intensive tasks, it is best to adhere to specific coding patterns and utilize Taichi decorators.
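The following is a minimal sketch of that pattern, assuming a working Taichi installation with a GPU backend available; the field size and kernel body are illustrative.

.. code-block:: python

   import taichi as ti

   # Select a GPU backend when one is available; Taichi falls back to CPU otherwise.
   ti.init(arch=ti.gpu)

   n = 1_000_000
   x = ti.field(dtype=ti.f32, shape=n)

   @ti.kernel
   def fill_squares():
       # The outermost loop in a @ti.kernel is parallelized across the device.
       for i in x:
           x[i] = i * i

   fill_squares()
   print(x[10])  # 100.0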
A collection of example use cases is available in the `https://github.com/ROCm/taichi_examples <https://github.com/ROCm/taichi_examples>`_ repository,
providing practical insights and foundational knowledge for working with the Taichi programming language.
You can also refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`_ to search for Taichi examples and best practices to optimize your workflows on AMD GPUs.
.. _taichi-docker-compat:
Docker image compatibility
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes ready-made `ROCm Taichi Docker images <https://hub.docker.com/r/rocm/taichi/tags>`_
with ROCm backends on Docker Hub. The following Docker image tags and associated inventories
represent the latest Taichi version from the official Docker Hub.
The Docker images have been validated for `ROCm 6.3.2 <https://rocm.docs.amd.com/en/docs-6.3.2/about/release-notes.html>`_.
Click |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- ROCm
- Taichi
- Ubuntu
- Python
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/taichi/taichi-1.8.0b1_rocm6.3.2_ubuntu22.04_py3.10.12/images/sha256-e016964a751e6a92199032d23e70fa3a564fff8555afe85cd718f8aa63f11fc6"><i class="fab fa-docker fa-lg"></i> rocm/taichi</a>
- `6.3.2 <https://repo.radeon.com/rocm/apt/6.3.2/>`_
- `1.8.0b1 <https://github.com/taichi-dev/taichi>`_
- 22.04
- `3.10.12 <https://www.python.org/downloads/release/python-31012/>`_

View File

@@ -2,7 +2,7 @@
.. meta:: .. meta::
:description: TensorFlow compatibility :description: TensorFlow compatibility
:keywords: GPU, TensorFlow, deep learning, framework compatibility :keywords: GPU, TensorFlow compatibility
.. version-set:: rocm_version latest .. version-set:: rocm_version latest
@@ -12,46 +12,115 @@ TensorFlow compatibility
`TensorFlow <https://www.tensorflow.org/>`__ is an open-source library for `TensorFlow <https://www.tensorflow.org/>`__ is an open-source library for
solving machine learning, deep learning, and AI problems. It can solve many solving machine learning, deep learning, and AI problems. It can solve many
problems across different sectors and industries, but primarily focuses on problems across different sectors and industries but primarily focuses on
neural network training and inference. It is one of the most popular deep neural network training and inference. It is one of the most popular and
learning frameworks and is very active in open-source development. in-demand frameworks and is very active in open-source contribution and
development.
Support overview
================================================================================
- The ROCm-supported version of TensorFlow is maintained in the official `https://github.com/ROCm/tensorflow-upstream
<https://github.com/ROCm/tensorflow-upstream>`__ repository, which differs from the
`https://github.com/tensorflow/tensorflow <https://github.com/tensorflow/tensorflow>`__ upstream repository.
- To get started and install TensorFlow on ROCm, use the prebuilt :ref:`Docker images <tensorflow-docker-compat>`,
which include ROCm, TensorFlow, and all required dependencies.
- See the :doc:`ROCm TensorFlow installation guide <rocm-install-on-linux:install/3rd-party/tensorflow-install>`
for installation and setup instructions.
- You can also consult the `TensorFlow API versions <https://www.tensorflow.org/versions>`__ list
for additional context.
Version support
--------------------------------------------------------------------------------
The `official TensorFlow repository <http://github.com/tensorflow/tensorflow>`__ The `official TensorFlow repository <http://github.com/tensorflow/tensorflow>`__
includes full ROCm support. AMD maintains a TensorFlow `ROCm repository includes full ROCm support. AMD maintains a TensorFlow `ROCm repository
<http://github.com/rocm/tensorflow-upstream>`__ in order to quickly add bug <http://github.com/rocm/tensorflow-upstream>`__ in order to quickly add bug
fixes, updates, and support for the latest ROCm versions. fixes, updates, and support for the latest ROCM versions.
- ROCm TensorFlow release:
- Offers :ref:`Docker images <tensorflow-docker-compat>` with
ROCm and TensorFlow pre-installed.
- ROCm TensorFlow repository: `<https://github.com/ROCm/tensorflow-upstream>`__
- See the :doc:`ROCm TensorFlow installation guide <rocm-install-on-linux:install/3rd-party/tensorflow-install>`
to get started.
- Official TensorFlow release:
- Official TensorFlow repository: `<https://github.com/tensorflow/tensorflow>`__
- See the `TensorFlow API versions <https://www.tensorflow.org/versions>`__ list.
.. note::
The official TensorFlow documentation does not cover ROCm support. Use the
ROCm documentation for installation instructions for Tensorflow on ROCm.
See :doc:`rocm-install-on-linux:install/3rd-party/tensorflow-install`.
.. _tensorflow-docker-compat: .. _tensorflow-docker-compat:
Docker image compatibility Docker image compatibility
================================================================================ ===============================================================================
AMD provides preconfigured Docker images with TensorFlow and the ROCm backend. .. |docker-icon| raw:: html
These images are published on `Docker Hub <https://hub.docker.com/r/rocm/tensorflow>`__ and are the
recommended way to get started with deep learning with TensorFlow on ROCm.
To find the right image tag, see the :ref:`TensorFlow on ROCm installation <i class="fab fa-docker"></i>
documentation <rocm-install-on-linux:tensorflow-docker-support>` for a list of
available ``rocm/tensorflow`` images. AMD validates and publishes ready-made `TensorFlow images
<https://hub.docker.com/r/rocm/tensorflow>`__ with ROCm backends on
Docker Hub. The following Docker image tags and associated inventories are
validated for `ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__. Click
the |docker-icon| icon to view the image on Docker Hub.
.. list-table:: TensorFlow Docker image components
:header-rows: 1
* - Docker image
- TensorFlow
- Ubuntu
- Python
- TensorBoard
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.18-dev/images/sha256-96754ce2d30f729e19b497279915b5212ba33d5e408e7e5dd3f2304d87e3441e"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- 24.04
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.18-dev/images/sha256-fa741508d383858e86985a9efac85174529127408102558ae2e3a4ac894eea1e"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
- 22.04
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.17-dev/images/sha256-3a0aef09f2a8833c2b64b85874dd9449ffc2ad257351857338ff5b706c03a418"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- 24.04
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.17-dev/images/sha256-bc7341a41ebe7ab261aa100732874507c452421ef733e408ac4f05ed453b0bc5"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
- 22.04
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.16-dev/images/sha256-4841a8df7c340dab79bf9362dad687797649a00d594e0832eb83ea6880a40d3b"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.16.2-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- 24.04
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.16-dev/images/sha256-883fa95aba960c58a3e46fceaa18f03ede2c7df89b8e9fd603ab2d47e0852897"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.16.2-cp310-cp310-manylinux_2_28_x86_64.whl>`__
- 22.04
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`__
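After pulling one of the images above, a quick way to confirm that the TensorFlow build inside the container actually sees the ROCm-enabled GPUs is the short Python check below (a minimal sketch; the reported device count depends on the host and on any device-visibility environment variables that are set).

```python
# Minimal sketch: confirm that the TensorFlow build in a rocm/tensorflow
# container can enumerate the ROCm-enabled GPUs on the host.
import tensorflow as tf

gpus = tf.config.list_physical_devices("GPU")
print(f"TensorFlow {tf.__version__} sees {len(gpus)} GPU(s)")
for gpu in gpus:
    print(gpu)
```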
Critical ROCm libraries for TensorFlow Critical ROCm libraries for TensorFlow
@@ -136,7 +205,7 @@ The following section maps supported data types and GPU-accelerated TensorFlow
features to their minimum supported ROCm and TensorFlow versions. features to their minimum supported ROCm and TensorFlow versions.
Data types Data types
--------------- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The data type of a tensor is specified using the ``dtype`` attribute or The data type of a tensor is specified using the ``dtype`` attribute or
argument, and TensorFlow supports a wide range of data types for different use argument, and TensorFlow supports a wide range of data types for different use
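As a small illustration of the ``dtype`` argument discussed above, the sketch below creates a tensor in the default single precision and casts it to two reduced-precision types; which types are GPU-accelerated depends on the ROCm and TensorFlow versions listed in the tables that follow.

```python
# Minimal sketch: specifying and converting tensor data types in TensorFlow.
import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0], dtype=tf.float32)  # explicit single precision
h = tf.cast(x, tf.float16)                           # half precision
b = tf.cast(x, tf.bfloat16)                          # bfloat16
print(x.dtype, h.dtype, b.dtype)
```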
@@ -254,7 +323,7 @@ are as follows:
- 1.7 - 1.7
Features Features
--------------- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This table provides an overview of key features in TensorFlow and their This table provides an overview of key features in TensorFlow and their
availability in ROCm. availability in ROCm.
@@ -346,7 +415,7 @@ availability in ROCm.
- 1.9.2 - 1.9.2
Distributed library features Distributed library features
----------------------------------- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Enables developers to scale computations across multiple devices on a single machine or Enables developers to scale computations across multiple devices on a single machine or
across multiple machines. across multiple machines.

View File

@@ -2,7 +2,7 @@
.. meta:: .. meta::
:description: verl compatibility :description: verl compatibility
:keywords: GPU, verl, deep learning, framework compatibility :keywords: GPU, verl compatibility
.. version-set:: rocm_version latest .. version-set:: rocm_version latest
@@ -10,109 +10,77 @@
verl compatibility verl compatibility
******************************************************************************* *******************************************************************************
Volcano Engine Reinforcement Learning for LLMs (`verl <https://verl.readthedocs.io/en/latest/>`__) Volcano Engine Reinforcement Learning for LLMs (verl) is a reinforcement learning framework designed for large language models (LLMs).
is a reinforcement learning framework designed for large language models (LLMs). verl offers a scalable, open-source fine-tuning solution optimized for AMD Instinct GPUs with full ROCm support.
verl offers a scalable, open-source fine-tuning solution by using a hybrid programming model
that makes it easy to define and run complex post-training dataflows efficiently.
Its modular APIs separate computation from data, allowing smooth integration with other frameworks. * See the `verl documentation <https://verl.readthedocs.io/en/latest/>`_ for more information about verl.
It also supports flexible model placement across GPUs for efficient scaling on different cluster sizes. * The official verl GitHub repository is `https://github.com/volcengine/verl <https://github.com/volcengine/verl>`_.
verl achieves high training and generation throughput by building on existing LLM frameworks. * Use the AMD-validated :ref:`Docker images <verl-docker-compat>` with ROCm and verl preinstalled.
Its 3D-HybridEngine reduces memory use and communication overhead when switching between training * See the :doc:`ROCm verl installation guide <rocm-install-on-linux:install/3rd-party/verl-install>` to install and get started.
and inference, improving overall performance.
Support overview .. note::
================================================================================
- The ROCm-supported version of verl is maintained in the official `https://github.com/ROCm/verl verl is supported on ROCm 6.2.0.
<https://github.com/ROCm/verl>`__ repository, which differs from the
`https://github.com/volcengine/verl <https://github.com/volcengine/verl>`__ upstream repository.
- To get started and install verl on ROCm, use the prebuilt :ref:`Docker image <verl-docker-compat>`,
which includes ROCm, verl, and all required dependencies.
- See the :doc:`ROCm verl installation guide <rocm-install-on-linux:install/3rd-party/verl-install>`
for installation and setup instructions.
- You can also consult the upstream `verl documentation <https://verl.readthedocs.io/en/latest/>`__
for additional context.
.. _verl-docker-compat:
Compatibility matrix
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `verl Docker images <https://hub.docker.com/r/rocm/verl/tags>`_
with ROCm backends on Docker Hub. The following Docker image tags and associated inventories
represent the available verl versions from the official Docker Hub.
Click |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- ROCm
- verl
- Ubuntu
- PyTorch
- Python
- vllm
- GPU
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/verl/verl-0.6.0.amd0_rocm7.0_vllm0.11.0.dev/images/sha256-f70a3ebc94c1f66de42a2fcc3f8a6a8d6d0881eb0e65b6958d7d6d24b3eecb0d"><i class="fab fa-docker fa-lg"></i> rocm/verl</a>
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- `0.6.0 <https://github.com/volcengine/verl/releases/tag/v0.6.0>`__
- 22.04
- `2.9.0 <https://github.com/ROCm/pytorch/tree/release/2.9-rocm7.x-gfx115x>`__
- `3.12.11 <https://www.python.org/downloads/release/python-31211/>`__
- `0.11.0 <https://github.com/vllm-project/vllm/releases/tag/v0.11.0>`__
- MI300X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/verl/verl-0.3.0.post0_rocm6.2_vllm0.6.3/images/sha256-cbe423803fd7850448b22444176bee06f4dcf22cd3c94c27732752d3a39b04b2"><i class="fab fa-docker fa-lg"></i> rocm/verl</a>
- `6.2.0 <https://repo.radeon.com/rocm/apt/6.2/>`__
- `0.3.0.post0 <https://github.com/volcengine/verl/releases/tag/v0.3.0.post0>`__
- 20.04
- `2.5.0 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
- `3.9.19 <https://www.python.org/downloads/release/python-3919/>`__
- `0.6.3 <https://github.com/vllm-project/vllm/releases/tag/v0.6.3>`__
- MI300X
.. _verl-supported_features:
Supported modules with verl on ROCm
===============================================================================
The following GPU-accelerated modules are supported with verl on ROCm:
- ``FSDP``: Training engine
- ``vllm``: Inference engine
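As a rough sketch of how these two modules come together in practice, a PPO run is typically launched through verl's Hydra-style entry point with the training strategy set to FSDP and the rollout engine set to vLLM. The module path and override names below are taken from upstream verl examples and may differ between verl versions, so treat them as assumptions and verify them against the version shipped in the Docker image.

```python
# Minimal sketch (assumed entry point and override names; verify against your
# verl version). Launches verl's PPO trainer with FSDP as the training engine
# and vLLM as the inference (rollout) engine.
import subprocess

overrides = [
    "actor_rollout_ref.actor.strategy=fsdp",  # training engine
    "actor_rollout_ref.rollout.name=vllm",    # inference (rollout) engine
    "trainer.n_gpus_per_node=8",              # for example, one 8-GPU MI300X node
]

subprocess.run(["python3", "-m", "verl.trainer.main_ppo", *overrides], check=True)
```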
.. _verl-recommendations: .. _verl-recommendations:
Use cases and recommendations Use cases and recommendations
================================================================================ ================================================================================
* The benefits of verl in large-scale reinforcement learning from human feedback The benefits of verl in large-scale reinforcement learning from human feedback (RLHF) are discussed in the `Reinforcement Learning from Human Feedback on AMD GPUs with verl and ROCm Integration <https://rocm.blogs.amd.com/artificial-intelligence/verl-large-scale/README.html>`_ blog.
(RLHF) are discussed in the `Reinforcement Learning from Human Feedback on AMD
GPUs with verl and ROCm Integration <https://rocm.blogs.amd.com/artificial-intelligence/verl-large-scale/README.html>`__
blog. The blog post outlines how the Volcano Engine Reinforcement Learning
(verl) framework integrates with the AMD ROCm platform to optimize training on
AMD Instinct™ GPUs. The guide details the process of building a Docker image,
setting up single-node and multi-node training environments, and highlights
performance benchmarks demonstrating improved throughput and convergence accuracy.
This resource serves as a comprehensive starting point for deploying verl on AMD GPUs,
facilitating efficient RLHF training workflows.
Previous versions .. _verl-supported_features:
Supported features
=============================================================================== ===============================================================================
See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/verl-history` to find documentation for previous releases
of the ``ROCm/verl`` Docker image. The following table shows verl on ROCm support for GPU-accelerated modules.
.. list-table::
:header-rows: 1
* - Module
- Description
- verl version
- ROCm version
* - ``FSDP``
- Training engine
- 0.3.0.post0
- 6.2.0
* - ``vllm``
- Inference engine
- 0.3.0.post0
- 6.2.0
.. _verl-docker-compat:
Docker image compatibility
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes ready-made `ROCm verl Docker images <https://hub.docker.com/r/rocm/verl/tags>`_
with ROCm backends on Docker Hub. The following Docker image tags and associated inventories represent the available verl versions from the official Docker Hub.
.. list-table::
:header-rows: 1
* - Docker image
- ROCm
- verl
- Ubuntu
- PyTorch
- Python
- vllm
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/verl/verl-0.3.0.post0_rocm6.2_vllm0.6.3/images/sha256-cbe423803fd7850448b22444176bee06f4dcf22cd3c94c27732752d3a39b04b2"><i class="fab fa-docker fa-lg"></i> rocm/verl</a>
- `6.2.0 <https://repo.radeon.com/rocm/apt/6.2/>`_
- `0.3.0.post0 <https://github.com/volcengine/verl/releases/tag/v0.3.0.post0>`_
- 20.04
- `2.5.0 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
- `3.9.19 <https://www.python.org/downloads/release/python-3919/>`_
- `0.6.3 <https://github.com/vllm-project/vllm/releases/tag/v0.6.3>`_

View File

@@ -13,22 +13,21 @@
:gutter: 1 :gutter: 1
:::{grid-item-card} :::{grid-item-card}
**AMD Instinct MI300 Series** **AMD Instinct MI300 series**
Review hardware aspects of the AMD Instinct™ MI300 Series GPUs and the CDNA™ 3 Review hardware aspects of the AMD Instinct™ MI300 series of GPU accelerators and the CDNA™ 3
architecture. architecture.
* [AMD Instinct™ MI300 microarchitecture](./gpu-arch/mi300.md) * [AMD Instinct™ MI300 microarchitecture](./gpu-arch/mi300.md)
* [AMD Instinct MI300/CDNA3 ISA](https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf) * [AMD Instinct MI300/CDNA3 ISA](https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf)
* [White paper](https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf) * [White paper](https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf)
* [MI300 performance counters](./gpu-arch/mi300-mi200-performance-counters.rst) * [Performance counters](./gpu-arch/mi300-mi200-performance-counters.rst)
* [MI350 Series performance counters](./gpu-arch/mi350-performance-counters.rst)
::: :::
:::{grid-item-card} :::{grid-item-card}
**AMD Instinct MI200 Series** **AMD Instinct MI200 series**
Review hardware aspects of the AMD Instinct™ MI200 Series GPUs and the CDNA™ 2 Review hardware aspects of the AMD Instinct™ MI200 series of GPU accelerators and the CDNA™ 2
architecture. architecture.
* [AMD Instinct™ MI250 microarchitecture](./gpu-arch/mi250.md) * [AMD Instinct™ MI250 microarchitecture](./gpu-arch/mi250.md)
@@ -41,7 +40,7 @@ architecture.
:::{grid-item-card} :::{grid-item-card}
**AMD Instinct MI100** **AMD Instinct MI100**
Review hardware aspects of the AMD Instinct™ MI100 Series GPUs and the CDNA™ 1 Review hardware aspects of the AMD Instinct™ MI100 series of GPU accelerators and the CDNA™ 1
architecture. architecture.
* [AMD Instinct™ MI100 microarchitecture](./gpu-arch/mi100.md) * [AMD Instinct™ MI100 microarchitecture](./gpu-arch/mi100.md)

View File

@@ -1,14 +1,14 @@
--- ---
myst: myst:
html_meta: html_meta:
"description lang=en": "Learn about the AMD Instinct MI100 Series architecture." "description lang=en": "Learn about the AMD Instinct MI100 series architecture."
"keywords": "Instinct, MI100, microarchitecture, AMD, ROCm" "keywords": "Instinct, MI100, microarchitecture, AMD, ROCm"
--- ---
# AMD Instinct™ MI100 microarchitecture # AMD Instinct™ MI100 microarchitecture
The following image shows the node-level architecture of a system that The following image shows the node-level architecture of a system that
comprises two AMD EPYC™ processors and (up to) eight AMD Instinct™ GPUs. comprises two AMD EPYC™ processors and (up to) eight AMD Instinct™ accelerators.
The two EPYC processors are connected to each other with the AMD Infinity™ The two EPYC processors are connected to each other with the AMD Infinity™
fabric, which provides high-bandwidth (up to 18 GT/sec), coherent links such fabric, which provides high-bandwidth (up to 18 GT/sec), coherent links such
that each processor can access the available node memory as a single that each processor can access the available node memory as a single
@@ -18,29 +18,29 @@ available to connect the processors plus one PCIe Gen 4 x16 link per processor
can attach additional I/O devices such as the host adapters for the network can attach additional I/O devices such as the host adapters for the network
fabric. fabric.
![Structure of a single GCD in the AMD Instinct MI100 GPU](../../data/conceptual/gpu-arch/image004.png "Node-level system architecture with two AMD EPYC™ processors and eight AMD Instinct™ GPUs.") ![Structure of a single GCD in the AMD Instinct MI100 accelerator](../../data/conceptual/gpu-arch/image004.png "Node-level system architecture with two AMD EPYC™ processors and eight AMD Instinct™ accelerators.")
In a typical node configuration, each processor can host up to four AMD In a typical node configuration, each processor can host up to four AMD
Instinct™ GPUs that are attached using PCIe Gen 4 links at 16 GT/sec, Instinct™ accelerators that are attached using PCIe Gen 4 links at 16 GT/sec,
which corresponds to a peak bidirectional link bandwidth of 32 GB/sec. Each hive which corresponds to a peak bidirectional link bandwidth of 32 GB/sec. Each hive
of four GPUs can participate in a fully connected, coherent AMD of four accelerators can participate in a fully connected, coherent AMD
Instinct™ fabric that connects the four GPUs using 23 GT/sec AMD Instinct™ fabric that connects the four accelerators using 23 GT/sec AMD
Infinity fabric links that run at a higher frequency than the inter-processor Infinity fabric links that run at a higher frequency than the inter-processor
links. This inter-GPU link can be established in certified server systems if the links. This inter-GPU link can be established in certified server systems if the
GPUs are mounted in neighboring PCIe slots by installing the AMD Infinity GPUs are mounted in neighboring PCIe slots by installing the AMD Infinity
Fabric™ bridge for the AMD Instinct™ GPUs. Fabric™ bridge for the AMD Instinct™ accelerators.
## Microarchitecture ## Microarchitecture
The microarchitecture of the AMD Instinct GPUs is based on the AMD CDNA The microarchitecture of the AMD Instinct accelerators is based on the AMD CDNA
architecture, which targets compute applications such as high-performance architecture, which targets compute applications such as high-performance
computing (HPC) and AI & machine learning (ML) that run on everything from computing (HPC) and AI & machine learning (ML) that run on everything from
individual servers to the world's largest exascale supercomputers. The overall individual servers to the world's largest exascale supercomputers. The overall
system architecture is designed for extreme scalability and compute performance. system architecture is designed for extreme scalability and compute performance.
![Structure of the AMD Instinct GPU (MI100 generation)](../../data/conceptual/gpu-arch/image005.png "Structure of the AMD Instinct GPU (MI100 generation)") ![Structure of the AMD Instinct accelerator (MI100 generation)](../../data/conceptual/gpu-arch/image005.png "Structure of the AMD Instinct accelerator (MI100 generation)")
The above image shows the AMD Instinct GPU with its PCIe Gen 4 x16 The above image shows the AMD Instinct accelerator with its PCIe Gen 4 x16
link (16 GT/sec, at the bottom) that connects the GPU to (one of) the host link (16 GT/sec, at the bottom) that connects the GPU to (one of) the host
processor(s). It also shows the three AMD Infinity Fabric ports that provide processor(s). It also shows the three AMD Infinity Fabric ports that provide
high-speed links (23 GT/sec, also at the bottom) to the other GPUs of the local high-speed links (23 GT/sec, also at the bottom) to the other GPUs of the local
@@ -48,7 +48,7 @@ hive.
On the left and right of the floor plan, the High Bandwidth Memory (HBM) On the left and right of the floor plan, the High Bandwidth Memory (HBM)
attaches via the GPU memory controller. The MI100 generation of the AMD attaches via the GPU memory controller. The MI100 generation of the AMD
Instinct GPU offers four stacks of HBM generation 2 (HBM2) for a total Instinct accelerator offers four stacks of HBM generation 2 (HBM2) for a total
of 32GB with a 4,096-bit-wide memory interface. The peak memory bandwidth of the of 32GB with a 4,096-bit-wide memory interface. The peak memory bandwidth of the
attached HBM2 is 1.228 TB/sec at a memory clock frequency of 1.2 GHz. attached HBM2 is 1.228 TB/sec at a memory clock frequency of 1.2 GHz.
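That bandwidth figure follows directly from the interface width and memory clock, assuming HBM2's double data rate (two transfers per memory clock):

```{math}
4096\,\text{bit} \times 2\,\tfrac{\text{transfers}}{\text{clock}} \times 1.2\,\text{GHz} \div 8\,\tfrac{\text{bit}}{\text{byte}} = 1228.8\ \text{GB/s} \approx 1.228\ \text{TB/s}
```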
@@ -64,7 +64,7 @@ Therefore, the theoretical maximum FP64 peak performance is 11.5 TFLOPS
![Block diagram of an MI100 compute unit with detailed SIMD view of the AMD CDNA architecture](../../data/conceptual/gpu-arch/image006.png "An MI100 compute unit with detailed SIMD view of the AMD CDNA architecture") ![Block diagram of an MI100 compute unit with detailed SIMD view of the AMD CDNA architecture](../../data/conceptual/gpu-arch/image006.png "An MI100 compute unit with detailed SIMD view of the AMD CDNA architecture")
The preceding image shows the block diagram of a single CU of an AMD Instinct™ The preceding image shows the block diagram of a single CU of an AMD Instinct™
MI100 GPU and summarizes how instructions flow through the execution MI100 accelerator and summarizes how instructions flow through the execution
engines. The CU fetches the instructions via a 32KB instruction cache and moves engines. The CU fetches the instructions via a 32KB instruction cache and moves
them forward to execution via a dispatcher. The CU can handle up to ten them forward to execution via a dispatcher. The CU can handle up to ten
wavefronts at a time and feed their instructions into the execution unit. The wavefronts at a time and feed their instructions into the execution unit. The

View File

@@ -1,13 +1,13 @@
--- ---
myst: myst:
html_meta: html_meta:
"description lang=en": "Learn about the AMD Instinct MI250 Series architecture." "description lang=en": "Learn about the AMD Instinct MI250 series architecture."
"keywords": "Instinct, MI250, microarchitecture, AMD, ROCm" "keywords": "Instinct, MI250, microarchitecture, AMD, ROCm"
--- ---
# AMD Instinct™ MI250 microarchitecture # AMD Instinct™ MI250 microarchitecture
The microarchitecture of the AMD Instinct MI250 GPU is based on the The microarchitecture of the AMD Instinct MI250 accelerators is based on the
AMD CDNA 2 architecture that targets compute applications such as HPC, AMD CDNA 2 architecture that targets compute applications such as HPC,
artificial intelligence (AI), and machine learning (ML) and that run on artificial intelligence (AI), and machine learning (ML) and that run on
everything from individual servers to the world's largest exascale everything from individual servers to the world's largest exascale
@@ -40,7 +40,7 @@ execution units (also called matrix cores), which are geared toward executing
matrix operations like matrix-matrix multiplications. For FP64, the peak matrix operations like matrix-matrix multiplications. For FP64, the peak
performance of these units amounts to 90.5 TFLOPS. performance of these units amounts to 90.5 TFLOPS.
![Structure of a single GCD in the AMD Instinct MI250 GPU.](../../data/conceptual/gpu-arch/image001.png "Structure of a single GCD in the AMD Instinct MI250 GPU.") ![Structure of a single GCD in the AMD Instinct MI250 accelerator.](../../data/conceptual/gpu-arch/image001.png "Structure of a single GCD in the AMD Instinct MI250 accelerator.")
```{list-table} Peak-performance capabilities of the MI250 OAM for different data types. ```{list-table} Peak-performance capabilities of the MI250 OAM for different data types.
:header-rows: 1 :header-rows: 1
@@ -84,9 +84,16 @@ performance of these units amounts to 90.5 TFLOPS.
- 362.1 - 362.1
``` ```
The above table summarizes the aggregated peak performance of the AMD Instinct MI250 Open Compute Platform (OCP) Open Accelerator Modules (OAMs) and its two GCDs for different data types and execution units. The middle column lists the peak performance (number of data elements processed in a single instruction) of a single compute unit if a SIMD (or matrix) instruction is being retired in each clock cycle. The third column lists the theoretical peak performance of the OAM module. The theoretical aggregated peak memory bandwidth of the GPU is 3.2 TB/sec (1.6 TB/sec per GCD). The above table summarizes the aggregated peak performance of the AMD
Instinct MI250 OCP Open Accelerator Modules (OAM, OCP is short for Open Compute
Platform) and its two GCDs for different data types and execution units. The
middle column lists the peak performance (number of data elements processed in a
single instruction) of a single compute unit if a SIMD (or matrix) instruction
is being retired in each clock cycle. The third column lists the theoretical
peak performance of the OAM module. The theoretical aggregated peak memory
bandwidth of the GPU is 3.2 TB/sec (1.6 TB/sec per GCD).
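As a cross-check of the 90.5 TFLOPS FP64 matrix figure quoted above, the per-CU rate in the table scales up to the full OAM as sketched below, assuming 256 FLOP per clock per CU for FP64 matrix operations, 208 CUs across the two GCDs, and a 1.7 GHz peak engine clock (values not stated in this excerpt):

```{math}
256\,\tfrac{\text{FLOP}}{\text{CU}\cdot\text{clock}} \times 208\,\text{CU} \times 1.7\,\text{GHz} \approx 90.5\ \text{TFLOPS}
```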
![Dual-GCD architecture of the AMD Instinct MI250 GPUs](../../data/conceptual/gpu-arch/image002.png "Dual-GCD architecture of the AMD Instinct MI250 GPUs") ![Dual-GCD architecture of the AMD Instinct MI250 accelerators](../../data/conceptual/gpu-arch/image002.png "Dual-GCD architecture of the AMD Instinct MI250 accelerators")
The following image shows the block diagram of an OAM package that consists The following image shows the block diagram of an OAM package that consists
of two GCDs, each of which constitutes one GPU device in the system. The two of two GCDs, each of which constitutes one GPU device in the system. The two
@@ -98,18 +105,18 @@ between the two GCDs of an OAM, or a bidirectional peak transfer bandwidth of
## Node-level architecture ## Node-level architecture
The following image shows the node-level architecture of a system that is The following image shows the node-level architecture of a system that is
based on the AMD Instinct MI250 GPU. The MI250 OAMs attach to the host based on the AMD Instinct MI250 accelerator. The MI250 OAMs attach to the host
system via PCIe Gen 4 x16 links (yellow lines). Each GCD maintains its own PCIe system via PCIe Gen 4 x16 links (yellow lines). Each GCD maintains its own PCIe
x16 link to the host part of the system. Depending on the server platform, the x16 link to the host part of the system. Depending on the server platform, the
GCD can attach to the AMD EPYC processor directly or via an optional PCIe switch GCD can attach to the AMD EPYC processor directly or via an optional PCIe switch
. Note that some platforms may offer an x8 interface to the GCDs, which reduces . Note that some platforms may offer an x8 interface to the GCDs, which reduces
the available host-to-GPU bandwidth. the available host-to-GPU bandwidth.
![Block diagram of AMD Instinct MI250 GPUs with 3rd Generation AMD EPYC processor](../../data/conceptual/gpu-arch/image003.png "Block diagram of AMD Instinct MI250 GPUs with 3rd Generation AMD EPYC processor") ![Block diagram of AMD Instinct MI250 Accelerators with 3rd Generation AMD EPYC processor](../../data/conceptual/gpu-arch/image003.png "Block diagram of AMD Instinct MI250 Accelerators with 3rd Generation AMD EPYC processor")
The preceding image shows the node-level architecture of a system with AMD The preceding image shows the node-level architecture of a system with AMD
EPYC processors in a dual-socket configuration and four AMD Instinct MI250 EPYC processors in a dual-socket configuration and four AMD Instinct MI250
GPUs. The MI250 OAMs attach to the host processors via PCIe Gen 4 accelerators. The MI250 OAMs attach to the host processors via PCIe Gen 4
x16 links (yellow lines). Depending on the system design, a PCIe switch may x16 links (yellow lines). Depending on the system design, a PCIe switch may
exist to make more PCIe lanes available for additional components like network exist to make more PCIe lanes available for additional components like network
interfaces and/or storage devices. Each GCD maintains its own PCIe x16 link to interfaces and/or storage devices. Each GCD maintains its own PCIe x16 link to

View File

@@ -1,16 +1,16 @@
.. meta:: .. meta::
:description: MI300 and MI200 Series performance counters and metrics :description: MI300 and MI200 series performance counters and metrics
:keywords: MI300, MI200, performance counters, command processor counters :keywords: MI300, MI200, performance counters, command processor counters
*************************************************************************************************** ***************************************************************************************************
MI300 and MI200 Series performance counters and metrics MI300 and MI200 series performance counters and metrics
*************************************************************************************************** ***************************************************************************************************
This document lists and describes the hardware performance counters and derived metrics available This document lists and describes the hardware performance counters and derived metrics available
for the AMD Instinct™ MI300 and MI200 GPU. You can also access this information using the for the AMD Instinct™ MI300 and MI200 GPU. You can also access this information using the
:doc:`ROCprofiler-SDK <rocprofiler-sdk:how-to/using-rocprofv3>`. :doc:`ROCprofiler-SDK <rocprofiler-sdk:how-to/using-rocprofv3>`.
MI300 and MI200 Series performance counters MI300 and MI200 series performance counters
=============================================================== ===============================================================
Series performance counters include the following categories: Series performance counters include the following categories:
@@ -27,7 +27,7 @@ The following sections provide additional details for each category.
.. note:: .. note::
Preliminary validation of all MI300 and MI200 Series performance counters is in progress. Those with Preliminary validation of all MI300 and MI200 series performance counters is in progress. Those with
an asterisk (*) require further evaluation. an asterisk (*) require further evaluation.
.. _command-processor-counters: .. _command-processor-counters:
@@ -171,7 +171,7 @@ Instruction mix
"``SQ_INSTS_SMEM``", "Instr", "Number of scalar memory instructions issued" "``SQ_INSTS_SMEM``", "Instr", "Number of scalar memory instructions issued"
"``SQ_INSTS_SMEM_NORM``", "Instr", "Number of scalar memory instructions normalized to match ``smem_level`` issued" "``SQ_INSTS_SMEM_NORM``", "Instr", "Number of scalar memory instructions normalized to match ``smem_level`` issued"
"``SQ_INSTS_FLAT``", "Instr", "Number of flat instructions issued" "``SQ_INSTS_FLAT``", "Instr", "Number of flat instructions issued"
"``SQ_INSTS_FLAT_LDS_ONLY``", "Instr", "**MI200 Series only** Number of FLAT instructions that read/write only from/to LDS issued. Works only if ``EARLY_TA_DONE`` is enabled." "``SQ_INSTS_FLAT_LDS_ONLY``", "Instr", "**MI200 series only** Number of FLAT instructions that read/write only from/to LDS issued. Works only if ``EARLY_TA_DONE`` is enabled."
"``SQ_INSTS_LDS``", "Instr", "Number of LDS instructions issued **(MI200: includes flat; MI300: does not include flat)**" "``SQ_INSTS_LDS``", "Instr", "Number of LDS instructions issued **(MI200: includes flat; MI300: does not include flat)**"
"``SQ_INSTS_GDS``", "Instr", "Number of global data share instructions issued" "``SQ_INSTS_GDS``", "Instr", "Number of global data share instructions issued"
"``SQ_INSTS_EXP_GDS``", "Instr", "Number of EXP and global data share instructions excluding skipped export instructions issued" "``SQ_INSTS_EXP_GDS``", "Instr", "Number of EXP and global data share instructions excluding skipped export instructions issued"
@@ -396,9 +396,9 @@ Texture cache per pipe counters
"``TCP_UTCL1_TRANSLATION_MISS[n]``", "Req", "Number of unified translation cache (L1) translation misses", "0-15" "``TCP_UTCL1_TRANSLATION_MISS[n]``", "Req", "Number of unified translation cache (L1) translation misses", "0-15"
"``TCP_UTCL1_PERMISSION_MISS[n]``", "Req", "Number of unified translation cache (L1) permission misses", "0-15" "``TCP_UTCL1_PERMISSION_MISS[n]``", "Req", "Number of unified translation cache (L1) permission misses", "0-15"
"``TCP_TOTAL_CACHE_ACCESSES[n]``", "Req", "Number of vector L1d cache accesses including hits and misses", "0-15" "``TCP_TOTAL_CACHE_ACCESSES[n]``", "Req", "Number of vector L1d cache accesses including hits and misses", "0-15"
"``TCP_TCP_LATENCY[n]``", "Cycles", "**MI200 Series only** Accumulated wave access latency to vL1D over all wavefronts", "0-15" "``TCP_TCP_LATENCY[n]``", "Cycles", "**MI200 series only** Accumulated wave access latency to vL1D over all wavefronts", "0-15"
"``TCP_TCC_READ_REQ_LATENCY[n]``", "Cycles", "**MI200 Series only** Total vL1D to L2 request latency over all wavefronts for reads and atomics with return", "0-15" "``TCP_TCC_READ_REQ_LATENCY[n]``", "Cycles", "**MI200 series only** Total vL1D to L2 request latency over all wavefronts for reads and atomics with return", "0-15"
"``TCP_TCC_WRITE_REQ_LATENCY[n]``", "Cycles", "**MI200 Series only** Total vL1D to L2 request latency over all wavefronts for writes and atomics without return", "0-15" "``TCP_TCC_WRITE_REQ_LATENCY[n]``", "Cycles", "**MI200 series only** Total vL1D to L2 request latency over all wavefronts for writes and atomics without return", "0-15"
"``TCP_TCC_READ_REQ[n]``", "Req", "Number of read requests to L2 cache", "0-15" "``TCP_TCC_READ_REQ[n]``", "Req", "Number of read requests to L2 cache", "0-15"
"``TCP_TCC_WRITE_REQ[n]``", "Req", "Number of write requests to L2 cache", "0-15" "``TCP_TCC_WRITE_REQ[n]``", "Req", "Number of write requests to L2 cache", "0-15"
"``TCP_TCC_ATOMIC_WITH_RET_REQ[n]``", "Req", "Number of atomic requests to L2 cache with return", "0-15" "``TCP_TCC_ATOMIC_WITH_RET_REQ[n]``", "Req", "Number of atomic requests to L2 cache with return", "0-15"
@@ -560,7 +560,7 @@ Note the following:
``TCC_TAG_STALL[n]``, probes can stall the pipeline at a variety of places. There is no single point that ``TCC_TAG_STALL[n]``, probes can stall the pipeline at a variety of places. There is no single point that
can accurately measure the total stalls can accurately measure the total stalls
MI300 and MI200 Series derived metrics list MI300 and MI200 series derived metrics list
============================================================== ==============================================================
.. csv-table:: .. csv-table::

View File

@@ -1,21 +1,21 @@
--- ---
myst: myst:
html_meta: html_meta:
"description lang=en": "Learn about the AMD Instinct MI300 Series architecture." "description lang=en": "Learn about the AMD Instinct MI300 series architecture."
"keywords": "Instinct, MI300X, MI300A, microarchitecture, AMD, ROCm" "keywords": "Instinct, MI300X, MI300A, microarchitecture, AMD, ROCm"
--- ---
# AMD Instinct™ MI300 Series microarchitecture # AMD Instinct™ MI300 series microarchitecture
The AMD Instinct MI300 Series GPUs are based on the AMD CDNA 3 The AMD Instinct MI300 series accelerators are based on the AMD CDNA 3
architecture which was designed to deliver leadership performance for HPC, artificial intelligence (AI), and machine architecture which was designed to deliver leadership performance for HPC, artificial intelligence (AI), and machine
learning (ML) workloads. The AMD Instinct MI300 Series GPUs are well-suited for extreme scalability and compute performance, running learning (ML) workloads. The AMD Instinct MI300 series accelerators are well-suited for extreme scalability and compute performance, running
on everything from individual servers to the world's largest exascale supercomputers. on everything from individual servers to the world's largest exascale supercomputers.
With the MI300 Series, AMD is introducing the Accelerator Complex Die (XCD), which contains the With the MI300 series, AMD is introducing the Accelerator Complex Die (XCD), which contains the
GPU computational elements of the processor along with the lower levels of the cache hierarchy. GPU computational elements of the processor along with the lower levels of the cache hierarchy.
The following image depicts the structure of a single XCD in the AMD Instinct MI300 GPU Series. The following image depicts the structure of a single XCD in the AMD Instinct MI300 accelerator series.
```{figure} ../../data/shared/xcd-sys-arch.png ```{figure} ../../data/shared/xcd-sys-arch.png
--- ---
@@ -39,7 +39,7 @@ infrastructure) using the AMD Infinity Fabric™ technology as interconnect.
The Matrix Cores inside the CDNA 3 CUs have significant improvements, emphasizing AI and machine The Matrix Cores inside the CDNA 3 CUs have significant improvements, emphasizing AI and machine
learning, enhancing throughput of existing data types while adding support for new data types. learning, enhancing throughput of existing data types while adding support for new data types.
CDNA 2 Matrix Cores support FP16 and BF16, while offering INT8 for inference. Compared to MI250X CDNA 2 Matrix Cores support FP16 and BF16, while offering INT8 for inference. Compared to MI250X
GPUs, CDNA 3 Matrix Cores triple the performance for FP16 and BF16, while providing a accelerators, CDNA 3 Matrix Cores triple the performance for FP16 and BF16, while providing a
performance gain of 6.8 times for INT8. FP8 has a performance gain of 16 times compared to FP32, performance gain of 6.8 times for INT8. FP8 has a performance gain of 16 times compared to FP32,
while TF32 has a gain of 4 times compared to FP32. while TF32 has a gain of 4 times compared to FP32.
@@ -105,7 +105,7 @@ name: mi300-arch
alt: alt:
align: center align: center
--- ---
MI300 Series system architecture showing MI300A (left) with 6 XCDs and 3 CCDs, while the MI300X (right) has 8 XCDs. MI300 series system architecture showing MI300A (left) with 6 XCDs and 3 CCDs, while the MI300X (right) has 8 XCDs.
``` ```
## Node-level architecture ## Node-level architecture
@@ -116,11 +116,11 @@ name: mi300-node
align: center align: center
--- ---
MI300 Series node-level architecture showing 8 fully interconnected MI300X OAM modules connected to (optional) PCIe switches via retimers and HGX connectors. MI300 series node-level architecture showing 8 fully interconnected MI300X OAM modules connected to (optional) PCIe switches via retimers and HGX connectors.
``` ```
The image above shows the node-level architecture of a system with AMD EPYC processors in a The image above shows the node-level architecture of a system with AMD EPYC processors in a
dual-socket configuration and eight AMD Instinct MI300X GPUs. The MI300X OAMs attach to the dual-socket configuration and eight AMD Instinct MI300X accelerators. The MI300X OAMs attach to the
host system via PCIe Gen 5 x16 links (yellow lines). The GPUs are using seven high-bandwidth, host system via PCIe Gen 5 x16 links (yellow lines). The GPUs are using seven high-bandwidth,
low-latency AMD Infinity Fabric™ links (red lines) to form a fully connected 8-GPU system. low-latency AMD Infinity Fabric™ links (red lines) to form a fully connected 8-GPU system.

View File

@@ -1,530 +0,0 @@
.. meta::
:description: MI355 Series performance counters and metrics
:keywords: MI355, MI355X, MI3XX
***********************************
MI350 Series performance counters
***********************************
This topic lists and describes the hardware performance counters and derived metrics available on the AMD Instinct MI350 and MI355 GPUs. These counters are available for profiling using `ROCprofiler-SDK <https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/latest/index.html>`_ and `ROCm Compute Profiler <https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/>`_.
The following sections list the performance counters based on the IP blocks.
Command processor packet processor counters (CPC)
==================================================
.. list-table::
:header-rows: 1
* - Hardware counter
- Definition
* - CPC_ALWAYS_COUNT
- Always count.
* - CPC_ADC_VALID_CHUNK_NOT_AVAIL
- ADC valid chunk is not available when dispatch walking is in progress in the multi-xcc mode.
* - CPC_ADC_DISPATCH_ALLOC_DONE
- ADC dispatch allocation is done.
* - CPC_ADC_VALID_CHUNK_END
- ADC crawler's valid chunk end in the multi-xcc mode.
* - CPC_SYNC_FIFO_FULL_LEVEL
- SYNC FIFO full last cycles.
* - CPC_SYNC_FIFO_FULL
- SYNC FIFO full times.
* - CPC_GD_BUSY
- ADC busy.
* - CPC_TG_SEND
- ADC thread group send.
* - CPC_WALK_NEXT_CHUNK
- ADC walking next valid chunk in the multi-xcc mode.
* - CPC_STALLED_BY_SE0_SPI
- ADC CSDATA stalled by SE0SPI.
* - CPC_STALLED_BY_SE1_SPI
- ADC CSDATA stalled by SE1SPI.
* - CPC_STALLED_BY_SE2_SPI
- ADC CSDATA stalled by SE2SPI.
* - CPC_STALLED_BY_SE3_SPI
- ADC CSDATA stalled by SE3SPI.
* - CPC_LTE_ALL
- CPC sync counter LteAll. Only Master XCD manages LteAll.
* - CPC_SYNC_WRREQ_FIFO_BUSY
- CPC sync counter request FIFO is not empty.
* - CPC_CANE_BUSY
- CPC CANE bus is busy, which indicates the presence of inflight sync counter requests.
* - CPC_CANE_STALL
- CPC sync counter sending is stalled by CANE.
Shader pipe interpolators (SPI) counters
=========================================
.. list-table::
:header-rows: 1
* - Hardware counter
- Definition
* - SPI_CS0_WINDOW_VALID
- Clock count enabled by PIPE0 perfcounter_start event.
* - SPI_CS0_BUSY
- Number of clocks with outstanding waves for PIPE0 (SPI or SH).
* - SPI_CS0_NUM_THREADGROUPS
- Number of thread groups launched for PIPE0.
* - SPI_CS0_CRAWLER_STALL
- Number of clocks when PIPE0 event or wave order FIFO is full.
* - SPI_CS0_EVENT_WAVE
- Number of PIPE0 events and waves.
* - SPI_CS0_WAVE
- Number of PIPE0 waves.
* - SPI_CS1_WINDOW_VALID
- Clock count enabled by PIPE1 perfcounter_start event.
* - SPI_CS1_BUSY
- Number of clocks with outstanding waves for PIPE1 (SPI or SH).
* - SPI_CS1_NUM_THREADGROUPS
- Number of thread groups launched for PIPE1.
* - SPI_CS1_CRAWLER_STALL
- Number of clocks when PIPE1 event or wave order FIFO is full.
* - SPI_CS1_EVENT_WAVE
- Number of PIPE1 events and waves.
* - SPI_CS1_WAVE
- Number of PIPE1 waves.
* - SPI_CS2_WINDOW_VALID
- Clock count enabled by PIPE2 perfcounter_start event.
* - SPI_CS2_BUSY
- Number of clocks with outstanding waves for PIPE2 (SPI or SH).
* - SPI_CS2_NUM_THREADGROUPS
- Number of thread groups launched for PIPE2.
* - SPI_CS2_CRAWLER_STALL
- Number of clocks when PIPE2 event or wave order FIFO is full.
* - SPI_CS2_EVENT_WAVE
- Number of PIPE2 events and waves.
* - SPI_CS2_WAVE
- Number of PIPE2 waves.
* - SPI_CS3_WINDOW_VALID
- Clock count enabled by PIPE3 perfcounter_start event.
* - SPI_CS3_BUSY
- Number of clocks with outstanding waves for PIPE3 (SPI or SH).
* - SPI_CS3_NUM_THREADGROUPS
- Number of thread groups launched for PIPE3.
* - SPI_CS3_CRAWLER_STALL
- Number of clocks when PIPE3 event or wave order FIFO is full.
* - SPI_CS3_EVENT_WAVE
- Number of PIPE3 events and waves.
* - SPI_CS3_WAVE
- Number of PIPE3 waves.
* - SPI_CSQ_P0_Q0_OCCUPANCY
- Sum of occupancy info for PIPE0 Queue0.
* - SPI_CSQ_P0_Q1_OCCUPANCY
- Sum of occupancy info for PIPE0 Queue1.
* - SPI_CSQ_P0_Q2_OCCUPANCY
- Sum of occupancy info for PIPE0 Queue2.
* - SPI_CSQ_P0_Q3_OCCUPANCY
- Sum of occupancy info for PIPE0 Queue3.
* - SPI_CSQ_P0_Q4_OCCUPANCY
- Sum of occupancy info for PIPE0 Queue4.
* - SPI_CSQ_P0_Q5_OCCUPANCY
- Sum of occupancy info for PIPE0 Queue5.
* - SPI_CSQ_P0_Q6_OCCUPANCY
- Sum of occupancy info for PIPE0 Queue6.
* - SPI_CSQ_P0_Q7_OCCUPANCY
- Sum of occupancy info for PIPE0 Queue7.
* - SPI_CSQ_P1_Q0_OCCUPANCY
- Sum of occupancy info for PIPE1 Queue0.
* - SPI_CSQ_P1_Q1_OCCUPANCY
- Sum of occupancy info for PIPE1 Queue1.
* - SPI_CSQ_P1_Q2_OCCUPANCY
- Sum of occupancy info for PIPE1 Queue2.
* - SPI_CSQ_P1_Q3_OCCUPANCY
- Sum of occupancy info for PIPE1 Queue3.
* - SPI_CSQ_P1_Q4_OCCUPANCY
- Sum of occupancy info for PIPE1 Queue4.
* - SPI_CSQ_P1_Q5_OCCUPANCY
- Sum of occupancy info for PIPE1 Queue5.
* - SPI_CSQ_P1_Q6_OCCUPANCY
- Sum of occupancy info for PIPE1 Queue6.
* - SPI_CSQ_P1_Q7_OCCUPANCY
- Sum of occupancy info for PIPE1 Queue7.
* - SPI_CSQ_P2_Q0_OCCUPANCY
- Sum of occupancy info for PIPE2 Queue0.
* - SPI_CSQ_P2_Q1_OCCUPANCY
- Sum of occupancy info for PIPE2 Queue1.
* - SPI_CSQ_P2_Q2_OCCUPANCY
- Sum of occupancy info for PIPE2 Queue2.
* - SPI_CSQ_P2_Q3_OCCUPANCY
- Sum of occupancy info for PIPE2 Queue3.
* - SPI_CSQ_P2_Q4_OCCUPANCY
- Sum of occupancy info for PIPE2 Queue4.
* - SPI_CSQ_P2_Q5_OCCUPANCY
- Sum of occupancy info for PIPE2 Queue5.
* - SPI_CSQ_P2_Q6_OCCUPANCY
- Sum of occupancy info for PIPE2 Queue6.
* - SPI_CSQ_P2_Q7_OCCUPANCY
- Sum of occupancy info for PIPE2 Queue7.
* - SPI_CSQ_P3_Q0_OCCUPANCY
- Sum of occupancy info for PIPE3 Queue0.
* - SPI_CSQ_P3_Q1_OCCUPANCY
- Sum of occupancy info for PIPE3 Queue1.
* - SPI_CSQ_P3_Q2_OCCUPANCY
- Sum of occupancy info for PIPE3 Queue2.
* - SPI_CSQ_P3_Q3_OCCUPANCY
- Sum of occupancy info for PIPE3 Queue3.
* - SPI_CSQ_P3_Q4_OCCUPANCY
- Sum of occupancy info for PIPE3 Queue4.
* - SPI_CSQ_P3_Q5_OCCUPANCY
- Sum of occupancy info for PIPE3 Queue5.
* - SPI_CSQ_P3_Q6_OCCUPANCY
- Sum of occupancy info for PIPE3 Queue6.
* - SPI_CSQ_P3_Q7_OCCUPANCY
- Sum of occupancy info for PIPE3 Queue7.
* - SPI_CSQ_P0_OCCUPANCY
- Sum of occupancy info for all PIPE0 queues.
* - SPI_CSQ_P1_OCCUPANCY
- Sum of occupancy info for all PIPE1 queues.
* - SPI_CSQ_P2_OCCUPANCY
- Sum of occupancy info for all PIPE2 queues.
* - SPI_CSQ_P3_OCCUPANCY
- Sum of occupancy info for all PIPE3 queues.
* - SPI_VWC0_VDATA_VALID_WR
- Number of clocks VGPR bus_0 writes VGPRs.
* - SPI_VWC1_VDATA_VALID_WR
- Number of clocks VGPR bus_1 writes VGPRs.
* - SPI_CSC_WAVE_CNT_BUSY
- Number of cycles when there is any wave in the pipe.
Compute unit (SQ) counters
===========================
.. list-table::
:header-rows: 1
* - Hardware counter
- Definition
* - SQ_INSTS_VALU_MFMA_F6F4
- Number of VALU V_MFMA_*_F6F4 instructions.
* - SQ_INSTS_VALU_MFMA_MOPS_F6F4
- Number of VALU matrix with the performed math operations (add or mul) divided by 512, assuming a full EXEC mask of F6 or F4 data type.
* - SQ_ACTIVE_INST_VALU2
- Number of quad-cycles when two VALU instructions are issued (per-simd, nondeterministic).
* - SQ_INSTS_LDS_LOAD
- Number of LDS load instructions issued (per-simd, emulated).
* - SQ_INSTS_LDS_STORE
- Number of LDS store instructions issued (per-simd, emulated).
* - SQ_INSTS_LDS_ATOMIC
- Number of LDS atomic instructions issued (per-simd, emulated).
* - SQ_INSTS_LDS_LOAD_BANDWIDTH
- Total number of 64-bytes loaded (instrSize * CountOnes(EXEC))/64 (per-simd, emulated).
* - SQ_INSTS_LDS_STORE_BANDWIDTH
- Total number of 64-bytes written (instrSize * CountOnes(EXEC))/64 (per-simd, emulated).
* - SQ_INSTS_LDS_ATOMIC_BANDWIDTH
- Total number of 64-bytes atomic (instrSize * CountOnes(EXEC))/64 (per-simd, emulated).
* - SQ_INSTS_VALU_FLOPS_FP16
- Counts FLOPS per instruction on float 16 excluding MFMA/SMFMA.
* - SQ_INSTS_VALU_FLOPS_FP32
- Counts FLOPS per instruction on float 32 excluding MFMA/SMFMA.
* - SQ_INSTS_VALU_FLOPS_FP64
- Counts FLOPS per instruction on float 64 excluding MFMA/SMFMA.
* - SQ_INSTS_VALU_FLOPS_FP16_TRANS
- Counts FLOPS per instruction on float 16 trans excluding MFMA/SMFMA.
* - SQ_INSTS_VALU_FLOPS_FP32_TRANS
- Counts FLOPS per instruction on float 32 trans excluding MFMA/SMFMA.
* - SQ_INSTS_VALU_FLOPS_FP64_TRANS
- Counts FLOPS per instruction on float 64 trans excluding MFMA/SMFMA.
* - SQ_INSTS_VALU_IOPS
- Counts OPS per instruction on integer or unsigned or bit data (per-simd, emulated).
* - SQ_LDS_DATA_FIFO_FULL
- Number of cycles LDS data FIFO is full (nondeterministic, unwindowed).
* - SQ_LDS_CMD_FIFO_FULL
- Number of cycles LDS command FIFO is full (nondeterministic, unwindowed).
* - SQ_VMEM_TA_ADDR_FIFO_FULL
- Number of cycles texture requests are stalled due to full address FIFO in TA (nondeterministic, unwindowed).
* - SQ_VMEM_TA_CMD_FIFO_FULL
- Number of cycles texture requests are stalled due to full cmd FIFO in TA (nondeterministic, unwindowed).
* - SQ_VMEM_WR_TA_DATA_FIFO_FULL
- Number of cycles texture writes are stalled due to full data FIFO in TA (nondeterministic, unwindowed).
* - SQC_ICACHE_MISSES_DUPLICATE
- Number of duplicate misses (access to a non-resident, miss pending CL) (per-SQ, per-Bank, nondeterministic).
* - SQC_DCACHE_MISSES_DUPLICATE
- Number of duplicate misses (access to a non-resident, miss pending CL) (per-SQ, per-Bank, nondeterministic).
Texture addressing (TA) unit counters
======================================
.. list-table::
:header-rows: 1
* - Hardware counter
- Definition
* - TA_BUFFER_READ_LDS_WAVEFRONTS
- Number of buffer read wavefronts for LDS return processed by the TA.
* - TA_FLAT_READ_LDS_WAVEFRONTS
- Number of flat opcode reads for LDS return processed by the TA.
Texture data (TD) unit counters
================================
.. list-table::
:header-rows: 1
* - Hardware counter
- Definition
* - TD_WRITE_ACKT_WAVEFRONT
- Number of write acknowledgments, sent to SQ and not to SP.
* - TD_TD_SP_TRAFFIC
- Number of times this TD sends data to the SP.
Texture cache per pipe (TCP) counters
======================================
.. list-table::
:header-rows: 1
* - Hardware counter
- Definition
* - TCP_TCP_TA_ADDR_STALL_CYCLES
- TCP stalls TA addr interface.
* - TCP_TCP_TA_DATA_STALL_CYCLES
- TCP stalls TA data interface. Now windowed.
* - TCP_LFIFO_STALL_CYCLES
- Memory latency FIFOs full stall.
* - TCP_RFIFO_STALL_CYCLES
- Memory Request FIFOs full stall.
* - TCP_TCR_RDRET_STALL
- Write into cache stalled by read return from TCR.
* - TCP_PENDING_STALL_CYCLES
- Stall due to data pending from L2.
* - TCP_UTCL1_SERIALIZATION_STALL
- Total number of stalls caused due to serializing translation requests through the UTCL1.
* - TCP_UTCL1_THRASHING_STALL
- Stall caused by thrashing feature in any probe. Lacks accuracy when the stall signal overlaps between probe0 and probe1, which is worse with MECO of thrashing deadlock. Some probe0 events could miss being counted in with MECO on. This perf count provides a rough thrashing estimate.
* - TCP_UTCL1_TRANSLATION_MISS_UNDER_MISS
- Translation miss_under_miss.
* - TCP_UTCL1_STALL_INFLIGHT_MAX
- Total UTCL1 stalls due to inflight counter saturation.
* - TCP_UTCL1_STALL_LRU_INFLIGHT
- Total UTCL1 stalls due to LRU cache line with inflight traffic.
* - TCP_UTCL1_STALL_MULTI_MISS
- Total UTCL1 stalls due to arbitrated multiple misses.
* - TCP_UTCL1_LFIFO_FULL
- Total UTCL1 and UTCL2 latency, which hides FIFO full cycles.
* - TCP_UTCL1_STALL_LFIFO_NOT_RES
- Total UTCL1 stalls due to UTCL2 latency, which hides FIFO output (not resident).
* - TCP_UTCL1_STALL_UTCL2_REQ_OUT_OF_CREDITS
- Total UTCL1 stalls due to UTCL2_req being out of credits.
* - TCP_CLIENT_UTCL1_INFLIGHT
- The sum of inflight client to UTCL1 requests per cycle.
* - TCP_TAGRAM0_REQ
- Total L2 requests mapping to TagRAM 0 from this TCP to all TCCs.
* - TCP_TAGRAM1_REQ
- Total L2 requests mapping to TagRAM 1 from this TCP to all TCCs.
* - TCP_TAGRAM2_REQ
- Total L2 requests mapping to TagRAM 2 from this TCP to all TCCs.
* - TCP_TAGRAM3_REQ
- Total L2 requests mapping to TagRAM 3 from this TCP to all TCCs.
* - TCP_TCP_LATENCY
- Total TCP wave latency (from the first clock of wave entering to the first clock of wave leaving). Divide by TA_TCP_STATE_READ to find average wave latency.
* - TCP_TCC_READ_REQ_LATENCY
- Total TCP to TCC request latency for reads and atomics with return. Not Windowed.
* - TCP_TCC_WRITE_REQ_LATENCY
- Total TCP to TCC request latency for writes and atomics without return. Not Windowed.
* - TCP_TCC_WRITE_REQ_HOLE_LATENCY
- Total TCP req to TCC hole latency for writes and atomics. Not Windowed.
Texture cache per channel (TCC) counters
=========================================
.. list-table::
:header-rows: 1
* - Hardware counter
- Definition
* - TCC_READ_SECTORS
- Total number of 32B data sectors in read requests.
* - TCC_WRITE_SECTORS
- Total number of 32B data sectors in write requests.
* - TCC_ATOMIC_SECTORS
- Total number of 32B data sectors in atomic requests.
* - TCC_BYPASS_REQ
- Number of bypass requests. This is measured at the tag block.
* - TCC_LATENCY_FIFO_FULL
- Number of cycles when the latency FIFO is full.
* - TCC_SRC_FIFO_FULL
- Number of cycles when the SRC FIFO is assumed to be full as measured at the IB block.
* - TCC_EA0_RDREQ_64B
- Number of 64-byte TCC/EA read requests.
* - TCC_EA0_RDREQ_128B
- Number of 128-byte TCC/EA read requests.
* - TCC_IB_REQ
- Number of requests through the IB. This measures the number of raw requests from graphics clients to this TCC.
* - TCC_IB_STALL
- Number of cycles when the IB output is stalled.
* - TCC_EA0_WRREQ_WRITE_DRAM
- Number of TCC/EA write requests (32-byte or 64-byte) destined for DRAM (MC).
* - TCC_EA0_WRREQ_ATOMIC_DRAM
- Number of TCC/EA atomic requests (32-byte or 64-byte) destined for DRAM (MC).
* - TCC_EA0_RDREQ_DRAM_32B
- Number of 32-byte TCC/EA read requests due to DRAM traffic. One 64-byte request is counted as two and one 128-byte as four.
* - TCC_EA0_RDREQ_GMI_32B
- Number of 32-byte TCC/EA read requests due to GMI traffic. One 64-byte request is counted as two and one 128-byte as four.
* - TCC_EA0_RDREQ_IO_32B
- Number of 32-byte TCC/EA read requests due to IO traffic. One 64-byte request is counted as two and one 128-byte as four.
* - TCC_EA0_WRREQ_WRITE_DRAM_32B
- Number of 32-byte TCC/EA write requests due to DRAM traffic. One 64-byte request is counted as two.
* - TCC_EA0_WRREQ_ATOMIC_DRAM_32B
- Number of 32-byte TCC/EA atomic requests due to DRAM traffic. One 64-byte request is counted as two.
* - TCC_EA0_WRREQ_WRITE_GMI_32B
- Number of 32-byte TCC/EA write requests due to GMI traffic. One 64-byte request is counted as two.
* - TCC_EA0_WRREQ_ATOMIC_GMI_32B
- Number of 32-byte TCC/EA atomic requests due to GMI traffic. One 64-byte request is counted as two.
* - TCC_EA0_WRREQ_WRITE_IO_32B
- Number of 32-byte TCC/EA write requests due to IO traffic. One 64-byte request is counted as two.
* - TCC_EA0_WRREQ_ATOMIC_IO_32B
- Number of 32-byte TCC/EA atomic requests due to IO traffic. One 64-byte request is counted as two.

View File

@@ -34,7 +34,7 @@ Runtime
```{code-block} shell ```{code-block} shell
:caption: Example to expose the first device and a device based on UUID. :caption: Example to expose the first device and a device based on UUID.
export ROCR_VISIBLE_DEVICES="0,GPU-4b2c1a9f-8d3e-6f7a-b5c9-2e4d8a1f6c3b" export ROCR_VISIBLE_DEVICES="0,GPU-DEADBEEFDEADBEEF"
``` ```
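The same restriction can also be applied from Python, provided the variable is set before any ROCm-backed framework initializes the runtime in the process. The sketch below uses PyTorch purely as an example framework; any ROCm runtime initialized in this process (or in a child process inheriting the environment) sees only the listed devices.

```python
# Minimal sketch: restrict the visible devices before the ROCm runtime is
# initialized in this process.
import os

os.environ["ROCR_VISIBLE_DEVICES"] = "0"  # expose only the first device

import torch  # example framework; import only after the variable is set

print(torch.cuda.device_count())  # ROCm builds of PyTorch should report 1 here
```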
### `GPU_DEVICE_ORDINAL` ### `GPU_DEVICE_ORDINAL`

View File

@@ -8,7 +8,6 @@ import os
import shutil import shutil
import sys import sys
from pathlib import Path from pathlib import Path
from subprocess import run
gh_release_path = os.path.join("..", "RELEASE.md") gh_release_path = os.path.join("..", "RELEASE.md")
gh_changelog_path = os.path.join("..", "CHANGELOG.md") gh_changelog_path = os.path.join("..", "CHANGELOG.md")
@@ -81,27 +80,24 @@ latex_elements = {
} }
html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "rocm.docs.amd.com") html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "rocm.docs.amd.com")
html_context = {"docs_header_version": "7.1.1"} html_context = {}
if os.environ.get("READTHEDOCS", "") == "True": if os.environ.get("READTHEDOCS", "") == "True":
html_context["READTHEDOCS"] = True html_context["READTHEDOCS"] = True
# Check if the branch is a docs/ branch
official_branch = run(["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True).stdout.find("docs/")
# configurations for PDF output by Read the Docs # configurations for PDF output by Read the Docs
project = "ROCm Documentation" project = "ROCm Documentation"
project_path = os.path.abspath(".").replace("\\", "/") project_path = os.path.abspath(".").replace("\\", "/")
author = "Advanced Micro Devices, Inc." author = "Advanced Micro Devices, Inc."
copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved." copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved."
version = "7.1.1" version = "6.4.3"
release = "7.1.1" release = "6.4.3"
setting_all_article_info = True setting_all_article_info = True
all_article_info_os = ["linux", "windows"] all_article_info_os = ["linux", "windows"]
all_article_info_author = "" all_article_info_author = ""
# pages with specific settings # pages with specific settings
article_pages = [ article_pages = [
{"file": "about/release-notes", "os": ["linux"], "date": "2025-11-26"}, {"file": "about/release-notes", "os": ["linux"], "date": "2025-08-07"},
{"file": "release/changelog", "os": ["linux"],}, {"file": "release/changelog", "os": ["linux"],},
{"file": "compatibility/compatibility-matrix", "os": ["linux"]}, {"file": "compatibility/compatibility-matrix", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]}, {"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]},
@@ -111,17 +107,12 @@ article_pages = [
{"file": "compatibility/ml-compatibility/stanford-megatron-lm-compatibility", "os": ["linux"]}, {"file": "compatibility/ml-compatibility/stanford-megatron-lm-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/dgl-compatibility", "os": ["linux"]}, {"file": "compatibility/ml-compatibility/dgl-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/megablocks-compatibility", "os": ["linux"]}, {"file": "compatibility/ml-compatibility/megablocks-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/ray-compatibility", "os": ["linux"]}, {"file": "compatibility/ml-compatibility/taichi-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/llama-cpp-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/flashinfer-compatibility", "os": ["linux"]},
{"file": "how-to/deep-learning-rocm", "os": ["linux"]}, {"file": "how-to/deep-learning-rocm", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/index", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/install", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/install", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/system-setup/index", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/system-health-check", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/system-setup/multi-node-setup", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/system-setup/prerequisite-system-validation", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/system-setup/system-health-check", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/index", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/train-a-model", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/train-a-model", "os": ["linux"]},
@@ -133,37 +124,15 @@ article_pages = [
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.3", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.3", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.4", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.4", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.5", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.5", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.6", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.7", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.8", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.9", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.10", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-primus-migration-guide", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/primus-megatron", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-megatron-v25.7", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-megatron-v25.8", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-megatron-v25.9", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-megatron-v25.10", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/pytorch-training", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/pytorch-training", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-history", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-history", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.3", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.3", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.4", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.4", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.5", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.5", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.6", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.7", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.8", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.9", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.10", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/primus-pytorch", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-pytorch-v25.8", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-pytorch-v25.9", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-pytorch-v25.10", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-history", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-history", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-v25.4", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-v25.4", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-v25.5", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/xdit-diffusion-inference", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/fine-tuning/index", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/fine-tuning/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/fine-tuning/overview", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/fine-tuning/overview", "os": ["linux"]},
@@ -187,17 +156,7 @@ article_pages = [
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.0.1-20250702", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.0.1-20250702", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.1-20250702", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.1-20250702", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.1-20250715", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.1-20250715", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.0-20250812", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.1-20250909", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.2-20251006", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.11.1-20251103", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/sglang-history", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/xdit-diffusion-inference", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.10", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.11", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.12", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.13", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/index", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/inference-optimization/index", "os": ["linux"]},
@@ -225,7 +184,7 @@ external_toc_path = "./sphinx/_toc.yml"
# Add the _extensions directory to Python's search path # Add the _extensions directory to Python's search path
sys.path.append(str(Path(__file__).parent / 'extension')) sys.path.append(str(Path(__file__).parent / 'extension'))
extensions = ["rocm_docs", "sphinx_reredirects", "sphinx_sitemap", "sphinxcontrib.datatemplates", "remote-content", "version-ref", "csv-to-list-table"] extensions = ["rocm_docs", "sphinx_reredirects", "sphinx_sitemap", "sphinxcontrib.datatemplates", "version-ref", "csv-to-list-table"]
compatibility_matrix_file = str(Path(__file__).parent / 'compatibility/compatibility-matrix-historical-6.0.csv') compatibility_matrix_file = str(Path(__file__).parent / 'compatibility/compatibility-matrix-historical-6.0.csv')
@@ -235,14 +194,10 @@ external_projects_current_project = "rocm"
# external_projects_remote_repository = "" # external_projects_remote_repository = ""
html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "https://rocm-stg.amd.com/") html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "https://rocm-stg.amd.com/")
html_context = {"docs_header_version": "7.1.0"} html_context = {}
if os.environ.get("READTHEDOCS", "") == "True": if os.environ.get("READTHEDOCS", "") == "True":
html_context["READTHEDOCS"] = True html_context["READTHEDOCS"] = True
html_context["official_branch"] = official_branch
html_context["version"] = version
html_context["release"] = release
html_theme = "rocm_docs_theme" html_theme = "rocm_docs_theme"
html_theme_options = {"flavor": "rocm-docs-home"} html_theme_options = {"flavor": "rocm-docs-home"}
@@ -261,13 +216,10 @@ suppress_warnings = ["autosectionlabel.*"]
html_context = { html_context = {
"project_path" : {project_path}, "project_path" : {project_path},
"gpu_type" : [('AMD Instinct GPUs', 'intrinsic'), ('AMD gfx families', 'gfx'), ('NVIDIA families', 'nvidia') ], "gpu_type" : [('AMD Instinct accelerators', 'intrinsic'), ('AMD gfx families', 'gfx'), ('NVIDIA families', 'nvidia') ],
"atomics_type" : [('HW atomics', 'hw-atomics'), ('CAS emulation', 'cas-atomics')], "atomics_type" : [('HW atomics', 'hw-atomics'), ('CAS emulation', 'cas-atomics')],
"pcie_type" : [('No PCIe atomics', 'nopcie'), ('PCIe atomics', 'pcie')], "pcie_type" : [('No PCIe atomics', 'nopcie'), ('PCIe atomics', 'pcie')],
"memory_type" : [('Device DRAM', 'device-dram'), ('Migratable Host DRAM', 'migratable-host-dram'), ('Pinned Host DRAM', 'pinned-host-dram')], "memory_type" : [('Device DRAM', 'device-dram'), ('Migratable Host DRAM', 'migratable-host-dram'), ('Pinned Host DRAM', 'pinned-host-dram')],
"granularity_type" : [('Coarse-grained', 'coarse-grained'), ('Fine-grained', 'fine-grained')], "granularity_type" : [('Coarse-grained', 'coarse-grained'), ('Fine-grained', 'fine-grained')],
"scope_type" : [('Device', 'device'), ('System', 'system')] "scope_type" : [('Device', 'device'), ('System', 'system')]
} }
# Disable figure and table numbering
numfig = False

Binary file not shown.

Before: 114 KiB  |  After: 81 KiB

View File

@@ -1,91 +0,0 @@
vllm_benchmark:
unified_docker:
latest:
pull_tag: rocm/vllm:rocm6.4.1_vllm_0.10.0_20250812
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.10.0_20250812/images/sha256-4c277ad39af3a8c9feac9b30bf78d439c74d9b4728e788a419d3f1d0c30cacaa
rocm_version: 6.4.1
vllm_version: 0.10.0 (0.10.1.dev395+g340ea86df.rocm641)
pytorch_version: 2.7.0+gitf717b2a
hipblaslt_version: 0.15
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.1 8B
mad_tag: pyt_vllm_llama-3.1-8b
model_repo: meta-llama/Llama-3.1-8B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: float16
- model: Llama 3.1 70B
mad_tag: pyt_vllm_llama-3.1-70b
model_repo: meta-llama/Llama-3.1-70B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
precision: float16
- model: Llama 3.1 405B
mad_tag: pyt_vllm_llama-3.1-405b
model_repo: meta-llama/Llama-3.1-405B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
precision: float16
- model: Llama 2 70B
mad_tag: pyt_vllm_llama-2-70b
model_repo: meta-llama/Llama-2-70b-chat-hf
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
precision: float16
- model: Llama 3.1 8B FP8
mad_tag: pyt_vllm_llama-3.1-8b_fp8
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
precision: float8
- model: Llama 3.1 70B FP8
mad_tag: pyt_vllm_llama-3.1-70b_fp8
model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
precision: float8
- model: Llama 3.1 405B FP8
mad_tag: pyt_vllm_llama-3.1-405b_fp8
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
precision: float8
- group: Mistral AI
tag: mistral
models:
- model: Mixtral MoE 8x7B
mad_tag: pyt_vllm_mixtral-8x7b
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
precision: float16
- model: Mixtral MoE 8x22B
mad_tag: pyt_vllm_mixtral-8x22b
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
precision: float16
- model: Mixtral MoE 8x7B FP8
mad_tag: pyt_vllm_mixtral-8x7b_fp8
model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
precision: float8
- model: Mixtral MoE 8x22B FP8
mad_tag: pyt_vllm_mixtral-8x22b_fp8
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
precision: float8
- group: Qwen
tag: qwen
models:
- model: QwQ-32B
mad_tag: pyt_vllm_qwq-32b
model_repo: Qwen/QwQ-32B
url: https://huggingface.co/Qwen/QwQ-32B
precision: float16
- model: Qwen3 30B A3B
mad_tag: pyt_vllm_qwen3-30b-a3b
model_repo: Qwen/Qwen3-30B-A3B
url: https://huggingface.co/Qwen/Qwen3-30B-A3B
precision: float16
- group: Microsoft Phi
tag: phi
models:
- model: Phi-4
mad_tag: pyt_vllm_phi-4
model_repo: microsoft/phi-4
url: https://huggingface.co/microsoft/phi-4

View File

@@ -1,188 +0,0 @@
dockers:
- pull_tag: rocm/vllm:rocm6.4.1_vllm_0.10.1_20250909
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.10.1_20250909/images/sha256-1113268572e26d59b205792047bea0e61e018e79aeadceba118b7bf23cb3715c
components:
ROCm: 6.4.1
vLLM: 0.10.1 (0.10.1rc2.dev409+g0b6bf6691.rocm641)
PyTorch: 2.7.0+gitf717b2a
hipBLASLt: 0.15
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.1 8B
mad_tag: pyt_vllm_llama-3.1-8b
model_repo: meta-llama/Llama-3.1-8B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 131072
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 70B
mad_tag: pyt_vllm_llama-3.1-70b
model_repo: meta-llama/Llama-3.1-70B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 131072
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B
mad_tag: pyt_vllm_llama-3.1-405b
model_repo: meta-llama/Llama-3.1-405B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 131072
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 2 70B
mad_tag: pyt_vllm_llama-2-70b
model_repo: meta-llama/Llama-2-70b-chat-hf
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 4096
max_num_batched_tokens: 4096
max_model_len: 4096
- model: Llama 3.1 8B FP8
mad_tag: pyt_vllm_llama-3.1-8b_fp8
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
precision: float8
config:
tp: 1
dtype: auto
kv_cache_dtype: fp8
max_seq_len_to_capture: 131072
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 70B FP8
mad_tag: pyt_vllm_llama-3.1-70b_fp8
model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_seq_len_to_capture: 131072
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B FP8
mad_tag: pyt_vllm_llama-3.1-405b_fp8
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_seq_len_to_capture: 131072
max_num_batched_tokens: 131072
max_model_len: 8192
- group: Mistral AI
tag: mistral
models:
- model: Mixtral MoE 8x7B
mad_tag: pyt_vllm_mixtral-8x7b
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 32768
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Mixtral MoE 8x22B
mad_tag: pyt_vllm_mixtral-8x22b
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 65536
max_num_batched_tokens: 65536
max_model_len: 8192
- model: Mixtral MoE 8x7B FP8
mad_tag: pyt_vllm_mixtral-8x7b_fp8
model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_seq_len_to_capture: 32768
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Mixtral MoE 8x22B FP8
mad_tag: pyt_vllm_mixtral-8x22b_fp8
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_seq_len_to_capture: 65536
max_num_batched_tokens: 65536
max_model_len: 8192
- group: Qwen
tag: qwen
models:
- model: QwQ-32B
mad_tag: pyt_vllm_qwq-32b
model_repo: Qwen/QwQ-32B
url: https://huggingface.co/Qwen/QwQ-32B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 131072
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Qwen3 30B A3B
mad_tag: pyt_vllm_qwen3-30b-a3b
model_repo: Qwen/Qwen3-30B-A3B
url: https://huggingface.co/Qwen/Qwen3-30B-A3B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 32768
max_num_batched_tokens: 32768
max_model_len: 8192
- group: Microsoft Phi
tag: phi
models:
- model: Phi-4
mad_tag: pyt_vllm_phi-4
model_repo: microsoft/phi-4
url: https://huggingface.co/microsoft/phi-4
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 16384
max_num_batched_tokens: 16384
max_model_len: 8192
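
The per-model `config` blocks above map fairly directly onto vLLM engine arguments. As a hedged illustration (the launch wrapper used by the MAD benchmark scripts is not shown here), the Llama 3.1 70B entry could be served inside the listed container roughly as follows; the flag names come from the public vLLM CLI, and the port is a placeholder.

```{code-block} shell
:caption: Illustrative mapping of a model config entry to vLLM serve flags.
# Serve Llama 3.1 70B with the settings from its config block (tp=8, auto dtypes, 8K context).
vllm serve meta-llama/Llama-3.1-70B-Instruct \
    --tensor-parallel-size 8 \
    --dtype auto \
    --kv-cache-dtype auto \
    --max-seq-len-to-capture 131072 \
    --max-num-batched-tokens 131072 \
    --max-model-len 8192 \
    --port 8000
```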

View File

@@ -1,316 +0,0 @@
dockers:
- pull_tag: rocm/vllm:rocm7.0.0_vllm_0.10.2_20251006
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.10.2_20251006/images/sha256-94fd001964e1cf55c3224a445b1fb5be31a7dac302315255db8422d813edd7f5
components:
ROCm: 7.0.0
vLLM: 0.10.2 (0.11.0rc2.dev160+g790d22168.rocm700)
PyTorch: 2.9.0a0+git1c57644
hipBLASLt: 1.0.0
dockerfile:
commit: 790d22168820507f3105fef29596549378cfe399
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 2 70B
mad_tag: pyt_vllm_llama-2-70b
model_repo: meta-llama/Llama-2-70b-chat-hf
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 4096
max_model_len: 4096
- model: Llama 3.1 8B
mad_tag: pyt_vllm_llama-3.1-8b
model_repo: meta-llama/Llama-3.1-8B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 8B FP8
mad_tag: pyt_vllm_llama-3.1-8b_fp8
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
precision: float8
config:
tp: 1
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B
mad_tag: pyt_vllm_llama-3.1-405b
model_repo: meta-llama/Llama-3.1-405B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B FP8
mad_tag: pyt_vllm_llama-3.1-405b_fp8
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B MXFP4
mad_tag: pyt_vllm_llama-3.1-405b_fp4
model_repo: amd/Llama-3.1-405B-Instruct-MXFP4-Preview
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-MXFP4-Preview
precision: float4
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B
mad_tag: pyt_vllm_llama-3.3-70b
model_repo: meta-llama/Llama-3.3-70B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B FP8
mad_tag: pyt_vllm_llama-3.3-70b_fp8
model_repo: amd/Llama-3.3-70B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B MXFP4
mad_tag: pyt_vllm_llama-3.3-70b_fp4
model_repo: amd/Llama-3.3-70B-Instruct-MXFP4-Preview
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-MXFP4-Preview
precision: float4
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 4 Scout 17Bx16E
mad_tag: pyt_vllm_llama-4-scout-17b-16e
model_repo: meta-llama/Llama-4-Scout-17B-16E-Instruct
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Llama 4 Maverick 17Bx128E
mad_tag: pyt_vllm_llama-4-maverick-17b-128e
model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct
url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Llama 4 Maverick 17Bx128E FP8
mad_tag: pyt_vllm_llama-4-maverick-17b-128e_fp8
model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek R1 0528 FP8
mad_tag: pyt_vllm_deepseek-r1
model_repo: deepseek-ai/DeepSeek-R1-0528
url: https://huggingface.co/deepseek-ai/DeepSeek-R1-0528
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_seqs: 1024
max_num_batched_tokens: 131072
max_model_len: 8192
- group: OpenAI GPT OSS
tag: gpt-oss
models:
- model: GPT OSS 20B
mad_tag: pyt_vllm_gpt-oss-20b
model_repo: openai/gpt-oss-20b
url: https://huggingface.co/openai/gpt-oss-20b
precision: bfloat16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 8192
max_model_len: 8192
- model: GPT OSS 120B
mad_tag: pyt_vllm_gpt-oss-120b
model_repo: openai/gpt-oss-120b
url: https://huggingface.co/openai/gpt-oss-120b
precision: bfloat16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 8192
max_model_len: 8192
- group: Mistral AI
tag: mistral
models:
- model: Mixtral MoE 8x7B
mad_tag: pyt_vllm_mixtral-8x7b
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Mixtral MoE 8x7B FP8
mad_tag: pyt_vllm_mixtral-8x7b_fp8
model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Mixtral MoE 8x22B
mad_tag: pyt_vllm_mixtral-8x22b
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 65536
max_model_len: 8192
- model: Mixtral MoE 8x22B FP8
mad_tag: pyt_vllm_mixtral-8x22b_fp8
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 65536
max_model_len: 8192
- group: Qwen
tag: qwen
models:
- model: Qwen3 8B
mad_tag: pyt_vllm_qwen3-8b
model_repo: Qwen/Qwen3-8B
url: https://huggingface.co/Qwen/Qwen3-8B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 32B
mad_tag: pyt_vllm_qwen3-32b
model_repo: Qwen/Qwen3-32b
url: https://huggingface.co/Qwen/Qwen3-32B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 30B A3B
mad_tag: pyt_vllm_qwen3-30b-a3b
model_repo: Qwen/Qwen3-30B-A3B
url: https://huggingface.co/Qwen/Qwen3-30B-A3B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 30B A3B FP8
mad_tag: pyt_vllm_qwen3-30b-a3b_fp8
model_repo: Qwen/Qwen3-30B-A3B-FP8
url: https://huggingface.co/Qwen/Qwen3-30B-A3B-FP8
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 235B A22B
mad_tag: pyt_vllm_qwen3-235b-a22b
model_repo: Qwen/Qwen3-235B-A22B
url: https://huggingface.co/Qwen/Qwen3-235B-A22B
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 235B A22B FP8
mad_tag: pyt_vllm_qwen3-235b-a22b_fp8
model_repo: Qwen/Qwen3-235B-A22B-FP8
url: https://huggingface.co/Qwen/Qwen3-235B-A22B-FP8
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 40960
max_model_len: 8192
- group: Microsoft Phi
tag: phi
models:
- model: Phi-4
mad_tag: pyt_vllm_phi-4
model_repo: microsoft/phi-4
url: https://huggingface.co/microsoft/phi-4
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 16384
max_model_len: 8192

View File

@@ -1,316 +0,0 @@
dockers:
- pull_tag: rocm/vllm:rocm7.0.0_vllm_0.11.1_20251103
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.11.1_20251103/images/sha256-8d60429043d4d00958da46039a1de0d9b82df814d45da482497eef26a6076506
components:
ROCm: 7.0.0
vLLM: 0.11.1 (0.11.1rc2.dev141+g38f225c2a.rocm700)
PyTorch: 2.9.0a0+git1c57644
hipBLASLt: 1.0.0
dockerfile:
commit: 38f225c2abeadc04c2cc398814c2f53ea02c3c72
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 2 70B
mad_tag: pyt_vllm_llama-2-70b
model_repo: meta-llama/Llama-2-70b-chat-hf
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 4096
max_model_len: 4096
- model: Llama 3.1 8B
mad_tag: pyt_vllm_llama-3.1-8b
model_repo: meta-llama/Llama-3.1-8B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 8B FP8
mad_tag: pyt_vllm_llama-3.1-8b_fp8
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
precision: float8
config:
tp: 1
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B
mad_tag: pyt_vllm_llama-3.1-405b
model_repo: meta-llama/Llama-3.1-405B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B FP8
mad_tag: pyt_vllm_llama-3.1-405b_fp8
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B MXFP4
mad_tag: pyt_vllm_llama-3.1-405b_fp4
model_repo: amd/Llama-3.1-405B-Instruct-MXFP4-Preview
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-MXFP4-Preview
precision: float4
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B
mad_tag: pyt_vllm_llama-3.3-70b
model_repo: meta-llama/Llama-3.3-70B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B FP8
mad_tag: pyt_vllm_llama-3.3-70b_fp8
model_repo: amd/Llama-3.3-70B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B MXFP4
mad_tag: pyt_vllm_llama-3.3-70b_fp4
model_repo: amd/Llama-3.3-70B-Instruct-MXFP4-Preview
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-MXFP4-Preview
precision: float4
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 4 Scout 17Bx16E
mad_tag: pyt_vllm_llama-4-scout-17b-16e
model_repo: meta-llama/Llama-4-Scout-17B-16E-Instruct
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Llama 4 Maverick 17Bx128E
mad_tag: pyt_vllm_llama-4-maverick-17b-128e
model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct
url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Llama 4 Maverick 17Bx128E FP8
mad_tag: pyt_vllm_llama-4-maverick-17b-128e_fp8
model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek R1 0528 FP8
mad_tag: pyt_vllm_deepseek-r1
model_repo: deepseek-ai/DeepSeek-R1-0528
url: https://huggingface.co/deepseek-ai/DeepSeek-R1-0528
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_seqs: 1024
max_num_batched_tokens: 131072
max_model_len: 8192
- group: OpenAI GPT OSS
tag: gpt-oss
models:
- model: GPT OSS 20B
mad_tag: pyt_vllm_gpt-oss-20b
model_repo: openai/gpt-oss-20b
url: https://huggingface.co/openai/gpt-oss-20b
precision: bfloat16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 8192
max_model_len: 8192
- model: GPT OSS 120B
mad_tag: pyt_vllm_gpt-oss-120b
model_repo: openai/gpt-oss-120b
url: https://huggingface.co/openai/gpt-oss-120b
precision: bfloat16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 8192
max_model_len: 8192
- group: Mistral AI
tag: mistral
models:
- model: Mixtral MoE 8x7B
mad_tag: pyt_vllm_mixtral-8x7b
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Mixtral MoE 8x7B FP8
mad_tag: pyt_vllm_mixtral-8x7b_fp8
model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Mixtral MoE 8x22B
mad_tag: pyt_vllm_mixtral-8x22b
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 65536
max_model_len: 8192
- model: Mixtral MoE 8x22B FP8
mad_tag: pyt_vllm_mixtral-8x22b_fp8
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 65536
max_model_len: 8192
- group: Qwen
tag: qwen
models:
- model: Qwen3 8B
mad_tag: pyt_vllm_qwen3-8b
model_repo: Qwen/Qwen3-8B
url: https://huggingface.co/Qwen/Qwen3-8B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 32B
mad_tag: pyt_vllm_qwen3-32b
model_repo: Qwen/Qwen3-32b
url: https://huggingface.co/Qwen/Qwen3-32B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 30B A3B
mad_tag: pyt_vllm_qwen3-30b-a3b
model_repo: Qwen/Qwen3-30B-A3B
url: https://huggingface.co/Qwen/Qwen3-30B-A3B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 30B A3B FP8
mad_tag: pyt_vllm_qwen3-30b-a3b_fp8
model_repo: Qwen/Qwen3-30B-A3B-FP8
url: https://huggingface.co/Qwen/Qwen3-30B-A3B-FP8
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 235B A22B
mad_tag: pyt_vllm_qwen3-235b-a22b
model_repo: Qwen/Qwen3-235B-A22B
url: https://huggingface.co/Qwen/Qwen3-235B-A22B
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 235B A22B FP8
mad_tag: pyt_vllm_qwen3-235b-a22b_fp8
model_repo: Qwen/Qwen3-235B-A22B-FP8
url: https://huggingface.co/Qwen/Qwen3-235B-A22B-FP8
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 40960
max_model_len: 8192
- group: Microsoft Phi
tag: phi
models:
- model: Phi-4
mad_tag: pyt_vllm_phi-4
model_repo: microsoft/phi-4
url: https://huggingface.co/microsoft/phi-4
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 16384
max_model_len: 8192

View File

@@ -1,55 +0,0 @@
xdit_diffusion_inference:
docker:
pull_tag: rocm/pytorch-xdit:v25.10
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.10/images/sha256-d79715ff18a9470e3f907cec8a9654d6b783c63370b091446acffc0de4d7070e
ROCm: 7.9.0
components:
TheRock: 7afbe45
rccl: 9b04b2a
composable_kernel: b7a806f
rocm-libraries: f104555
rocm-systems: 25922d0
torch: 2.10.0a0+gite9c9017
torchvision: 0.22.0a0+966da7e
triton: 3.5.0+git52e49c12
accelerate: 1.11.0.dev0
aiter: 0.1.5.post4.dev20+ga25e55e79
diffusers: 0.36.0.dev0
xfuser: 0.4.4
yunchang: 0.6.3.post1
model_groups:
- group: Hunyuan Video
tag: hunyuan
models:
- model: Hunyuan Video
model_name: hunyuanvideo
model_repo: tencent/HunyuanVideo
revision: refs/pr/18
url: https://huggingface.co/tencent/HunyuanVideo
github: https://github.com/Tencent-Hunyuan/HunyuanVideo
mad_tag: pyt_xdit_hunyuanvideo
- group: Wan-AI
tag: wan
models:
- model: Wan2.1
model_name: wan2_1-i2v-14b-720p
model_repo: Wan-AI/Wan2.1-I2V-14B-720P
url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P
github: https://github.com/Wan-Video/Wan2.1
mad_tag: pyt_xdit_wan_2_1
- model: Wan2.2
model_name: wan2_2-i2v-a14b
model_repo: Wan-AI/Wan2.2-I2V-A14B
url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B
github: https://github.com/Wan-Video/Wan2.2
mad_tag: pyt_xdit_wan_2_2
- group: FLUX
tag: flux
models:
- model: FLUX.1
model_name: FLUX.1-dev
model_repo: black-forest-labs/FLUX.1-dev
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
github: https://github.com/black-forest-labs/flux
mad_tag: pyt_xdit_flux
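
The `model_repo` and `revision` fields above identify Hugging Face repositories. As a hedged example of how they might be consumed, the HunyuanVideo weights could be pre-fetched with the Hugging Face CLI; the local directory is a placeholder, and the actual MAD scripts may stage models differently.

```{code-block} shell
:caption: Pre-fetching a model referenced by model_repo/revision (illustrative).
# Download the HunyuanVideo weights at the pinned revision into a local directory.
huggingface-cli download tencent/HunyuanVideo \
    --revision refs/pr/18 \
    --local-dir /models/HunyuanVideo
```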

View File

@@ -1,109 +0,0 @@
xdit_diffusion_inference:
docker:
- version: v25-11
pull_tag: rocm/pytorch-xdit:v25.11
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.11/images/sha256-c9fa659439bb024f854b4d5eea598347251b02c341c55f66c98110832bde4216
ROCm: 7.10.0
supported_models:
- group: Hunyuan Video
models:
- Hunyuan Video
- group: Wan-AI
models:
- Wan2.1
- Wan2.2
- group: FLUX
models:
- FLUX.1
whats_new:
- "Minor bug fixes and clarifications to READMEs."
- "Bumps TheRock, AITER, Diffusers, xDiT versions."
- "Changes Aiter rounding mode for faster gfx942 FWD Attention."
components:
TheRock: 3e3f834
rccl: d23d18f
composable_kernel: 2570462
rocm-libraries: 0588f07
rocm-systems: 473025a
torch: 73adac
torchvision: f5c6c2e
triton: 7416ffc
accelerate: 34c1779
aiter: de14bec
diffusers: 40528e9
xfuser: 83978b5
yunchang: 2c9b712
- version: v25-10
pull_tag: rocm/pytorch-xdit:v25.10
docker_hub_url: https://hub.docker.com/r/rocm/pytorch-xdit
ROCm: 7.9.0
supported_models:
- group: Hunyuan Video
models:
- Hunyuan Video
- group: Wan-AI
models:
- Wan2.1
- Wan2.2
- group: FLUX
models:
- FLUX.1
whats_new:
- "First official xDiT Docker Release for Diffusion Inference."
- "Supports gfx942 and gfx950 series (AMD Instinct™ MI300X, MI325X, MI350X, and MI355X)."
- "Support Wan 2.1, Wan 2.2, HunyuanVideo and Flux workloads."
components:
TheRock: 7afbe45
rccl: 9b04b2a
composable_kernel: b7a806f
rocm-libraries: f104555
rocm-systems: 25922d0
torch: 2.10.0a0+gite9c9017
torchvision: 0.22.0a0+966da7e
triton: 3.5.0+git52e49c12
accelerate: 1.11.0.dev0
aiter: 0.1.5.post4.dev20+ga25e55e79
diffusers: 0.36.0.dev0
xfuser: 0.4.4
yunchang: 0.6.3.post1
model_groups:
- group: Hunyuan Video
tag: hunyuan
models:
- model: Hunyuan Video
page_tag: hunyuan_tag
model_name: hunyuanvideo
model_repo: tencent/HunyuanVideo
revision: refs/pr/18
url: https://huggingface.co/tencent/HunyuanVideo
github: https://github.com/Tencent-Hunyuan/HunyuanVideo
mad_tag: pyt_xdit_hunyuanvideo
- group: Wan-AI
tag: wan
models:
- model: Wan2.1
page_tag: wan_21_tag
model_name: wan2_1-i2v-14b-720p
model_repo: Wan-AI/Wan2.1-I2V-14B-720P
url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P
github: https://github.com/Wan-Video/Wan2.1
mad_tag: pyt_xdit_wan_2_1
- model: Wan2.2
page_tag: wan_22_tag
model_name: wan2_2-i2v-a14b
model_repo: Wan-AI/Wan2.2-I2V-A14B
url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B
github: https://github.com/Wan-Video/Wan2.2
mad_tag: pyt_xdit_wan_2_2
- group: FLUX
tag: flux
models:
- model: FLUX.1
page_tag: flux_1_tag
model_name: FLUX.1-dev
model_repo: black-forest-labs/FLUX.1-dev
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
github: https://github.com/black-forest-labs/flux
mad_tag: pyt_xdit_flux

View File

@@ -1,91 +0,0 @@
docker:
pull_tag: rocm/pytorch-xdit:v25.12
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.12/images/sha256-e06895132316bf3c393366b70a91eaab6755902dad0100e6e2b38310547d9256
ROCm: 7.10.0
whats_new:
- "Adds T2V and TI2V support for Wan models."
- "Adds support for SD-3.5 T2I model."
components:
TheRock:
version: 3e3f834
url: https://github.com/ROCm/TheRock
rccl:
version: d23d18f
url: https://github.com/ROCm/rccl
composable_kernel:
version: 2570462
url: https://github.com/ROCm/composable_kernel
rocm-libraries:
version: 0588f07
url: https://github.com/ROCm/rocm-libraries
rocm-systems:
version: 473025a
url: https://github.com/ROCm/rocm-systems
torch:
version: 73adac
url: https://github.com/pytorch/pytorch
torchvision:
version: f5c6c2e
url: https://github.com/pytorch/vision
triton:
version: 7416ffc
url: https://github.com/triton-lang/triton
accelerate:
version: 34c1779
url: https://github.com/huggingface/accelerate
aiter:
version: de14bec
url: https://github.com/ROCm/aiter
diffusers:
version: 40528e9
url: https://github.com/huggingface/diffusers
xfuser:
version: ccba9d5
url: https://github.com/xdit-project/xDiT
yunchang:
version: 2c9b712
url: https://github.com/feifeibear/long-context-attention
supported_models:
- group: Hunyuan Video
js_tag: hunyuan
models:
- model: Hunyuan Video
model_repo: tencent/HunyuanVideo
revision: refs/pr/18
url: https://huggingface.co/tencent/HunyuanVideo
github: https://github.com/Tencent-Hunyuan/HunyuanVideo
mad_tag: pyt_xdit_hunyuanvideo
js_tag: hunyuan_tag
- group: Wan-AI
js_tag: wan
models:
- model: Wan2.1
model_repo: Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
github: https://github.com/Wan-Video/Wan2.1
mad_tag: pyt_xdit_wan_2_1
js_tag: wan_21_tag
- model: Wan2.2
model_repo: Wan-AI/Wan2.2-I2V-A14B-Diffusers
url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers
github: https://github.com/Wan-Video/Wan2.2
mad_tag: pyt_xdit_wan_2_2
js_tag: wan_22_tag
- group: FLUX
js_tag: flux
models:
- model: FLUX.1
model_repo: black-forest-labs/FLUX.1-dev
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
github: https://github.com/black-forest-labs/flux
mad_tag: pyt_xdit_flux
js_tag: flux_1_tag
- group: Stable Diffusion
js_tag: stablediffusion
models:
- model: stable-diffusion-3.5-large
model_repo: stabilityai/stable-diffusion-3.5-large
url: https://huggingface.co/stabilityai/stable-diffusion-3.5-large
github: https://github.com/Stability-AI/sd3.5
mad_tag: pyt_xdit_sd_3_5
js_tag: stable_diffusion_3_5_large_tag

View File

@@ -1,16 +1,17 @@
dockers: sglang_benchmark:
- pull_tag: lmsysorg/sglang:v0.4.5-rocm630 unified_docker:
docker_hub_url: https://hub.docker.com/layers/lmsysorg/sglang/v0.4.5-rocm630/images/sha256-63d2cb760a237125daf6612464cfe2f395c0784e21e8b0ea37d551cd10d3c951 latest:
components: pull_tag: lmsysorg/sglang:v0.4.5-rocm630
ROCm: 6.3.0 docker_hub_url: https://hub.docker.com/layers/lmsysorg/sglang/v0.4.5-rocm630/images/sha256-63d2cb760a237125daf6612464cfe2f395c0784e21e8b0ea37d551cd10d3c951
SGLang: 0.4.5 (0.4.5-rocm) rocm_version: 6.3.0
PyTorch: 2.6.0a0+git8d4926e sglang_version: 0.4.5 (0.4.5-rocm)
model_groups: pytorch_version: 2.6.0a0+git8d4926e
- group: DeepSeek model_groups:
tag: deepseek - group: DeepSeek
models: tag: deepseek
- model: DeepSeek-R1-Distill-Qwen-32B models:
mad_tag: pyt_sglang_deepseek-r1-distill-qwen-32b - model: DeepSeek-R1-Distill-Qwen-32B
model_repo: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B mad_tag: pyt_sglang_deepseek-r1-distill-qwen-32b
url: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B model_repo: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
precision: bfloat16 url: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
precision: bfloat16

View File

@@ -1,32 +0,0 @@
dockers:
- pull_tag: lmsysorg/sglang:v0.5.2rc1-rocm700-mi30x
docker_hub_url: https://hub.docker.com/layers/lmsysorg/sglang/v0.5.2rc1-rocm700-mi30x/images/sha256-10c4ee502ddba44dd8c13325e6e03868bfe7f43d23d0a44780a8ee8b393f4729
components:
ROCm: 7.0.0
SGLang: v0.5.2rc1
pytorch-triton-rocm: 3.4.0+rocm7.0.0.gitf9e5bf54
model_groups:
- group: Dense models
tag: dense-models
models:
- model: Llama 3.1 8B Instruct
model_repo: Llama-3.1-8B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct
- model: Llama 3.1 405B FP8 KV
model_repo: Llama-3.1-405B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
- model: Llama 3.3 70B FP8 KV
model_repo: amd-Llama-3.3-70B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-FP8-KV
- model: Qwen3 32B
model_repo: Qwen3-32B
url: https://huggingface.co/Qwen/Qwen3-32B
- group: Small experts models
tag: small-experts-models
models:
- model: DeepSeek V3
model_repo: DeepSeek-V3
url: https://huggingface.co/deepseek-ai/DeepSeek-V3
- model: Mixtral 8x7B v0.1
model_repo: Mixtral-8x7B-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x7B-v0.1

View File

@@ -1,316 +1,88 @@
dockers: vllm_benchmark:
- pull_tag: rocm/vllm:rocm7.0.0_vllm_0.11.2_20251210 unified_docker:
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.11.2_20251210/images/sha256-e7f02dd2ce3824959658bc0391296f6158638e3ebce164f6c019c4eca8150ec7 latest:
components: # TODO: update me
ROCm: 7.0.0 pull_tag: rocm/vllm:rocm6.4.1_vllm_0.10.0_20250812
vLLM: 0.11.2 (0.11.2.dev673+g839868462.rocm700) docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.10.0_20250812/images/sha256-4c277ad39af3a8c9feac9b30bf78d439c74d9b4728e788a419d3f1d0c30cacaa
PyTorch: 2.9.0a0+git1c57644 rocm_version: 6.4.1
hipBLASLt: 1.0.0 vllm_version: 0.10.0 (0.10.1.dev395+g340ea86df.rocm641)
dockerfile: pytorch_version: 2.7.0+gitf717b2a (2.7.0+gitf717b2a)
commit: 8398684622109c806a35d660647060b0b9910663 hipblaslt_version: 0.15
model_groups: model_groups:
- group: Meta Llama - group: Meta Llama
tag: llama tag: llama
models: models:
- model: Llama 2 70B
mad_tag: pyt_vllm_llama-2-70b
model_repo: meta-llama/Llama-2-70b-chat-hf
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 4096
max_model_len: 4096
- model: Llama 3.1 8B - model: Llama 3.1 8B
mad_tag: pyt_vllm_llama-3.1-8b mad_tag: pyt_vllm_llama-3.1-8b
model_repo: meta-llama/Llama-3.1-8B-Instruct model_repo: meta-llama/Llama-3.1-8B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-8B url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: float16 precision: float16
config: - model: Llama 3.1 70B
tp: 1 mad_tag: pyt_vllm_llama-3.1-70b
dtype: auto model_repo: meta-llama/Llama-3.1-70B-Instruct
kv_cache_dtype: auto url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
max_num_batched_tokens: 131072 precision: float16
max_model_len: 8192
- model: Llama 3.1 8B FP8
mad_tag: pyt_vllm_llama-3.1-8b_fp8
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
precision: float8
config:
tp: 1
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B - model: Llama 3.1 405B
mad_tag: pyt_vllm_llama-3.1-405b mad_tag: pyt_vllm_llama-3.1-405b
model_repo: meta-llama/Llama-3.1-405B-Instruct model_repo: meta-llama/Llama-3.1-405B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
precision: float16 precision: float16
config: - model: Llama 2 70B
tp: 8 mad_tag: pyt_vllm_llama-2-70b
dtype: auto model_repo: meta-llama/Llama-2-70b-chat-hf
kv_cache_dtype: auto url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
max_num_batched_tokens: 131072 precision: float16
max_model_len: 8192 - model: Llama 3.1 8B FP8
mad_tag: pyt_vllm_llama-3.1-8b_fp8
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
precision: float8
- model: Llama 3.1 70B FP8
mad_tag: pyt_vllm_llama-3.1-70b_fp8
model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
precision: float8
- model: Llama 3.1 405B FP8 - model: Llama 3.1 405B FP8
mad_tag: pyt_vllm_llama-3.1-405b_fp8 mad_tag: pyt_vllm_llama-3.1-405b_fp8
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
precision: float8 precision: float8
config: - group: Mistral AI
tp: 8 tag: mistral
dtype: auto models:
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B MXFP4
mad_tag: pyt_vllm_llama-3.1-405b_fp4
model_repo: amd/Llama-3.1-405B-Instruct-MXFP4-Preview
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-MXFP4-Preview
precision: float4
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B
mad_tag: pyt_vllm_llama-3.3-70b
model_repo: meta-llama/Llama-3.3-70B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B FP8
mad_tag: pyt_vllm_llama-3.3-70b_fp8
model_repo: amd/Llama-3.3-70B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B MXFP4
mad_tag: pyt_vllm_llama-3.3-70b_fp4
model_repo: amd/Llama-3.3-70B-Instruct-MXFP4-Preview
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-MXFP4-Preview
precision: float4
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 4 Scout 17Bx16E
mad_tag: pyt_vllm_llama-4-scout-17b-16e
model_repo: meta-llama/Llama-4-Scout-17B-16E-Instruct
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Llama 4 Maverick 17Bx128E
mad_tag: pyt_vllm_llama-4-maverick-17b-128e
model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct
url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Llama 4 Maverick 17Bx128E FP8
mad_tag: pyt_vllm_llama-4-maverick-17b-128e_fp8
model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek R1 0528 FP8
mad_tag: pyt_vllm_deepseek-r1
model_repo: deepseek-ai/DeepSeek-R1-0528
url: https://huggingface.co/deepseek-ai/DeepSeek-R1-0528
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_seqs: 1024
max_num_batched_tokens: 131072
max_model_len: 8192
- group: OpenAI GPT OSS
tag: gpt-oss
models:
- model: GPT OSS 20B
mad_tag: pyt_vllm_gpt-oss-20b
model_repo: openai/gpt-oss-20b
url: https://huggingface.co/openai/gpt-oss-20b
precision: bfloat16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 8192
max_model_len: 8192
- model: GPT OSS 120B
mad_tag: pyt_vllm_gpt-oss-120b
model_repo: openai/gpt-oss-120b
url: https://huggingface.co/openai/gpt-oss-120b
precision: bfloat16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 8192
max_model_len: 8192
- group: Mistral AI
tag: mistral
models:
- model: Mixtral MoE 8x7B
mad_tag: pyt_vllm_mixtral-8x7b
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
precision: float16
-config:
-tp: 8
-dtype: auto
-kv_cache_dtype: auto
-max_num_batched_tokens: 32768
-max_model_len: 8192
-- model: Mixtral MoE 8x7B FP8
-mad_tag: pyt_vllm_mixtral-8x7b_fp8
-model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
-url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
-precision: float8
-config:
-tp: 8
-dtype: auto
-kv_cache_dtype: fp8
-max_num_batched_tokens: 32768
-max_model_len: 8192
- model: Mixtral MoE 8x22B
mad_tag: pyt_vllm_mixtral-8x22b
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
precision: float16
-config:
-tp: 8
-dtype: auto
-kv_cache_dtype: auto
-max_num_batched_tokens: 65536
-max_model_len: 8192
+- model: Mixtral MoE 8x7B FP8
+mad_tag: pyt_vllm_mixtral-8x7b_fp8
+model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
+url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
+precision: float8
- model: Mixtral MoE 8x22B FP8
mad_tag: pyt_vllm_mixtral-8x22b_fp8
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
precision: float8
-config:
-tp: 8
-dtype: auto
-kv_cache_dtype: fp8
-max_num_batched_tokens: 65536
-max_model_len: 8192
-- group: Qwen
-tag: qwen
-models:
-- model: Qwen3 8B
-mad_tag: pyt_vllm_qwen3-8b
-model_repo: Qwen/Qwen3-8B
-url: https://huggingface.co/Qwen/Qwen3-8B
+- group: Qwen
+tag: qwen
+models:
+- model: QwQ-32B
+mad_tag: pyt_vllm_qwq-32b
+model_repo: Qwen/QwQ-32B
+url: https://huggingface.co/Qwen/QwQ-32B
precision: float16
-config:
-tp: 1
-dtype: auto
-kv_cache_dtype: auto
-max_num_batched_tokens: 40960
-max_model_len: 8192
-- model: Qwen3 32B
-mad_tag: pyt_vllm_qwen3-32b
-model_repo: Qwen/Qwen3-32b
-url: https://huggingface.co/Qwen/Qwen3-32B
-precision: float16
-config:
-tp: 1
-dtype: auto
-kv_cache_dtype: auto
-max_num_batched_tokens: 40960
-max_model_len: 8192
-- model: Qwen3 30B A3B
-mad_tag: pyt_vllm_qwen3-30b-a3b
-model_repo: Qwen/Qwen3-30B-A3B
-url: https://huggingface.co/Qwen/Qwen3-30B-A3B
-precision: float16
-config:
-tp: 1
-dtype: auto
-kv_cache_dtype: auto
-max_num_batched_tokens: 40960
-max_model_len: 8192
-- model: Qwen3 30B A3B FP8
-mad_tag: pyt_vllm_qwen3-30b-a3b_fp8
-model_repo: Qwen/Qwen3-30B-A3B-FP8
-url: https://huggingface.co/Qwen/Qwen3-30B-A3B-FP8
-precision: float16
-config:
-tp: 1
-dtype: auto
-kv_cache_dtype: fp8
-max_num_batched_tokens: 40960
-max_model_len: 8192
-- model: Qwen3 235B A22B
-mad_tag: pyt_vllm_qwen3-235b-a22b
-model_repo: Qwen/Qwen3-235B-A22B
-url: https://huggingface.co/Qwen/Qwen3-235B-A22B
-precision: float16
-config:
-tp: 8
-dtype: auto
-kv_cache_dtype: auto
-max_num_batched_tokens: 40960
-max_model_len: 8192
-- model: Qwen3 235B A22B FP8
-mad_tag: pyt_vllm_qwen3-235b-a22b_fp8
-model_repo: Qwen/Qwen3-235B-A22B-FP8
-url: https://huggingface.co/Qwen/Qwen3-235B-A22B-FP8
-precision: float8
-config:
-tp: 8
-dtype: auto
-kv_cache_dtype: fp8
-max_num_batched_tokens: 40960
-max_model_len: 8192
-- group: Microsoft Phi
-tag: phi
-models:
+tunableop: true
+- group: Microsoft Phi
+tag: phi
+models:
- model: Phi-4
mad_tag: pyt_vllm_phi-4
model_repo: microsoft/phi-4
url: https://huggingface.co/microsoft/phi-4
-precision: float16
-config:
-tp: 1
-dtype: auto
-kv_cache_dtype: auto
-max_num_batched_tokens: 16384
-max_model_len: 8192
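The `config:` blocks on the removed side of this hunk hold the per-model launch settings (tp, dtype, kv_cache_dtype, max_num_batched_tokens, max_model_len). A minimal sketch of how such a catalog can be consumed, assuming the file is saved as `vllm_models.yaml`, that its groups sit under a top-level `model_groups` key, and that the config keys correspond to vLLM's standard serve flags; the filename and any names not visible in the hunk are assumptions, not taken from the diff:

```python
# Minimal sketch, not the repository's tooling: build a `vllm serve` command
# from one catalog entry. The filename, the `model_groups` top-level key, and
# the flag mapping are assumptions.
import yaml  # PyYAML

def serve_command(catalog_path: str, mad_tag: str) -> str:
    with open(catalog_path) as f:
        catalog = yaml.safe_load(f)
    for group in catalog["model_groups"]:
        for entry in group["models"]:
            if entry["mad_tag"] == mad_tag:
                cfg = entry.get("config", {})  # absent on the simplified entries
                return (
                    f"vllm serve {entry['model_repo']}"
                    f" --tensor-parallel-size {cfg.get('tp', 1)}"
                    f" --dtype {cfg.get('dtype', 'auto')}"
                    f" --kv-cache-dtype {cfg.get('kv_cache_dtype', 'auto')}"
                    f" --max-num-batched-tokens {cfg.get('max_num_batched_tokens', 8192)}"
                    f" --max-model-len {cfg.get('max_model_len', 8192)}"
                )
    raise KeyError(f"no entry with mad_tag {mad_tag!r}")

print(serve_command("vllm_models.yaml", "pyt_vllm_mixtral-8x7b"))
```

Run against the removed Mixtral MoE 8x7B entry, this would print a serve line with `--tensor-parallel-size 8` and a 32768-token batch budget.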

View File

@@ -1,105 +0,0 @@
docker:
pull_tag: rocm/pytorch-xdit:v25.13
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.13/images/sha256-81954713070d67bde08595e03f62110c8a3dd66a9ae17a77d611e01f83f0f4ef
ROCm: 7.11.0
whats_new:
- "Flux.1 Kontext support"
- "Flux.2 Dev support"
- "Flux FP8 GEMM support"
- "Hybrid FP8 attention support for Wan models"
components:
TheRock:
version: 1728a81
url: https://github.com/ROCm/TheRock
rccl:
version: d23d18f
url: https://github.com/ROCm/rccl
composable_kernel:
version: ab0101c
url: https://github.com/ROCm/composable_kernel
rocm-libraries:
version: a2f7c35
url: https://github.com/ROCm/rocm-libraries
rocm-systems:
version: 659737c
url: https://github.com/ROCm/rocm-systems
torch:
version: 91be249
url: https://github.com/ROCm/pytorch
torchvision:
version: b919bd0
url: https://github.com/pytorch/vision
triton:
version: a272dfa
url: https://github.com/ROCm/triton
accelerate:
version: b521400f
url: https://github.com/huggingface/accelerate
aiter:
version: de14bec0
url: https://github.com/ROCm/aiter
diffusers:
version: a1f36ee3e
url: https://github.com/huggingface/diffusers
xfuser:
version: adf2681
url: https://github.com/xdit-project/xDiT
yunchang:
version: 2c9b712
url: https://github.com/feifeibear/long-context-attention
supported_models:
- group: Hunyuan Video
js_tag: hunyuan
models:
- model: Hunyuan Video
model_repo: tencent/HunyuanVideo
revision: refs/pr/18
url: https://huggingface.co/tencent/HunyuanVideo
github: https://github.com/Tencent-Hunyuan/HunyuanVideo
mad_tag: pyt_xdit_hunyuanvideo
js_tag: hunyuan_tag
- group: Wan-AI
js_tag: wan
models:
- model: Wan2.1
model_repo: Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
github: https://github.com/Wan-Video/Wan2.1
mad_tag: pyt_xdit_wan_2_1
js_tag: wan_21_tag
- model: Wan2.2
model_repo: Wan-AI/Wan2.2-I2V-A14B-Diffusers
url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers
github: https://github.com/Wan-Video/Wan2.2
mad_tag: pyt_xdit_wan_2_2
js_tag: wan_22_tag
- group: FLUX
js_tag: flux
models:
- model: FLUX.1
model_repo: black-forest-labs/FLUX.1-dev
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
github: https://github.com/black-forest-labs/flux
mad_tag: pyt_xdit_flux
js_tag: flux_1_tag
- model: FLUX.1 Kontext
model_repo: black-forest-labs/FLUX.1-Kontext-dev
url: https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev
github: https://github.com/black-forest-labs/flux
mad_tag: pyt_xdit_flux_kontext
js_tag: flux_1_kontext_tag
- model: FLUX.2
model_repo: black-forest-labs/FLUX.2-dev
url: https://huggingface.co/black-forest-labs/FLUX.2-dev
github: https://github.com/black-forest-labs/flux2
mad_tag: pyt_xdit_flux_2
js_tag: flux_2_tag
- group: StableDiffusion
js_tag: stablediffusion
models:
- model: stable-diffusion-3.5-large
model_repo: stabilityai/stable-diffusion-3.5-large
url: https://huggingface.co/stabilityai/stable-diffusion-3.5-large
github: https://github.com/Stability-AI/sd3.5
mad_tag: pyt_xdit_sd_3_5
js_tag: stable_diffusion_3_5_large_tag
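The deleted xDiT data file above ties one Docker image to the set of video and image generation models it was validated with, each keyed by `mad_tag` and `js_tag`, with an optional pinned `revision`. A minimal sketch of reading that structure with PyYAML, assuming the file is saved as `pytorch_xdit.yaml` (an illustrative name):

```python
# Minimal sketch, assuming the listing above is saved as pytorch_xdit.yaml:
# print the image to pull and the models the catalog covers.
import yaml

with open("pytorch_xdit.yaml") as f:
    catalog = yaml.safe_load(f)

print(f"docker pull {catalog['docker']['pull_tag']}")
for group in catalog["supported_models"]:
    for m in group["models"]:
        revision = m.get("revision", "main")  # only some entries pin a revision
        print(f"{m['model']:<28} {m['model_repo']} (revision {revision}, mad_tag {m['mad_tag']})")
```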

View File

@@ -1,64 +0,0 @@
dockers:
- pull_tag: rocm/jax-training:maxtext-v25.11
docker_hub_url: https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.11/images/sha256-18e4d8f0b8ce7a7422c58046940dd5f32249960449fca09a562b65fb8eb1562a
components:
ROCm: 7.1.0
JAX: 0.7.1
Python: 3.12
Transformer Engine: 2.4.0.dev0+281042de
hipBLASLt: 1.2.x
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 2 7B
mad_tag: jax_maxtext_train_llama-2-7b
model_repo: Llama-2-7B
precision: bf16
multinode_training_script: llama2_7b_multinode.sh
doc_options: ["single-node", "multi-node"]
- model: Llama 2 70B
mad_tag: jax_maxtext_train_llama-2-70b
model_repo: Llama-2-70B
precision: bf16
multinode_training_script: llama2_70b_multinode.sh
doc_options: ["single-node", "multi-node"]
- model: Llama 3 8B (multi-node)
mad_tag: jax_maxtext_train_llama-3-8b
multinode_training_script: llama3_8b_multinode.sh
doc_options: ["multi-node"]
- model: Llama 3 70B (multi-node)
mad_tag: jax_maxtext_train_llama-3-70b
multinode_training_script: llama3_70b_multinode.sh
doc_options: ["multi-node"]
- model: Llama 3.1 8B
mad_tag: jax_maxtext_train_llama-3.1-8b
model_repo: Llama-3.1-8B
precision: bf16
doc_options: ["single-node"]
- model: Llama 3.1 70B
mad_tag: jax_maxtext_train_llama-3.1-70b
model_repo: Llama-3.1-70B
precision: bf16
doc_options: ["single-node"]
- model: Llama 3.3 70B
mad_tag: jax_maxtext_train_llama-3.3-70b
model_repo: Llama-3.3-70B
precision: bf16
doc_options: ["single-node"]
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V2-Lite (16B)
mad_tag: jax_maxtext_train_deepseek-v2-lite-16b
model_repo: DeepSeek-V2-lite
precision: bf16
doc_options: ["single-node"]
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: jax_maxtext_train_mixtral-8x7b
model_repo: Mixtral-8x7B
precision: bf16
doc_options: ["single-node"]
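Entries in the deleted MaxText file above declare where they are documented via `doc_options`, and the multi-node ones also name a `multinode_training_script`. A minimal sketch that lists only the multi-node recipes, assuming the file is saved as `jax_maxtext.yaml` (an assumed name) with the top-level `model_groups` layout shown above:

```python
# Minimal sketch, assuming the listing above is saved as jax_maxtext.yaml:
# show which entries document a multi-node run and the script each one names.
import yaml

with open("jax_maxtext.yaml") as f:
    catalog = yaml.safe_load(f)

for group in catalog["model_groups"]:
    for m in group["models"]:
        if "multi-node" in m.get("doc_options", []):
            print(f"{m['model']}: {m['multinode_training_script']}")
```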

View File

@@ -1,17 +1,15 @@
-docker:
-pull_tag: rocm/primus:v25.10
-docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.10/images/sha256-140c37cd2eeeb183759b9622543fc03cc210dc97cbfa18eeefdcbda84420c197
+dockers:
+- pull_tag: rocm/megatron-lm:v25.7_py310
+docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.7_py310/images/sha256-6189df849feeeee3ae31bb1e97aef5006d69d2b90c134e97708c19632e20ab5a
components:
-ROCm: 7.1.0
-Primus: 0.3.0
-Primus Turbo: 0.1.1
-PyTorch: 2.10.0.dev20251112+rocm7.1
-Python: "3.10"
-Transformer Engine: 2.4.0.dev0+32e2d1d4
-Flash Attention: 2.8.3
-hipBLASLt: 1.2.0-09ab7153e2
-Triton: 3.4.0
-RCCL: 2.27.7
+ROCm: 6.4.2
+Primus: v0.1.0-rc1
+PyTorch: 2.8.0a0+gitd06a406
+Python: "3.10"
+Transformer Engine: 2.1.0.dev0+ba586519
+hipBLASLt: 37ba1d36
+Triton: 3.3.0
+RCCL: 2.22.3
model_groups:
- group: Meta Llama
tag: llama
@@ -22,6 +20,8 @@ model_groups:
mad_tag: pyt_megatron_lm_train_llama-3.1-8b
- model: Llama 3.1 70B
mad_tag: pyt_megatron_lm_train_llama-3.1-70b
+- model: Llama 3.1 70B (proxy)
+mad_tag: pyt_megatron_lm_train_llama-3.1-70b-proxy
- model: Llama 2 7B
mad_tag: pyt_megatron_lm_train_llama-2-7b
- model: Llama 2 70B

View File

@@ -1,72 +0,0 @@
dockers:
- pull_tag: rocm/jax-training:maxtext-v25.7-jax060
docker_hub_url: https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.7/images/sha256-45f4c727d4019a63fc47313d3a5f5a5105569539294ddfd2d742218212ae9025
components:
ROCm: 6.4.1
JAX: 0.6.0
Python: 3.10.12
Transformer Engine: 2.1.0+90d703dd
hipBLASLt: 1.1.0-499ece1c21
- pull_tag: rocm/jax-training:maxtext-v25.7
docker_hub_url: https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.7/images/sha256-45f4c727d4019a63fc47313d3a5f5a5105569539294ddfd2d742218212ae9025
components:
ROCm: 6.4.1
JAX: 0.5.0
Python: 3.10.12
Transformer Engine: 2.1.0+90d703dd
hipBLASLt: 1.x.x
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: jax_maxtext_train_llama-3.3-70b
model_repo: Llama-3.3-70B
precision: bf16
doc_options: ["single-node"]
- model: Llama 3.1 8B
mad_tag: jax_maxtext_train_llama-3.1-8b
model_repo: Llama-3.1-8B
precision: bf16
doc_options: ["single-node"]
- model: Llama 3.1 70B
mad_tag: jax_maxtext_train_llama-3.1-70b
model_repo: Llama-3.1-70B
precision: bf16
doc_options: ["single-node"]
- model: Llama 3 8B
mad_tag: jax_maxtext_train_llama-3-8b
multinode_training_script: llama3_8b_multinode.sh
doc_options: ["multi-node"]
- model: Llama 3 70B
mad_tag: jax_maxtext_train_llama-3-70b
multinode_training_script: llama3_70b_multinode.sh
doc_options: ["multi-node"]
- model: Llama 2 7B
mad_tag: jax_maxtext_train_llama-2-7b
model_repo: Llama-2-7B
precision: bf16
multinode_training_script: llama2_7b_multinode.sh
doc_options: ["single-node", "multi-node"]
- model: Llama 2 70B
mad_tag: jax_maxtext_train_llama-2-70b
model_repo: Llama-2-70B
precision: bf16
multinode_training_script: llama2_70b_multinode.sh
doc_options: ["single-node", "multi-node"]
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V2-Lite (16B)
mad_tag: jax_maxtext_train_deepseek-v2-lite-16b
model_repo: DeepSeek-V2-lite
precision: bf16
doc_options: ["single-node"]
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: jax_maxtext_train_mixtral-8x7b
model_repo: Mixtral-8x7B
precision: bf16
doc_options: ["single-node"]

View File

@@ -1,64 +0,0 @@
dockers:
- pull_tag: rocm/jax-training:maxtext-v25.9.1
docker_hub_url: https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.9.1/images/sha256-60946cfbd470f6ee361fc9da740233a4fb2e892727f01719145b1f7627a1cff6
components:
ROCm: 7.0.0
JAX: 0.6.2
Python: 3.10.18
Transformer Engine: 2.2.0.dev0+c91bac54
hipBLASLt: 1.x.x
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 2 7B
mad_tag: jax_maxtext_train_llama-2-7b
model_repo: Llama-2-7B
precision: bf16
multinode_training_script: llama2_7b_multinode.sh
doc_options: ["single-node", "multi-node"]
- model: Llama 2 70B
mad_tag: jax_maxtext_train_llama-2-70b
model_repo: Llama-2-70B
precision: bf16
multinode_training_script: llama2_70b_multinode.sh
doc_options: ["single-node", "multi-node"]
- model: Llama 3 8B (multi-node)
mad_tag: jax_maxtext_train_llama-3-8b
multinode_training_script: llama3_8b_multinode.sh
doc_options: ["multi-node"]
- model: Llama 3 70B (multi-node)
mad_tag: jax_maxtext_train_llama-3-70b
multinode_training_script: llama3_70b_multinode.sh
doc_options: ["multi-node"]
- model: Llama 3.1 8B
mad_tag: jax_maxtext_train_llama-3.1-8b
model_repo: Llama-3.1-8B
precision: bf16
doc_options: ["single-node"]
- model: Llama 3.1 70B
mad_tag: jax_maxtext_train_llama-3.1-70b
model_repo: Llama-3.1-70B
precision: bf16
doc_options: ["single-node"]
- model: Llama 3.3 70B
mad_tag: jax_maxtext_train_llama-3.3-70b
model_repo: Llama-3.3-70B
precision: bf16
doc_options: ["single-node"]
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V2-Lite (16B)
mad_tag: jax_maxtext_train_deepseek-v2-lite-16b
model_repo: DeepSeek-V2-lite
precision: bf16
doc_options: ["single-node"]
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: jax_maxtext_train_mixtral-8x7b
model_repo: Mixtral-8x7B
precision: bf16
doc_options: ["single-node"]

View File

@@ -1,49 +0,0 @@
docker:
pull_tag: rocm/primus:v25.10
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.10/images/sha256-140c37cd2eeeb183759b9622543fc03cc210dc97cbfa18eeefdcbda84420c197
components:
ROCm: 7.1.0
Primus: 0.3.0
Primus Turbo: 0.1.1
PyTorch: 2.10.0.dev20251112+rocm7.1
Python: "3.10"
Transformer Engine: 2.4.0.dev0+32e2d1d4
Flash Attention: 2.8.3
hipBLASLt: 1.2.0-09ab7153e2
Triton: 3.4.0
RCCL: 2.27.7
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: pyt_megatron_lm_train_llama-3.3-70b
- model: Llama 3.1 8B
mad_tag: pyt_megatron_lm_train_llama-3.1-8b
- model: Llama 3.1 70B
mad_tag: pyt_megatron_lm_train_llama-3.1-70b
- model: Llama 2 7B
mad_tag: pyt_megatron_lm_train_llama-2-7b
- model: Llama 2 70B
mad_tag: pyt_megatron_lm_train_llama-2-70b
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: pyt_megatron_lm_train_deepseek-v3-proxy
- model: DeepSeek-V2-Lite
mad_tag: pyt_megatron_lm_train_deepseek-v2-lite-16b
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: pyt_megatron_lm_train_mixtral-8x7b
- model: Mixtral 8x22B (proxy)
mad_tag: pyt_megatron_lm_train_mixtral-8x22b-proxy
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: pyt_megatron_lm_train_qwen2.5-7b
- model: Qwen 2.5 72B
mad_tag: pyt_megatron_lm_train_qwen2.5-72b

View File

@@ -1,49 +0,0 @@
dockers:
- pull_tag: rocm/megatron-lm:v25.7_py310
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.7_py310/images/sha256-6189df849feeeee3ae31bb1e97aef5006d69d2b90c134e97708c19632e20ab5a
components:
ROCm: 6.4.2
Primus: v0.1.0-rc1
PyTorch: 2.8.0a0+gitd06a406
Python: "3.10"
Transformer Engine: 2.1.0.dev0+ba586519
hipBLASLt: 37ba1d36
Triton: 3.3.0
RCCL: 2.22.3
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: pyt_megatron_lm_train_llama-3.3-70b
- model: Llama 3.1 8B
mad_tag: pyt_megatron_lm_train_llama-3.1-8b
- model: Llama 3.1 70B
mad_tag: pyt_megatron_lm_train_llama-3.1-70b
- model: Llama 3.1 70B (proxy)
mad_tag: pyt_megatron_lm_train_llama-3.1-70b-proxy
- model: Llama 2 7B
mad_tag: pyt_megatron_lm_train_llama-2-7b
- model: Llama 2 70B
mad_tag: pyt_megatron_lm_train_llama-2-70b
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: pyt_megatron_lm_train_deepseek-v3-proxy
- model: DeepSeek-V2-Lite
mad_tag: pyt_megatron_lm_train_deepseek-v2-lite-16b
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: pyt_megatron_lm_train_mixtral-8x7b
- model: Mixtral 8x22B (proxy)
mad_tag: pyt_megatron_lm_train_mixtral-8x22b-proxy
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: pyt_megatron_lm_train_qwen2.5-7b
- model: Qwen 2.5 72B
mad_tag: pyt_megatron_lm_train_qwen2.5-72b

View File

@@ -1,48 +0,0 @@
dockers:
- pull_tag: rocm/megatron-lm:v25.8_py310
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.8_py310/images/sha256-50fc824361054e445e86d5d88d5f58817f61f8ec83ad4a7e43ea38bbc4a142c0
components:
ROCm: 6.4.3
PyTorch: 2.8.0a0+gitd06a406
Python: "3.10"
Transformer Engine: 2.2.0.dev0+54dd2bdc
hipBLASLt: d1b517fc7a
Triton: 3.3.0
RCCL: 2.22.3
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: pyt_megatron_lm_train_llama-3.3-70b
- model: Llama 3.1 8B
mad_tag: pyt_megatron_lm_train_llama-3.1-8b
- model: Llama 3.1 70B
mad_tag: pyt_megatron_lm_train_llama-3.1-70b
- model: Llama 3.1 70B (proxy)
mad_tag: pyt_megatron_lm_train_llama-3.1-70b-proxy
- model: Llama 2 7B
mad_tag: pyt_megatron_lm_train_llama-2-7b
- model: Llama 2 70B
mad_tag: pyt_megatron_lm_train_llama-2-70b
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: pyt_megatron_lm_train_deepseek-v3-proxy
- model: DeepSeek-V2-Lite
mad_tag: pyt_megatron_lm_train_deepseek-v2-lite-16b
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: pyt_megatron_lm_train_mixtral-8x7b
- model: Mixtral 8x22B (proxy)
mad_tag: pyt_megatron_lm_train_mixtral-8x22b-proxy
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: pyt_megatron_lm_train_qwen2.5-7b
- model: Qwen 2.5 72B
mad_tag: pyt_megatron_lm_train_qwen2.5-72b

View File

@@ -1,53 +0,0 @@
dockers:
MI355X and MI350X:
pull_tag: rocm/megatron-lm:v25.9_gfx950
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.9_gfx950/images/sha256-1a198be32f49efd66d0ff82066b44bd99b3e6b04c8e0e9b36b2c481e13bff7b6
components: &docker_components
ROCm: 7.0.0
Primus: aab4234
PyTorch: 2.9.0.dev20250821+rocm7.0.0.lw.git125803b7
Python: "3.10"
Transformer Engine: 2.2.0.dev0+54dd2bdc
Flash Attention: 2.8.3
hipBLASLt: 911283acd1
Triton: 3.4.0+rocm7.0.0.git56765e8c
RCCL: 2.26.6
MI325X and MI300X:
pull_tag: rocm/megatron-lm:v25.9_gfx942
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.9_gfx942/images/sha256-df6ab8f45b4b9ceb100fb24e19b2019a364e351ee3b324dbe54466a1d67f8357
components: *docker_components
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: pyt_megatron_lm_train_llama-3.3-70b
- model: Llama 3.1 8B
mad_tag: pyt_megatron_lm_train_llama-3.1-8b
- model: Llama 3.1 70B
mad_tag: pyt_megatron_lm_train_llama-3.1-70b
- model: Llama 2 7B
mad_tag: pyt_megatron_lm_train_llama-2-7b
- model: Llama 2 70B
mad_tag: pyt_megatron_lm_train_llama-2-70b
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: pyt_megatron_lm_train_deepseek-v3-proxy
- model: DeepSeek-V2-Lite
mad_tag: pyt_megatron_lm_train_deepseek-v2-lite-16b
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: pyt_megatron_lm_train_mixtral-8x7b
- model: Mixtral 8x22B (proxy)
mad_tag: pyt_megatron_lm_train_mixtral-8x22b-proxy
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: pyt_megatron_lm_train_qwen2.5-7b
- model: Qwen 2.5 72B
mad_tag: pyt_megatron_lm_train_qwen2.5-72b
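The deleted file above relies on a YAML anchor: `components: &docker_components` under the MI355X and MI350X image is reused as `components: *docker_components` under the MI325X and MI300X image, so both entries publish the same component versions. A minimal sketch of how a standard loader resolves that alias; the inline YAML is a trimmed illustration rather than the full file:

```python
# Minimal sketch of the &docker_components / *docker_components pair: the alias
# re-uses the anchored mapping, so both GPU families report identical versions.
import yaml

doc = """
dockers:
  MI355X and MI350X:
    pull_tag: rocm/megatron-lm:v25.9_gfx950
    components: &docker_components
      ROCm: 7.0.0
      Triton: 3.4.0+rocm7.0.0.git56765e8c
  MI325X and MI300X:
    pull_tag: rocm/megatron-lm:v25.9_gfx942
    components: *docker_components
"""

data = yaml.safe_load(doc)
gfx950 = data["dockers"]["MI355X and MI350X"]["components"]
gfx942 = data["dockers"]["MI325X and MI300X"]["components"]
assert gfx950 == gfx942  # alias resolves to the same component table
print(gfx942["ROCm"])    # prints 7.0.0
```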

View File

@@ -1,58 +0,0 @@
docker:
pull_tag: rocm/primus:v25.10
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.10/images/sha256-140c37cd2eeeb183759b9622543fc03cc210dc97cbfa18eeefdcbda84420c197
components:
ROCm: 7.1.0
PyTorch: 2.10.0.dev20251112+rocm7.1
Python: "3.10"
Transformer Engine: 2.4.0.dev0+32e2d1d4
Flash Attention: 2.8.3
hipBLASLt: 1.2.0-09ab7153e2
Triton: 3.4.0
RCCL: 2.27.7
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.3-70b
config_name: llama3.3_70B-pretrain.yaml
- model: Llama 3.1 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-70b
config_name: llama3.1_70B-pretrain.yaml
- model: Llama 3.1 8B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-8b
config_name: llama3.1_8B-pretrain.yaml
- model: Llama 2 7B
mad_tag: primus_pyt_megatron_lm_train_llama-2-7b
config_name: llama2_7B-pretrain.yaml
- model: Llama 2 70B
mad_tag: primus_pyt_megatron_lm_train_llama-2-70b
config_name: llama2_70B-pretrain.yaml
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: primus_pyt_megatron_lm_train_deepseek-v3-proxy
config_name: deepseek_v3-pretrain.yaml
- model: DeepSeek-V2-Lite
mad_tag: primus_pyt_megatron_lm_train_deepseek-v2-lite-16b
config_name: deepseek_v2_lite-pretrain.yaml
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x7b
config_name: mixtral_8x7B_v0.1-pretrain.yaml
- model: Mixtral 8x22B (proxy)
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x22b-proxy
config_name: mixtral_8x22B_v0.1-pretrain.yaml
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-7b
config_name: primus_qwen2.5_7B-pretrain.yaml
- model: Qwen 2.5 72B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-72b
config_name: qwen2.5_72B-pretrain.yaml

View File

@@ -1,58 +0,0 @@
dockers:
- pull_tag: rocm/megatron-lm:v25.7_py310
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.7_py310/images/sha256-6189df849feeeee3ae31bb1e97aef5006d69d2b90c134e97708c19632e20ab5a
components:
ROCm: 6.4.2
Primus: v0.1.0-rc1
PyTorch: 2.8.0a0+gitd06a406
Python: "3.10"
Transformer Engine: 2.1.0.dev0+ba586519
hipBLASLt: 37ba1d36
Triton: 3.3.0
RCCL: 2.22.3
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.3-70b
config_name: llama3.3_70B-pretrain.yaml
- model: Llama 3.1 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-70b
config_name: llama3.1_70B-pretrain.yaml
- model: Llama 3.1 8B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-8b
config_name: llama3.1_8B-pretrain.yaml
- model: Llama 2 7B
mad_tag: primus_pyt_megatron_lm_train_llama-2-7b
config_name: llama2_7B-pretrain.yaml
- model: Llama 2 70B
mad_tag: primus_pyt_megatron_lm_train_llama-2-70b
config_name: llama2_70B-pretrain.yaml
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: primus_pyt_megatron_lm_train_deepseek-v3-proxy
config_name: deepseek_v3-pretrain.yaml
- model: DeepSeek-V2-Lite
mad_tag: primus_pyt_megatron_lm_train_deepseek-v2-lite-16b
config_name: deepseek_v2_lite-pretrain.yaml
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x7b
config_name: mixtral_8x7B_v0.1-pretrain.yaml
- model: Mixtral 8x22B (proxy)
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x22b-proxy
config_name: mixtral_8x22B_v0.1-pretrain.yaml
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-7b
config_name: primus_qwen2.5_7B-pretrain.yaml
- model: Qwen 2.5 72B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-72b
config_name: qwen2.5_72B-pretrain.yaml

View File

@@ -1,58 +0,0 @@
dockers:
- pull_tag: rocm/megatron-lm:v25.8_py310
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.8_py310/images/sha256-50fc824361054e445e86d5d88d5f58817f61f8ec83ad4a7e43ea38bbc4a142c0
components:
ROCm: 6.4.3
Primus: 927a717
PyTorch: 2.8.0a0+gitd06a406
Python: "3.10"
Transformer Engine: 2.2.0.dev0+54dd2bdc
hipBLASLt: d1b517fc7a
Triton: 3.3.0
RCCL: 2.22.3
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.3-70b
config_name: llama3.3_70B-pretrain.yaml
- model: Llama 3.1 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-70b
config_name: llama3.1_70B-pretrain.yaml
- model: Llama 3.1 8B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-8b
config_name: llama3.1_8B-pretrain.yaml
- model: Llama 2 7B
mad_tag: primus_pyt_megatron_lm_train_llama-2-7b
config_name: llama2_7B-pretrain.yaml
- model: Llama 2 70B
mad_tag: primus_pyt_megatron_lm_train_llama-2-70b
config_name: llama2_70B-pretrain.yaml
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: primus_pyt_megatron_lm_train_deepseek-v3-proxy
config_name: deepseek_v3-pretrain.yaml
- model: DeepSeek-V2-Lite
mad_tag: primus_pyt_megatron_lm_train_deepseek-v2-lite-16b
config_name: deepseek_v2_lite-pretrain.yaml
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x7b
config_name: mixtral_8x7B_v0.1-pretrain.yaml
- model: Mixtral 8x22B (proxy)
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x22b-proxy
config_name: mixtral_8x22B_v0.1-pretrain.yaml
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-7b
config_name: primus_qwen2.5_7B-pretrain.yaml
- model: Qwen 2.5 72B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-72b
config_name: qwen2.5_72B-pretrain.yaml

View File

@@ -1,65 +0,0 @@
dockers:
MI355X and MI350X:
pull_tag: rocm/primus:v25.9_gfx950
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.9_gfx950/images/sha256-1a198be32f49efd66d0ff82066b44bd99b3e6b04c8e0e9b36b2c481e13bff7b6
components: &docker_components
ROCm: 7.0.0
Primus: 0.3.0
Primus Turbo: 0.1.1
PyTorch: 2.9.0.dev20250821+rocm7.0.0.lw.git125803b7
Python: "3.10"
Transformer Engine: 2.2.0.dev0+54dd2bdc
Flash Attention: 2.8.3
hipBLASLt: 911283acd1
Triton: 3.4.0+rocm7.0.0.git56765e8c
RCCL: 2.26.6
MI325X and MI300X:
pull_tag: rocm/primus:v25.9_gfx942
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.9_gfx942/images/sha256-df6ab8f45b4b9ceb100fb24e19b2019a364e351ee3b324dbe54466a1d67f8357
components: *docker_components
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.3-70b
config_name: llama3.3_70B-pretrain.yaml
- model: Llama 3.1 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-70b
config_name: llama3.1_70B-pretrain.yaml
- model: Llama 3.1 8B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-8b
config_name: llama3.1_8B-pretrain.yaml
- model: Llama 2 7B
mad_tag: primus_pyt_megatron_lm_train_llama-2-7b
config_name: llama2_7B-pretrain.yaml
- model: Llama 2 70B
mad_tag: primus_pyt_megatron_lm_train_llama-2-70b
config_name: llama2_70B-pretrain.yaml
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: primus_pyt_megatron_lm_train_deepseek-v3-proxy
config_name: deepseek_v3-pretrain.yaml
- model: DeepSeek-V2-Lite
mad_tag: primus_pyt_megatron_lm_train_deepseek-v2-lite-16b
config_name: deepseek_v2_lite-pretrain.yaml
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x7b
config_name: mixtral_8x7B_v0.1-pretrain.yaml
- model: Mixtral 8x22B (proxy)
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x22b-proxy
config_name: mixtral_8x22B_v0.1-pretrain.yaml
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-7b
config_name: primus_qwen2.5_7B-pretrain.yaml
- model: Qwen 2.5 72B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-72b
config_name: qwen2.5_72B-pretrain.yaml

View File

@@ -1,32 +0,0 @@
docker:
pull_tag: rocm/primus:v25.10
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.10/images/sha256-140c37cd2eeeb183759b9622543fc03cc210dc97cbfa18eeefdcbda84420c197
components:
ROCm: 7.1.0
PyTorch: 2.10.0.dev20251112+rocm7.1
Python: "3.10"
Transformer Engine: 2.4.0.dev0+32e2d1d4
Flash Attention: 2.8.3
hipBLASLt: 1.2.0-09ab7153e2
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.1 8B
mad_tag: primus_pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
- model: Llama 3.1 70B
mad_tag: primus_pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B
precision: BF16
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek V2 16B
mad_tag: primus_pyt_train_deepseek-v2
model_repo: DeepSeek-V2
url: https://huggingface.co/deepseek-ai/DeepSeek-V2
precision: BF16

View File

@@ -1,24 +0,0 @@
dockers:
- pull_tag: rocm/pytorch-training:v25.8
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-training/v25.8/images/sha256-5082ae01d73fec6972b0d84e5dad78c0926820dcf3c19f301d6c8eb892e573c5
components:
ROCm: 6.4.3
PyTorch: 2.8.0a0+gitd06a406
Python: 3.10.18
Transformer Engine: 2.2.0.dev0+a1e66aae
Flash Attention: 3.0.0.post1
hipBLASLt: 1.1.0-d1b517fc7a
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.1 8B
mad_tag: primus_pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
- model: Llama 3.1 70B
mad_tag: primus_pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B
precision: BF16

View File

@@ -1,39 +0,0 @@
dockers:
MI355X and MI350X:
pull_tag: rocm/primus:v25.9_gfx950
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.9_gfx950/images/sha256-1a198be32f49efd66d0ff82066b44bd99b3e6b04c8e0e9b36b2c481e13bff7b6
components: &docker_components
ROCm: 7.0.0
Primus: 0.3.0
Primus Turbo: 0.1.1
PyTorch: 2.9.0.dev20250821+rocm7.0.0.lw.git125803b7
Python: "3.10"
Transformer Engine: 2.2.0.dev0+54dd2bdc
Flash Attention: 2.8.3
hipBLASLt: 911283acd1
Triton: 3.4.0+rocm7.0.0.git56765e8c
RCCL: 2.26.6
MI325X and MI300X:
pull_tag: rocm/primus:v25.9_gfx942
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.9_gfx942/images/sha256-df6ab8f45b4b9ceb100fb24e19b2019a364e351ee3b324dbe54466a1d67f8357
components: *docker_components
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.1 8B
mad_tag: primus_pyt_train_llama-3.1-8b
model_repo: meta-llama/Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
config_file:
bf16: "./llama3_8b_fsdp_bf16.toml"
fp8: "./llama3_8b_fsdp_fp8.toml"
- model: Llama 3.1 70B
mad_tag: primus_pyt_train_llama-3.1-70b
model_repo: meta-llama/Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B
precision: BF16
config_file:
bf16: "./llama3_70b_fsdp_bf16.toml"
fp8: "./llama3_70b_fsdp_fp8.toml"

View File

@@ -1,197 +0,0 @@
docker:
pull_tag: rocm/primus:v25.10
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.10/images/sha256-140c37cd2eeeb183759b9622543fc03cc210dc97cbfa18eeefdcbda84420c197
components:
ROCm: 7.1.0
Primus: 0.3.0
Primus Turbo: 0.1.1
PyTorch: 2.10.0.dev20251112+rocm7.1
Python: "3.10"
Transformer Engine: 2.4.0.dev0+32e2d1d4
Flash Attention: 2.8.3
hipBLASLt: 1.2.0-09ab7153e2
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 4 Scout 17B-16E
mad_tag: pyt_train_llama-4-scout-17b-16e
model_repo: Llama-4-17B_16E
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.3 70B
mad_tag: pyt_train_llama-3.3-70b
model_repo: Llama-3.3-70B
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 3.2 1B
mad_tag: pyt_train_llama-3.2-1b
model_repo: Llama-3.2-1B
url: https://huggingface.co/meta-llama/Llama-3.2-1B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 3B
mad_tag: pyt_train_llama-3.2-3b
model_repo: Llama-3.2-3B
url: https://huggingface.co/meta-llama/Llama-3.2-3B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 Vision 11B
mad_tag: pyt_train_llama-3.2-vision-11b
model_repo: Llama-3.2-Vision-11B
url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.2 Vision 90B
mad_tag: pyt_train_llama-3.2-vision-90b
model_repo: Llama-3.2-Vision-90B
url: https://huggingface.co/meta-llama/Llama-3.2-90B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.1 8B
mad_tag: pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
training_modes: [pretrain, finetune_fw, finetune_lora, HF_pretrain]
- model: Llama 3.1 70B
mad_tag: pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
precision: BF16
training_modes: [pretrain, finetune_fw, finetune_lora]
- model: Llama 3.1 405B
mad_tag: pyt_train_llama-3.1-405b
model_repo: Llama-3.1-405B
url: https://huggingface.co/meta-llama/Llama-3.1-405B
precision: BF16
training_modes: [finetune_qlora]
- model: Llama 3 8B
mad_tag: pyt_train_llama-3-8b
model_repo: Llama-3-8B
url: https://huggingface.co/meta-llama/Meta-Llama-3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3 70B
mad_tag: pyt_train_llama-3-70b
model_repo: Llama-3-70B
url: https://huggingface.co/meta-llama/Meta-Llama-3-70B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 7B
mad_tag: pyt_train_llama-2-7b
model_repo: Llama-2-7B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 2 13B
mad_tag: pyt_train_llama-2-13b
model_repo: Llama-2-13B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 70B
mad_tag: pyt_train_llama-2-70b
model_repo: Llama-2-70B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_lora, finetune_qlora]
- group: OpenAI
tag: openai
models:
- model: GPT OSS 20B
mad_tag: pyt_train_gpt_oss_20b
model_repo: GPT-OSS-20B
url: https://huggingface.co/openai/gpt-oss-20b
precision: BF16
training_modes: [HF_finetune_lora]
- model: GPT OSS 120B
mad_tag: pyt_train_gpt_oss_120b
model_repo: GPT-OSS-120B
url: https://huggingface.co/openai/gpt-oss-120b
precision: BF16
training_modes: [HF_finetune_lora]
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek V2 16B
mad_tag: primus_pyt_train_deepseek-v2
model_repo: DeepSeek-V2
url: https://huggingface.co/deepseek-ai/DeepSeek-V2
precision: BF16
training_modes: [pretrain]
- group: Qwen
tag: qwen
models:
- model: Qwen 3 8B
mad_tag: pyt_train_qwen3-8b
model_repo: Qwen3-8B
url: https://huggingface.co/Qwen/Qwen3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 3 32B
mad_tag: pyt_train_qwen3-32b
model_repo: Qwen3-32
url: https://huggingface.co/Qwen/Qwen3-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 32B
mad_tag: pyt_train_qwen2.5-32b
model_repo: Qwen2.5-32B
url: https://huggingface.co/Qwen/Qwen2.5-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 72B
mad_tag: pyt_train_qwen2.5-72b
model_repo: Qwen2.5-72B
url: https://huggingface.co/Qwen/Qwen2.5-72B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2 1.5B
mad_tag: pyt_train_qwen2-1.5b
model_repo: Qwen2-1.5B
url: https://huggingface.co/Qwen/Qwen2-1.5B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 2 7B
mad_tag: pyt_train_qwen2-7b
model_repo: Qwen2-7B
url: https://huggingface.co/Qwen/Qwen2-7B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- group: Stable Diffusion
tag: sd
models:
- model: Stable Diffusion XL
mad_tag: pyt_huggingface_stable_diffusion_xl_2k_lora_finetuning
model_repo: SDXL
url: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
precision: BF16
training_modes: [posttrain]
- group: Flux
tag: flux
models:
- model: FLUX.1-dev
mad_tag: pyt_train_flux
model_repo: Flux
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
precision: BF16
training_modes: [posttrain]
- group: NCF
tag: ncf
models:
- model: NCF
mad_tag: pyt_ncf_training
model_repo:
url: https://github.com/ROCm/FluxBenchmark
precision: FP32
- group: DLRM
tag: dlrm
models:
- model: DLRM v2
mad_tag: pyt_train_dlrm
model_repo: DLRM
url: https://github.com/AMD-AGI/DLRMBenchmark
training_modes: [pretrain]
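Each entry in the deleted file above carries a `training_modes` list. One plausible way to consume such a catalog is to invert it into a mode-to-models index; a minimal sketch, assuming the file is saved as `pytorch_training.yaml` (an assumed name):

```python
# Minimal sketch, assuming the listing above is saved as pytorch_training.yaml:
# invert the catalog into a training_mode -> mad_tags index.
from collections import defaultdict

import yaml

with open("pytorch_training.yaml") as f:
    catalog = yaml.safe_load(f)

by_mode = defaultdict(list)
for group in catalog["model_groups"]:
    for m in group["models"]:
        for mode in m.get("training_modes", []):  # some entries list no modes
            by_mode[mode].append(m["mad_tag"])

for mode in sorted(by_mode):
    print(f"{mode}: {len(by_mode[mode])} entries")
```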

View File

@@ -1,120 +0,0 @@
unified_docker:
latest:
pull_tag: rocm/pytorch-training:v25.6
docker_hub_url: https://hub.docker.com/r/rocm/pytorch-training/tags
rocm_version: 6.4.1
pytorch_version: 2.8.0a0+git7d205b2
python_version: 3.10.17
transformer_engine_version: 1.14.0+2f85f5f2
flash_attention_version: 3.0.0.post1
hipblaslt_version: 0.15.0-8c6919d
triton_version: 3.3.0
model_groups:
- group: Pre-training
tag: pre-training
models:
- model: Llama 3.1 8B
mad_tag: pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
training_modes: [pretrain]
- model: Llama 3.1 70B
mad_tag: pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
precision: BF16
training_modes: [pretrain]
- model: FLUX.1-dev
mad_tag: pyt_train_flux
model_repo: Flux
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
precision: BF16
training_modes: [pretrain]
- group: Fine-tuning
tag: fine-tuning
models:
- model: Llama 4 Scout 17B-16E
mad_tag: pyt_train_llama-4-scout-17b-16e
model_repo: Llama-4-17B_16E
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.3 70B
mad_tag: pyt_train_llama-3.3-70b
model_repo: Llama-3.3-70B
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 3.2 1B
mad_tag: pyt_train_llama-3.2-1b
model_repo: Llama-3.2-1B
url: https://huggingface.co/meta-llama/Llama-3.2-1B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 3B
mad_tag: pyt_train_llama-3.2-3b
model_repo: Llama-3.2-3B
url: https://huggingface.co/meta-llama/Llama-3.2-3B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 Vision 11B
mad_tag: pyt_train_llama-3.2-vision-11b
model_repo: Llama-3.2-Vision-11B
url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.2 Vision 90B
mad_tag: pyt_train_llama-3.2-vision-90b
model_repo: Llama-3.2-Vision-90B
url: https://huggingface.co/meta-llama/Llama-3.2-90B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.1 8B
mad_tag: pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.1 70B
mad_tag: pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 3.1 405B
mad_tag: pyt_train_llama-3.1-405b
model_repo: Llama-3.1-405B
url: https://huggingface.co/meta-llama/Llama-3.1-405B
precision: BF16
training_modes: [finetune_qlora, HF_finetune_lora]
- model: Llama 3 8B
mad_tag: pyt_train_llama-3-8b
model_repo: Llama-3-8B
url: https://huggingface.co/meta-llama/Meta-Llama-3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3 70B
mad_tag: pyt_train_llama-3-70b
model_repo: Llama-3-70B
url: https://huggingface.co/meta-llama/Meta-Llama-3-70B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 7B
mad_tag: pyt_train_llama-2-7b
model_repo: Llama-2-7B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 2 13B
mad_tag: pyt_train_llama-2-13b
model_repo: Llama-2-13B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 70B
mad_tag: pyt_train_llama-2-70b
model_repo: Llama-2-70B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_lora, finetune_qlora, HF_finetune_lora]

View File

@@ -1,162 +0,0 @@
dockers:
- pull_tag: rocm/pytorch-training:v25.7
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-training/v25.7/images/sha256-cc6fd840ab89cb81d926fc29eca6d075aee9875a55a522675a4b9231c9a0a712
components:
ROCm: 6.4.2
PyTorch: 2.8.0a0+gitd06a406
Python: 3.10.18
Transformer Engine: 2.2.0.dev0+94e53dd8
Flash Attention: 3.0.0.post1
hipBLASLt: 1.1.0-4b9a52edfc
Triton: 3.3.0
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 4 Scout 17B-16E
mad_tag: pyt_train_llama-4-scout-17b-16e
model_repo: Llama-4-17B_16E
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.3 70B
mad_tag: pyt_train_llama-3.3-70b
model_repo: Llama-3.3-70B
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 3.2 1B
mad_tag: pyt_train_llama-3.2-1b
model_repo: Llama-3.2-1B
url: https://huggingface.co/meta-llama/Llama-3.2-1B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 3B
mad_tag: pyt_train_llama-3.2-3b
model_repo: Llama-3.2-3B
url: https://huggingface.co/meta-llama/Llama-3.2-3B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 Vision 11B
mad_tag: pyt_train_llama-3.2-vision-11b
model_repo: Llama-3.2-Vision-11B
url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.2 Vision 90B
mad_tag: pyt_train_llama-3.2-vision-90b
model_repo: Llama-3.2-Vision-90B
url: https://huggingface.co/meta-llama/Llama-3.2-90B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.1 8B
mad_tag: pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
training_modes: [pretrain, finetune_fw, finetune_lora, HF_pretrain]
- model: Llama 3.1 70B
mad_tag: pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
precision: BF16
training_modes: [pretrain, finetune_fw, finetune_lora]
- model: Llama 3.1 405B
mad_tag: pyt_train_llama-3.1-405b
model_repo: Llama-3.1-405B
url: https://huggingface.co/meta-llama/Llama-3.1-405B
precision: BF16
training_modes: [finetune_qlora]
- model: Llama 3 8B
mad_tag: pyt_train_llama-3-8b
model_repo: Llama-3-8B
url: https://huggingface.co/meta-llama/Meta-Llama-3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3 70B
mad_tag: pyt_train_llama-3-70b
model_repo: Llama-3-70B
url: https://huggingface.co/meta-llama/Meta-Llama-3-70B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 7B
mad_tag: pyt_train_llama-2-7b
model_repo: Llama-2-7B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 2 13B
mad_tag: pyt_train_llama-2-13b
model_repo: Llama-2-13B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 70B
mad_tag: pyt_train_llama-2-70b
model_repo: Llama-2-70B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_lora, finetune_qlora]
- group: OpenAI
tag: openai
models:
- model: GPT OSS 20B
mad_tag: pyt_train_gpt_oss_20b
model_repo: GPT-OSS-20B
url: https://huggingface.co/openai/gpt-oss-20b
precision: BF16
training_modes: [HF_finetune_lora]
- model: GPT OSS 120B
mad_tag: pyt_train_gpt_oss_120b
model_repo: GPT-OSS-120B
url: https://huggingface.co/openai/gpt-oss-120b
precision: BF16
training_modes: [HF_finetune_lora]
- group: Qwen
tag: qwen
models:
- model: Qwen 3 8B
mad_tag: pyt_train_qwen3-8b
model_repo: Qwen3-8B
url: https://huggingface.co/Qwen/Qwen3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 3 32B
mad_tag: pyt_train_qwen3-32b
model_repo: Qwen3-32
url: https://huggingface.co/Qwen/Qwen3-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 32B
mad_tag: pyt_train_qwen2.5-32b
model_repo: Qwen2.5-32B
url: https://huggingface.co/Qwen/Qwen2.5-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 72B
mad_tag: pyt_train_qwen2.5-72b
model_repo: Qwen2.5-72B
url: https://huggingface.co/Qwen/Qwen2.5-72B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2 1.5B
mad_tag: pyt_train_qwen2-1.5b
model_repo: Qwen2-1.5B
url: https://huggingface.co/Qwen/Qwen2-1.5B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 2 7B
mad_tag: pyt_train_qwen2-7b
model_repo: Qwen2-7B
url: https://huggingface.co/Qwen/Qwen2-7B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- group: Flux
tag: flux
models:
- model: FLUX.1-dev
mad_tag: pyt_train_flux
model_repo: Flux
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
precision: BF16
training_modes: [pretrain]

View File

@@ -1,178 +0,0 @@
dockers:
- pull_tag: rocm/pytorch-training:v25.8
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-training/v25.8/images/sha256-5082ae01d73fec6972b0d84e5dad78c0926820dcf3c19f301d6c8eb892e573c5
components:
ROCm: 6.4.3
PyTorch: 2.8.0a0+gitd06a406
Python: 3.10.18
Transformer Engine: 2.2.0.dev0+a1e66aae
Flash Attention: 3.0.0.post1
hipBLASLt: 1.1.0-d1b517fc7a
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 4 Scout 17B-16E
mad_tag: pyt_train_llama-4-scout-17b-16e
model_repo: Llama-4-17B_16E
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.3 70B
mad_tag: pyt_train_llama-3.3-70b
model_repo: Llama-3.3-70B
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 3.2 1B
mad_tag: pyt_train_llama-3.2-1b
model_repo: Llama-3.2-1B
url: https://huggingface.co/meta-llama/Llama-3.2-1B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 3B
mad_tag: pyt_train_llama-3.2-3b
model_repo: Llama-3.2-3B
url: https://huggingface.co/meta-llama/Llama-3.2-3B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 Vision 11B
mad_tag: pyt_train_llama-3.2-vision-11b
model_repo: Llama-3.2-Vision-11B
url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.2 Vision 90B
mad_tag: pyt_train_llama-3.2-vision-90b
model_repo: Llama-3.2-Vision-90B
url: https://huggingface.co/meta-llama/Llama-3.2-90B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.1 8B
mad_tag: pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
training_modes: [pretrain, finetune_fw, finetune_lora, HF_pretrain]
- model: Llama 3.1 70B
mad_tag: pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
precision: BF16
training_modes: [pretrain, finetune_fw, finetune_lora]
- model: Llama 3.1 405B
mad_tag: pyt_train_llama-3.1-405b
model_repo: Llama-3.1-405B
url: https://huggingface.co/meta-llama/Llama-3.1-405B
precision: BF16
training_modes: [finetune_qlora]
- model: Llama 3 8B
mad_tag: pyt_train_llama-3-8b
model_repo: Llama-3-8B
url: https://huggingface.co/meta-llama/Meta-Llama-3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3 70B
mad_tag: pyt_train_llama-3-70b
model_repo: Llama-3-70B
url: https://huggingface.co/meta-llama/Meta-Llama-3-70B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 7B
mad_tag: pyt_train_llama-2-7b
model_repo: Llama-2-7B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 2 13B
mad_tag: pyt_train_llama-2-13b
model_repo: Llama-2-13B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 70B
mad_tag: pyt_train_llama-2-70b
model_repo: Llama-2-70B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_lora, finetune_qlora]
- group: OpenAI
tag: openai
models:
- model: GPT OSS 20B
mad_tag: pyt_train_gpt_oss_20b
model_repo: GPT-OSS-20B
url: https://huggingface.co/openai/gpt-oss-20b
precision: BF16
training_modes: [HF_finetune_lora]
- model: GPT OSS 120B
mad_tag: pyt_train_gpt_oss_120b
model_repo: GPT-OSS-120B
url: https://huggingface.co/openai/gpt-oss-120b
precision: BF16
training_modes: [HF_finetune_lora]
- group: Qwen
tag: qwen
models:
- model: Qwen 3 8B
mad_tag: pyt_train_qwen3-8b
model_repo: Qwen3-8B
url: https://huggingface.co/Qwen/Qwen3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 3 32B
mad_tag: pyt_train_qwen3-32b
model_repo: Qwen3-32
url: https://huggingface.co/Qwen/Qwen3-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 32B
mad_tag: pyt_train_qwen2.5-32b
model_repo: Qwen2.5-32B
url: https://huggingface.co/Qwen/Qwen2.5-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 72B
mad_tag: pyt_train_qwen2.5-72b
model_repo: Qwen2.5-72B
url: https://huggingface.co/Qwen/Qwen2.5-72B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2 1.5B
mad_tag: pyt_train_qwen2-1.5b
model_repo: Qwen2-1.5B
url: https://huggingface.co/Qwen/Qwen2-1.5B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 2 7B
mad_tag: pyt_train_qwen2-7b
model_repo: Qwen2-7B
url: https://huggingface.co/Qwen/Qwen2-7B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- group: Stable Diffusion
tag: sd
models:
- model: Stable Diffusion XL
mad_tag: pyt_huggingface_stable_diffusion_xl_2k_lora_finetuning
model_repo: SDXL
url: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
precision: BF16
training_modes: [finetune_lora]
- group: Flux
tag: flux
models:
- model: FLUX.1-dev
mad_tag: pyt_train_flux
model_repo: Flux
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
precision: BF16
training_modes: [pretrain]
- group: NCF
tag: ncf
models:
- model: NCF
mad_tag: pyt_ncf_training
model_repo:
url: https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Recommendation/NCF
precision: FP32

View File

@@ -1,186 +0,0 @@
dockers:
MI355X and MI350X:
pull_tag: rocm/pytorch-training:v25.9_gfx950
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-training/v25.9_gfx950/images/sha256-1a198be32f49efd66d0ff82066b44bd99b3e6b04c8e0e9b36b2c481e13bff7b6
components: &docker_components
ROCm: 7.0.0
Primus: aab4234
PyTorch: 2.9.0.dev20250821+rocm7.0.0.lw.git125803b7
Python: "3.10"
Transformer Engine: 2.2.0.dev0+54dd2bdc
Flash Attention: 2.8.3
hipBLASLt: 911283acd1
Triton: 3.4.0+rocm7.0.0.git56765e8c
RCCL: 2.26.6
MI325X and MI300X:
pull_tag: rocm/pytorch-training:v25.9_gfx942
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-training/v25.9_gfx942/images/sha256-df6ab8f45b4b9ceb100fb24e19b2019a364e351ee3b324dbe54466a1d67f8357
components: *docker_components
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 4 Scout 17B-16E
mad_tag: pyt_train_llama-4-scout-17b-16e
model_repo: Llama-4-17B_16E
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.3 70B
mad_tag: pyt_train_llama-3.3-70b
model_repo: Llama-3.3-70B
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 3.2 1B
mad_tag: pyt_train_llama-3.2-1b
model_repo: Llama-3.2-1B
url: https://huggingface.co/meta-llama/Llama-3.2-1B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 3B
mad_tag: pyt_train_llama-3.2-3b
model_repo: Llama-3.2-3B
url: https://huggingface.co/meta-llama/Llama-3.2-3B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 Vision 11B
mad_tag: pyt_train_llama-3.2-vision-11b
model_repo: Llama-3.2-Vision-11B
url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.2 Vision 90B
mad_tag: pyt_train_llama-3.2-vision-90b
model_repo: Llama-3.2-Vision-90B
url: https://huggingface.co/meta-llama/Llama-3.2-90B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.1 8B
mad_tag: pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
training_modes: [pretrain, finetune_fw, finetune_lora, HF_pretrain]
- model: Llama 3.1 70B
mad_tag: pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
precision: BF16
training_modes: [pretrain, finetune_fw, finetune_lora]
- model: Llama 3.1 405B
mad_tag: pyt_train_llama-3.1-405b
model_repo: Llama-3.1-405B
url: https://huggingface.co/meta-llama/Llama-3.1-405B
precision: BF16
training_modes: [finetune_qlora]
- model: Llama 3 8B
mad_tag: pyt_train_llama-3-8b
model_repo: Llama-3-8B
url: https://huggingface.co/meta-llama/Meta-Llama-3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3 70B
mad_tag: pyt_train_llama-3-70b
model_repo: Llama-3-70B
url: https://huggingface.co/meta-llama/Meta-Llama-3-70B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 7B
mad_tag: pyt_train_llama-2-7b
model_repo: Llama-2-7B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 2 13B
mad_tag: pyt_train_llama-2-13b
model_repo: Llama-2-13B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 70B
mad_tag: pyt_train_llama-2-70b
model_repo: Llama-2-70B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_lora, finetune_qlora]
- group: OpenAI
tag: openai
models:
- model: GPT OSS 20B
mad_tag: pyt_train_gpt_oss_20b
model_repo: GPT-OSS-20B
url: https://huggingface.co/openai/gpt-oss-20b
precision: BF16
training_modes: [HF_finetune_lora]
- model: GPT OSS 120B
mad_tag: pyt_train_gpt_oss_120b
model_repo: GPT-OSS-120B
url: https://huggingface.co/openai/gpt-oss-120b
precision: BF16
training_modes: [HF_finetune_lora]
- group: Qwen
tag: qwen
models:
- model: Qwen 3 8B
mad_tag: pyt_train_qwen3-8b
model_repo: Qwen3-8B
url: https://huggingface.co/Qwen/Qwen3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 3 32B
mad_tag: pyt_train_qwen3-32b
model_repo: Qwen3-32
url: https://huggingface.co/Qwen/Qwen3-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 32B
mad_tag: pyt_train_qwen2.5-32b
model_repo: Qwen2.5-32B
url: https://huggingface.co/Qwen/Qwen2.5-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 72B
mad_tag: pyt_train_qwen2.5-72b
model_repo: Qwen2.5-72B
url: https://huggingface.co/Qwen/Qwen2.5-72B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2 1.5B
mad_tag: pyt_train_qwen2-1.5b
model_repo: Qwen2-1.5B
url: https://huggingface.co/Qwen/Qwen2-1.5B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 2 7B
mad_tag: pyt_train_qwen2-7b
model_repo: Qwen2-7B
url: https://huggingface.co/Qwen/Qwen2-7B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- group: Stable Diffusion
tag: sd
models:
- model: Stable Diffusion XL
mad_tag: pyt_huggingface_stable_diffusion_xl_2k_lora_finetuning
model_repo: SDXL
url: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
precision: BF16
training_modes: [posttrain-p]
- group: Flux
tag: flux
models:
- model: FLUX.1-dev
mad_tag: pyt_train_flux
model_repo: Flux
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
precision: BF16
training_modes: [posttrain-p]
- group: NCF
tag: ncf
models:
- model: NCF
mad_tag: pyt_ncf_training
model_repo:
url: https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Recommendation/NCF
precision: FP32

View File

@@ -1,15 +1,15 @@
-docker:
-pull_tag: rocm/primus:v25.11
-docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.10/images/sha256-140c37cd2eeeb183759b9622543fc03cc210dc97cbfa18eeefdcbda84420c197
+dockers:
+- pull_tag: rocm/megatron-lm:v25.7_py310
+docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.7_py310/images/sha256-6189df849feeeee3ae31bb1e97aef5006d69d2b90c134e97708c19632e20ab5a
components:
-ROCm: 7.1.0
-PyTorch: 2.10.0.dev20251112+rocm7.1
-Python: "3.10"
-Transformer Engine: 2.4.0.dev0+32e2d1d4
-Flash Attention: 2.8.3
-hipBLASLt: 1.2.0-09ab7153e2
-Triton: 3.4.0
-RCCL: 2.27.7
+ROCm: 6.4.2
+Primus: v0.1.0-rc1
+PyTorch: 2.8.0a0+gitd06a406
+Python: "3.10"
+Transformer Engine: 2.1.0.dev0+ba586519
+hipBLASLt: 37ba1d36
+Triton: 3.3.0
+RCCL: 2.22.3
model_groups:
- group: Meta Llama
tag: llama

View File

@@ -1,32 +0,0 @@
docker:
pull_tag: rocm/primus:v25.11
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.10/images/sha256-140c37cd2eeeb183759b9622543fc03cc210dc97cbfa18eeefdcbda84420c197
components:
ROCm: 7.1.0
PyTorch: 2.10.0.dev20251112+rocm7.1
Python: "3.10"
Transformer Engine: 2.4.0.dev0+32e2d1d4
Flash Attention: 2.8.3
hipBLASLt: 1.2.0-09ab7153e2
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.1 8B
mad_tag: primus_pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
- model: Llama 3.1 70B
mad_tag: primus_pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B
precision: BF16
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek V3 16B
mad_tag: primus_pyt_train_deepseek-v3-16b
model_repo: DeepSeek-V3
url: https://huggingface.co/deepseek-ai/DeepSeek-V3
precision: BF16

View File

@@ -1,18 +1,38 @@
-docker:
-pull_tag: rocm/primus:v25.10
-docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.10/images/sha256-140c37cd2eeeb183759b9622543fc03cc210dc97cbfa18eeefdcbda84420c197
-components:
-ROCm: 7.1.0
-Primus: 0.3.0
-Primus Turbo: 0.1.1
-PyTorch: 2.10.0.dev20251112+rocm7.1
-Python: "3.10"
-Transformer Engine: 2.4.0.dev0+32e2d1d4
-Flash Attention: 2.8.3
-hipBLASLt: 1.2.0-09ab7153e2
+unified_docker:
+latest:
+pull_tag: rocm/pytorch-training:v25.6
+docker_hub_url: https://hub.docker.com/r/rocm/pytorch-training/tags
+rocm_version: 6.4.1
+pytorch_version: 2.8.0a0+git7d205b2
+python_version: 3.10.17
+transformer_engine_version: 1.14.0+2f85f5f2
+flash_attention_version: 3.0.0.post1
+hipblaslt_version: 0.15.0-8c6919d
+triton_version: 3.3.0
model_groups:
-- group: Meta Llama
-tag: llama
+- group: Pre-training
+tag: pre-training
+models:
+- model: Llama 3.1 8B
+mad_tag: pyt_train_llama-3.1-8b
+model_repo: Llama-3.1-8B
+url: https://huggingface.co/meta-llama/Llama-3.1-8B
+precision: BF16
+training_modes: [pretrain]
+- model: Llama 3.1 70B
+mad_tag: pyt_train_llama-3.1-70b
+model_repo: Llama-3.1-70B
+url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
+precision: BF16
+training_modes: [pretrain]
+- model: FLUX.1-dev
+mad_tag: pyt_train_flux
+model_repo: Flux
+url: https://huggingface.co/black-forest-labs/FLUX.1-dev
+precision: BF16
+training_modes: [pretrain]
+- group: Fine-tuning
+tag: fine-tuning
models:
- model: Llama 4 Scout 17B-16E
mad_tag: pyt_train_llama-4-scout-17b-16e
@@ -55,19 +75,19 @@ model_groups:
 model_repo: Llama-3.1-8B
 url: https://huggingface.co/meta-llama/Llama-3.1-8B
 precision: BF16
-training_modes: [pretrain, finetune_fw, finetune_lora, HF_pretrain]
+training_modes: [finetune_fw, finetune_lora]
 - model: Llama 3.1 70B
 mad_tag: pyt_train_llama-3.1-70b
 model_repo: Llama-3.1-70B
-url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
+url: https://huggingface.co/meta-llama/Llama-3.1-70B
 precision: BF16
-training_modes: [pretrain, finetune_fw, finetune_lora]
+training_modes: [finetune_fw, finetune_lora, finetune_qlora]
 - model: Llama 3.1 405B
 mad_tag: pyt_train_llama-3.1-405b
 model_repo: Llama-3.1-405B
 url: https://huggingface.co/meta-llama/Llama-3.1-405B
 precision: BF16
-training_modes: [finetune_qlora]
+training_modes: [finetune_qlora, HF_finetune_lora]
 - model: Llama 3 8B
 mad_tag: pyt_train_llama-3-8b
 model_repo: Llama-3-8B
@@ -97,101 +117,4 @@ model_groups:
 model_repo: Llama-2-70B
 url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
 precision: BF16
-training_modes: [finetune_lora, finetune_qlora]
+training_modes: [finetune_lora, finetune_qlora, HF_finetune_lora]
- group: OpenAI
tag: openai
models:
- model: GPT OSS 20B
mad_tag: pyt_train_gpt_oss_20b
model_repo: GPT-OSS-20B
url: https://huggingface.co/openai/gpt-oss-20b
precision: BF16
training_modes: [HF_finetune_lora]
- model: GPT OSS 120B
mad_tag: pyt_train_gpt_oss_120b
model_repo: GPT-OSS-120B
url: https://huggingface.co/openai/gpt-oss-120b
precision: BF16
training_modes: [HF_finetune_lora]
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek V2 16B
mad_tag: primus_pyt_train_deepseek-v2
model_repo: DeepSeek-V2
url: https://huggingface.co/deepseek-ai/DeepSeek-V2
precision: BF16
training_modes: [pretrain]
- group: Qwen
tag: qwen
models:
- model: Qwen 3 8B
mad_tag: pyt_train_qwen3-8b
model_repo: Qwen3-8B
url: https://huggingface.co/Qwen/Qwen3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 3 32B
mad_tag: pyt_train_qwen3-32b
model_repo: Qwen3-32
url: https://huggingface.co/Qwen/Qwen3-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 32B
mad_tag: pyt_train_qwen2.5-32b
model_repo: Qwen2.5-32B
url: https://huggingface.co/Qwen/Qwen2.5-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 72B
mad_tag: pyt_train_qwen2.5-72b
model_repo: Qwen2.5-72B
url: https://huggingface.co/Qwen/Qwen2.5-72B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2 1.5B
mad_tag: pyt_train_qwen2-1.5b
model_repo: Qwen2-1.5B
url: https://huggingface.co/Qwen/Qwen2-1.5B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 2 7B
mad_tag: pyt_train_qwen2-7b
model_repo: Qwen2-7B
url: https://huggingface.co/Qwen/Qwen2-7B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- group: Stable Diffusion
tag: sd
models:
- model: Stable Diffusion XL
mad_tag: pyt_huggingface_stable_diffusion_xl_2k_lora_finetuning
model_repo: SDXL
url: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
precision: BF16
training_modes: [posttrain]
- group: Flux
tag: flux
models:
- model: FLUX.1-dev
mad_tag: pyt_train_flux
model_repo: Flux
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
precision: BF16
training_modes: [posttrain]
- group: NCF
tag: ncf
models:
- model: NCF
mad_tag: pyt_ncf_training
model_repo:
url: https://github.com/ROCm/FluxBenchmark
precision: FP32
- group: DLRM
tag: dlrm
models:
- model: DLRM v2
mad_tag: pyt_train_dlrm
model_repo: DLRM
url: https://github.com/AMD-AGI/DLRMBenchmark
training_modes: [pretrain]
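
For readers navigating these model_groups files, the following is a minimal sketch of how such a YAML could be loaded and filtered by training mode. It assumes PyYAML is installed; the file name models.yaml and the helper select_models are hypothetical illustrations, not part of this change set.

# Sketch: load a model_groups YAML like those diffed above and list the
# models that support a given training mode. Assumes PyYAML; "models.yaml"
# and select_models() are hypothetical names.
import yaml

def select_models(path, mode):
    with open(path) as f:
        config = yaml.safe_load(f)
    selected = []
    for group in config.get("model_groups", []):
        for entry in group.get("models", []):
            # training_modes is absent on some entries (e.g. NCF above)
            if mode in entry.get("training_modes", []):
                selected.append((group["group"], entry["model"], entry.get("mad_tag", "")))
    return selected

if __name__ == "__main__":
    for group, model, mad_tag in select_models("models.yaml", "finetune_lora"):
        print(f"{group}: {model} ({mad_tag})")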

Some files were not shown because too many files have changed in this diff.