mirror of
https://github.com/ROCm/ROCm.git
synced 2026-01-07 22:03:58 -05:00
Merge remote-tracking branch 'external/develop' into sync-develop-from-external
This commit is contained in:
@@ -128,6 +128,9 @@ jobs:
|
|||||||
parameters:
|
parameters:
|
||||||
aptPackages: ${{ parameters.aptPackages }}
|
aptPackages: ${{ parameters.aptPackages }}
|
||||||
pipModules: ${{ parameters.pipModules }}
|
pipModules: ${{ parameters.pipModules }}
|
||||||
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||||
|
parameters:
|
||||||
|
cmakeVersion: '3.28.6'
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||||
parameters:
|
parameters:
|
||||||
@@ -152,6 +155,7 @@ jobs:
|
|||||||
-DCMAKE_BUILD_TYPE=Release
|
-DCMAKE_BUILD_TYPE=Release
|
||||||
-DGPU_TARGETS=${{ job.target }}
|
-DGPU_TARGETS=${{ job.target }}
|
||||||
-DAMDGPU_TARGETS=${{ job.target }}
|
-DAMDGPU_TARGETS=${{ job.target }}
|
||||||
|
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
||||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
|
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
|
||||||
-DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
|
-DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
|
||||||
@@ -192,6 +196,9 @@ jobs:
|
|||||||
parameters:
|
parameters:
|
||||||
aptPackages: ${{ parameters.aptPackages }}
|
aptPackages: ${{ parameters.aptPackages }}
|
||||||
pipModules: ${{ parameters.pipModules }}
|
pipModules: ${{ parameters.pipModules }}
|
||||||
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||||
|
parameters:
|
||||||
|
cmakeVersion: '3.28.6'
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||||
parameters:
|
parameters:
|
||||||
@@ -217,6 +224,7 @@ jobs:
|
|||||||
-DCMAKE_BUILD_TYPE=Release
|
-DCMAKE_BUILD_TYPE=Release
|
||||||
-DGPU_TARGETS=${{ job.target }}
|
-DGPU_TARGETS=${{ job.target }}
|
||||||
-DAMDGPU_TARGETS=${{ job.target }}
|
-DAMDGPU_TARGETS=${{ job.target }}
|
||||||
|
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
||||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
|
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
|
||||||
-DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
|
-DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
|
||||||
|
|||||||
@@ -1,10 +1,29 @@
|
|||||||
parameters:
|
parameters:
|
||||||
|
- name: componentName
|
||||||
|
type: string
|
||||||
|
default: amdsmi
|
||||||
- name: checkoutRepo
|
- name: checkoutRepo
|
||||||
type: string
|
type: string
|
||||||
default: 'self'
|
default: 'self'
|
||||||
- name: checkoutRef
|
- name: checkoutRef
|
||||||
type: string
|
type: string
|
||||||
default: ''
|
default: ''
|
||||||
|
# monorepo related parameters
|
||||||
|
- name: sparseCheckoutDir
|
||||||
|
type: string
|
||||||
|
default: ''
|
||||||
|
- name: triggerDownstreamJobs
|
||||||
|
type: boolean
|
||||||
|
default: false
|
||||||
|
- name: downstreamAggregateNames
|
||||||
|
type: string
|
||||||
|
default: ''
|
||||||
|
- name: buildDependsOn
|
||||||
|
type: object
|
||||||
|
default: null
|
||||||
|
- name: unifiedBuild
|
||||||
|
type: boolean
|
||||||
|
default: false
|
||||||
# set to true if doing full build of ROCm stack
|
# set to true if doing full build of ROCm stack
|
||||||
# and dependencies are pulled from same pipeline
|
# and dependencies are pulled from same pipeline
|
||||||
- name: aggregatePipeline
|
- name: aggregatePipeline
|
||||||
@@ -31,7 +50,7 @@ parameters:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||||
- job: amdsmi_build_${{ job.os }}
|
- job: ${{ parameters.componentName }}_build_${{ job.os }}
|
||||||
pool:
|
pool:
|
||||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||||
vmImage: 'ubuntu-24.04'
|
vmImage: 'ubuntu-24.04'
|
||||||
@@ -55,6 +74,7 @@ jobs:
|
|||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||||
parameters:
|
parameters:
|
||||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||||
|
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||||
parameters:
|
parameters:
|
||||||
os: ${{ job.os }}
|
os: ${{ job.os }}
|
||||||
@@ -65,50 +85,54 @@ jobs:
|
|||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||||
parameters:
|
parameters:
|
||||||
os: ${{ job.os }}
|
os: ${{ job.os }}
|
||||||
|
componentName: ${{ parameters.componentName }}
|
||||||
|
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||||
parameters:
|
parameters:
|
||||||
os: ${{ job.os }}
|
os: ${{ job.os }}
|
||||||
|
componentName: ${{ parameters.componentName }}
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||||
# parameters:
|
# parameters:
|
||||||
# aptPackages: ${{ parameters.aptPackages }}
|
# aptPackages: ${{ parameters.aptPackages }}
|
||||||
|
|
||||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||||
- job: amdsmi_test_${{ job.os }}_${{ job.target }}
|
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||||
dependsOn: amdsmi_build_${{ job.os }}
|
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||||
condition:
|
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}
|
||||||
and(succeeded(),
|
condition:
|
||||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
and(succeeded(),
|
||||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||||
eq(${{ parameters.aggregatePipeline }}, False)
|
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||||
)
|
eq(${{ parameters.aggregatePipeline }}, False)
|
||||||
variables:
|
)
|
||||||
- group: common
|
variables:
|
||||||
- template: /.azuredevops/variables-global.yml
|
- group: common
|
||||||
pool: ${{ job.target }}_test_pool
|
- template: /.azuredevops/variables-global.yml
|
||||||
workspace:
|
pool: ${{ job.target }}_test_pool
|
||||||
clean: all
|
workspace:
|
||||||
steps:
|
clean: all
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
steps:
|
||||||
parameters:
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||||
aptPackages: ${{ parameters.aptPackages }}
|
parameters:
|
||||||
packageManager: ${{ job.packageManager }}
|
aptPackages: ${{ parameters.aptPackages }}
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
packageManager: ${{ job.packageManager }}
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||||
parameters:
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||||
os: ${{ job.os }}
|
parameters:
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
os: ${{ job.os }}
|
||||||
parameters:
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||||
runRocminfo: false
|
parameters:
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
runRocminfo: false
|
||||||
parameters:
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||||
componentName: amdsmi
|
parameters:
|
||||||
testDir: '$(Agent.BuildDirectory)'
|
componentName: ${{ parameters.componentName }}
|
||||||
testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst'
|
testDir: '$(Agent.BuildDirectory)'
|
||||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst'
|
||||||
os: ${{ job.os }}
|
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
os: ${{ job.os }}
|
||||||
parameters:
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||||
aptPackages: ${{ parameters.aptPackages }}
|
parameters:
|
||||||
environment: test
|
aptPackages: ${{ parameters.aptPackages }}
|
||||||
gpuTarget: ${{ job.target }}
|
environment: test
|
||||||
|
gpuTarget: ${{ job.target }}
|
||||||
|
|||||||
@@ -1,10 +1,29 @@
|
|||||||
parameters:
|
parameters:
|
||||||
|
- name: componentName
|
||||||
|
type: string
|
||||||
|
default: hipTensor
|
||||||
- name: checkoutRepo
|
- name: checkoutRepo
|
||||||
type: string
|
type: string
|
||||||
default: 'self'
|
default: 'self'
|
||||||
- name: checkoutRef
|
- name: checkoutRef
|
||||||
type: string
|
type: string
|
||||||
default: ''
|
default: ''
|
||||||
|
# monorepo related parameters
|
||||||
|
- name: sparseCheckoutDir
|
||||||
|
type: string
|
||||||
|
default: ''
|
||||||
|
- name: triggerDownstreamJobs
|
||||||
|
type: boolean
|
||||||
|
default: false
|
||||||
|
- name: downstreamAggregateNames
|
||||||
|
type: string
|
||||||
|
default: ''
|
||||||
|
- name: buildDependsOn
|
||||||
|
type: object
|
||||||
|
default: null
|
||||||
|
- name: unifiedBuild
|
||||||
|
type: boolean
|
||||||
|
default: false
|
||||||
# set to true if doing full build of ROCm stack
|
# set to true if doing full build of ROCm stack
|
||||||
# and dependencies are pulled from same pipeline
|
# and dependencies are pulled from same pipeline
|
||||||
- name: aggregatePipeline
|
- name: aggregatePipeline
|
||||||
@@ -51,7 +70,7 @@ parameters:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||||
- job: hipTensor_build_${{ job.target }}
|
- job: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||||
variables:
|
variables:
|
||||||
- group: common
|
- group: common
|
||||||
- template: /.azuredevops/variables-global.yml
|
- template: /.azuredevops/variables-global.yml
|
||||||
@@ -66,12 +85,15 @@ jobs:
|
|||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||||
parameters:
|
parameters:
|
||||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||||
|
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||||
parameters:
|
parameters:
|
||||||
checkoutRef: ${{ parameters.checkoutRef }}
|
checkoutRef: ${{ parameters.checkoutRef }}
|
||||||
dependencyList: ${{ parameters.rocmDependencies }}
|
dependencyList: ${{ parameters.rocmDependencies }}
|
||||||
gpuTarget: ${{ job.target }}
|
gpuTarget: ${{ job.target }}
|
||||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||||
|
${{ if parameters.triggerDownstreamJobs }}:
|
||||||
|
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||||
parameters:
|
parameters:
|
||||||
extraBuildFlags: >-
|
extraBuildFlags: >-
|
||||||
@@ -85,9 +107,12 @@ jobs:
|
|||||||
-GNinja
|
-GNinja
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||||
parameters:
|
parameters:
|
||||||
|
componentName: ${{ parameters.componentName }}
|
||||||
|
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||||
gpuTarget: ${{ job.target }}
|
gpuTarget: ${{ job.target }}
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||||
parameters:
|
parameters:
|
||||||
|
componentName: ${{ parameters.componentName }}
|
||||||
gpuTarget: ${{ job.target }}
|
gpuTarget: ${{ job.target }}
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||||
@@ -95,44 +120,47 @@ jobs:
|
|||||||
aptPackages: ${{ parameters.aptPackages }}
|
aptPackages: ${{ parameters.aptPackages }}
|
||||||
gpuTarget: ${{ job.target }}
|
gpuTarget: ${{ job.target }}
|
||||||
|
|
||||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||||
- job: hipTensor_test_${{ job.target }}
|
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||||
timeoutInMinutes: 90
|
- job: ${{ parameters.componentName }}_test_${{ job.target }}
|
||||||
dependsOn: hipTensor_build_${{ job.target }}
|
timeoutInMinutes: 90
|
||||||
condition:
|
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||||
and(succeeded(),
|
condition:
|
||||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
and(succeeded(),
|
||||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||||
eq(${{ parameters.aggregatePipeline }}, False)
|
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||||
)
|
eq(${{ parameters.aggregatePipeline }}, False)
|
||||||
variables:
|
)
|
||||||
- group: common
|
variables:
|
||||||
- template: /.azuredevops/variables-global.yml
|
- group: common
|
||||||
pool: ${{ job.target }}_test_pool
|
- template: /.azuredevops/variables-global.yml
|
||||||
workspace:
|
pool: ${{ job.target }}_test_pool
|
||||||
clean: all
|
workspace:
|
||||||
steps:
|
clean: all
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
steps:
|
||||||
parameters:
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||||
aptPackages: ${{ parameters.aptPackages }}
|
parameters:
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
aptPackages: ${{ parameters.aptPackages }}
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||||
parameters:
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||||
gpuTarget: ${{ job.target }}
|
parameters:
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
gpuTarget: ${{ job.target }}
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||||
parameters:
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||||
checkoutRef: ${{ parameters.checkoutRef }}
|
parameters:
|
||||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
checkoutRef: ${{ parameters.checkoutRef }}
|
||||||
gpuTarget: ${{ job.target }}
|
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
gpuTarget: ${{ job.target }}
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
${{ if parameters.triggerDownstreamJobs }}:
|
||||||
parameters:
|
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||||
componentName: hipTensor
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor'
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||||
testParameters: '-E ".*-extended" --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
|
parameters:
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
componentName: ${{ parameters.componentName }}
|
||||||
parameters:
|
testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor'
|
||||||
aptPackages: ${{ parameters.aptPackages }}
|
testParameters: '-E ".*-extended" --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
|
||||||
environment: test
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||||
gpuTarget: ${{ job.target }}
|
parameters:
|
||||||
|
aptPackages: ${{ parameters.aptPackages }}
|
||||||
|
environment: test
|
||||||
|
gpuTarget: ${{ job.target }}
|
||||||
|
|||||||
@@ -142,7 +142,7 @@ jobs:
|
|||||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||||
- job: ${{ parameters.componentName }}_test_${{ job.target }}
|
- job: ${{ parameters.componentName }}_test_${{ job.target }}
|
||||||
timeoutInMinutes: 270
|
timeoutInMinutes: 350
|
||||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
|
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||||
condition:
|
condition:
|
||||||
and(succeeded(),
|
and(succeeded(),
|
||||||
|
|||||||
@@ -62,6 +62,7 @@ parameters:
|
|||||||
- llvm-project
|
- llvm-project
|
||||||
- MIOpen
|
- MIOpen
|
||||||
- MIVisionX
|
- MIVisionX
|
||||||
|
- rocm_smi_lib
|
||||||
- rccl
|
- rccl
|
||||||
- rocALUTION
|
- rocALUTION
|
||||||
- rocBLAS
|
- rocBLAS
|
||||||
@@ -100,6 +101,7 @@ parameters:
|
|||||||
- llvm-project
|
- llvm-project
|
||||||
- MIOpen
|
- MIOpen
|
||||||
- MIVisionX
|
- MIVisionX
|
||||||
|
- rocm_smi_lib
|
||||||
- rccl
|
- rccl
|
||||||
- rocALUTION
|
- rocALUTION
|
||||||
- rocBLAS
|
- rocBLAS
|
||||||
@@ -146,6 +148,7 @@ jobs:
|
|||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||||
parameters:
|
parameters:
|
||||||
aptPackages: ${{ parameters.aptPackages }}
|
aptPackages: ${{ parameters.aptPackages }}
|
||||||
|
pipModules: ${{ parameters.pipModules }}
|
||||||
registerROCmPackages: true
|
registerROCmPackages: true
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||||
parameters:
|
parameters:
|
||||||
@@ -245,5 +248,6 @@ jobs:
|
|||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||||
parameters:
|
parameters:
|
||||||
aptPackages: ${{ parameters.aptPackages }}
|
aptPackages: ${{ parameters.aptPackages }}
|
||||||
|
pipModules: ${{ parameters.pipModules }}
|
||||||
environment: test
|
environment: test
|
||||||
gpuTarget: ${{ job.target }}
|
gpuTarget: ${{ job.target }}
|
||||||
|
|||||||
@@ -65,6 +65,13 @@ parameters:
|
|||||||
- pytest
|
- pytest
|
||||||
- pytest-cov
|
- pytest-cov
|
||||||
- pytest-xdist
|
- pytest-xdist
|
||||||
|
- name: rocmDependencies
|
||||||
|
type: object
|
||||||
|
default:
|
||||||
|
- clr
|
||||||
|
- llvm-project
|
||||||
|
- ROCR-Runtime
|
||||||
|
- rocprofiler-sdk
|
||||||
- name: rocmTestDependencies
|
- name: rocmTestDependencies
|
||||||
type: object
|
type: object
|
||||||
default:
|
default:
|
||||||
@@ -101,10 +108,12 @@ jobs:
|
|||||||
${{ if parameters.buildDependsOn }}:
|
${{ if parameters.buildDependsOn }}:
|
||||||
dependsOn:
|
dependsOn:
|
||||||
- ${{ each build in parameters.buildDependsOn }}:
|
- ${{ each build in parameters.buildDependsOn }}:
|
||||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
- ${{ build }}_${{ job.target }}
|
||||||
variables:
|
variables:
|
||||||
- group: common
|
- group: common
|
||||||
- template: /.azuredevops/variables-global.yml
|
- template: /.azuredevops/variables-global.yml
|
||||||
|
- name: ROCM_PATH
|
||||||
|
value: $(Agent.BuildDirectory)/rocm
|
||||||
pool:
|
pool:
|
||||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||||
workspace:
|
workspace:
|
||||||
@@ -119,6 +128,14 @@ jobs:
|
|||||||
parameters:
|
parameters:
|
||||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||||
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||||
|
parameters:
|
||||||
|
checkoutRef: ${{ parameters.checkoutRef }}
|
||||||
|
dependencyList: ${{ parameters.rocmDependencies }}
|
||||||
|
gpuTarget: ${{ job.target }}
|
||||||
|
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||||
|
${{ if parameters.triggerDownstreamJobs }}:
|
||||||
|
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||||
parameters:
|
parameters:
|
||||||
extraBuildFlags: >-
|
extraBuildFlags: >-
|
||||||
|
|||||||
@@ -63,6 +63,7 @@ parameters:
|
|||||||
libopenblas-dev: openblas-devel
|
libopenblas-dev: openblas-devel
|
||||||
libopenmpi-dev: openmpi-devel
|
libopenmpi-dev: openmpi-devel
|
||||||
libpci-dev: libpciaccess-devel
|
libpci-dev: libpciaccess-devel
|
||||||
|
libsimde-dev: simde-devel
|
||||||
libssl-dev: openssl-devel
|
libssl-dev: openssl-devel
|
||||||
# note: libstdc++-devel is in the base packages list
|
# note: libstdc++-devel is in the base packages list
|
||||||
libsystemd-dev: systemd-devel
|
libsystemd-dev: systemd-devel
|
||||||
|
|||||||
@@ -35,8 +35,8 @@ parameters:
|
|||||||
developBranch: develop
|
developBranch: develop
|
||||||
hasGpuTarget: true
|
hasGpuTarget: true
|
||||||
amdsmi:
|
amdsmi:
|
||||||
pipelineId: 99
|
pipelineId: 376
|
||||||
developBranch: amd-staging
|
developBranch: develop
|
||||||
hasGpuTarget: false
|
hasGpuTarget: false
|
||||||
aomp-extras:
|
aomp-extras:
|
||||||
pipelineId: 111
|
pipelineId: 111
|
||||||
@@ -115,7 +115,7 @@ parameters:
|
|||||||
developBranch: develop
|
developBranch: develop
|
||||||
hasGpuTarget: true
|
hasGpuTarget: true
|
||||||
hipTensor:
|
hipTensor:
|
||||||
pipelineId: 105
|
pipelineId: 374
|
||||||
developBranch: develop
|
developBranch: develop
|
||||||
hasGpuTarget: true
|
hasGpuTarget: true
|
||||||
llvm-project:
|
llvm-project:
|
||||||
|
|||||||
@@ -140,6 +140,7 @@ EoS
|
|||||||
etcd
|
etcd
|
||||||
fas
|
fas
|
||||||
FBGEMM
|
FBGEMM
|
||||||
|
FiLM
|
||||||
FIFOs
|
FIFOs
|
||||||
FFT
|
FFT
|
||||||
FFTs
|
FFTs
|
||||||
@@ -160,10 +161,12 @@ Fortran
|
|||||||
Fuyu
|
Fuyu
|
||||||
GALB
|
GALB
|
||||||
GAT
|
GAT
|
||||||
|
GATNE
|
||||||
GCC
|
GCC
|
||||||
GCD
|
GCD
|
||||||
GCDs
|
GCDs
|
||||||
GCN
|
GCN
|
||||||
|
GCNN
|
||||||
GDB
|
GDB
|
||||||
GDDR
|
GDDR
|
||||||
GDR
|
GDR
|
||||||
@@ -182,6 +185,8 @@ Glibc
|
|||||||
GLXT
|
GLXT
|
||||||
Gloo
|
Gloo
|
||||||
GMI
|
GMI
|
||||||
|
GNN
|
||||||
|
GNNs
|
||||||
GPG
|
GPG
|
||||||
GPR
|
GPR
|
||||||
GPT
|
GPT
|
||||||
|
|||||||
23
CHANGELOG.md
23
CHANGELOG.md
@@ -233,7 +233,7 @@ for a complete overview of this release.
|
|||||||
* Fixed certain output in `amd-smi monitor` when GPUs are partitioned. It fixes the issue with amd-smi monitor such as: `amd-smi monitor -Vqt`, `amd-smi monitor -g 0 -Vqt -w 1`, and `amd-smi monitor -Vqt --file /tmp/test1`. These commands will now be able to display as normal in partitioned GPU scenarios.
|
* Fixed certain output in `amd-smi monitor` when GPUs are partitioned. It fixes the issue with amd-smi monitor such as: `amd-smi monitor -Vqt`, `amd-smi monitor -g 0 -Vqt -w 1`, and `amd-smi monitor -Vqt --file /tmp/test1`. These commands will now be able to display as normal in partitioned GPU scenarios.
|
||||||
|
|
||||||
```{note}
|
```{note}
|
||||||
See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.1/CHANGELOG.md) for details, examples, and in-depth descriptions.
|
See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.1/CHANGELOG.md#amd_smi_lib-for-rocm-710) for details, examples, and in-depth descriptions.
|
||||||
```
|
```
|
||||||
|
|
||||||
### **Composable Kernel** (1.1.0)
|
### **Composable Kernel** (1.1.0)
|
||||||
@@ -677,7 +677,7 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc
|
|||||||
* Enabled `TCP_TCP_LATENCY` counter and associated counter for all GPUs except MI300.
|
* Enabled `TCP_TCP_LATENCY` counter and associated counter for all GPUs except MI300.
|
||||||
* Interactive metric descriptions in TUI analyze mode.
|
* Interactive metric descriptions in TUI analyze mode.
|
||||||
* You can now left click on any metric cell to view detailed descriptions in the dedicated `METRIC DESCRIPTION` tab.
|
* You can now left click on any metric cell to view detailed descriptions in the dedicated `METRIC DESCRIPTION` tab.
|
||||||
* Support for analysis report output as a sqlite database using ``--output-format db`` analysis mode option.
|
* Support for analysis report output as a SQLite database using ``--output-format db`` analysis mode option.
|
||||||
* `Compute Throughput` panel to TUI's `High Level Analysis` category with the following metrics: VALU FLOPs, VALU IOPs, MFMA FLOPs (F8), MFMA FLOPs (BF16), MFMA FLOPs (F16), MFMA FLOPs (F32), MFMA FLOPs (F64), MFMA FLOPs (F6F4) (in gfx950), MFMA IOPs (Int8), SALU Utilization, VALU Utilization, MFMA Utilization, VMEM Utilization, Branch Utilization, IPC
|
* `Compute Throughput` panel to TUI's `High Level Analysis` category with the following metrics: VALU FLOPs, VALU IOPs, MFMA FLOPs (F8), MFMA FLOPs (BF16), MFMA FLOPs (F16), MFMA FLOPs (F32), MFMA FLOPs (F64), MFMA FLOPs (F6F4) (in gfx950), MFMA IOPs (Int8), SALU Utilization, VALU Utilization, MFMA Utilization, VMEM Utilization, Branch Utilization, IPC
|
||||||
|
|
||||||
* `Memory Throughput` panel to TUI's `High Level Analysis` category with the following metrics: vL1D Cache BW, vL1D Cache Utilization, Theoretical LDS Bandwidth, LDS Utilization, L2 Cache BW, L2 Cache Utilization, L2-Fabric Read BW, L2-Fabric Write BW, sL1D Cache BW, L1I BW, Address Processing Unit Busy, Data-Return Busy, L1I-L2 Bandwidth, sL1D-L2 BW
|
* `Memory Throughput` panel to TUI's `High Level Analysis` category with the following metrics: vL1D Cache BW, vL1D Cache Utilization, Theoretical LDS Bandwidth, LDS Utilization, L2 Cache BW, L2 Cache Utilization, L2-Fabric Read BW, L2-Fabric Write BW, sL1D Cache BW, L1I BW, Address Processing Unit Busy, Data-Return Busy, L1I-L2 Bandwidth, sL1D-L2 BW
|
||||||
@@ -763,7 +763,7 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc
|
|||||||
* MI300A/X L2-Fabric 64B read counter may display negative values - The rocprof-compute metric 17.6.1 (Read 64B) can report negative values due to incorrect calculation when TCC_BUBBLE_sum + TCC_EA0_RDREQ_32B_sum exceeds TCC_EA0_RDREQ_sum.
|
* MI300A/X L2-Fabric 64B read counter may display negative values - The rocprof-compute metric 17.6.1 (Read 64B) can report negative values due to incorrect calculation when TCC_BUBBLE_sum + TCC_EA0_RDREQ_32B_sum exceeds TCC_EA0_RDREQ_sum.
|
||||||
* A workaround has been implemented using max(0, calculated_value) to prevent negative display values while the root cause is under investigation.
|
* A workaround has been implemented using max(0, calculated_value) to prevent negative display values while the root cause is under investigation.
|
||||||
* The profile mode crashes when `--format-rocprof-output json` is selected.
|
* The profile mode crashes when `--format-rocprof-output json` is selected.
|
||||||
* As a workaround, this option should either not be provided or should be set to `csv` instead of `json`. This issue does not affect the profiling results since both `csv` and `json` output formats lead to the same profiling data.
|
* As a workaround, this option should either not be provided or should be set to `csv` instead of `json`. This issue does not affect the profiling results since both `csv` and `json` output formats lead to the same profiling data.
|
||||||
|
|
||||||
### **ROCm Data Center Tool** (1.2.0)
|
### **ROCm Data Center Tool** (1.2.0)
|
||||||
|
|
||||||
@@ -804,6 +804,14 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc
|
|||||||
- Updated PAPI module to v7.2.0b2.
|
- Updated PAPI module to v7.2.0b2.
|
||||||
- ROCprofiler-SDK is now used for tracing OMPT API calls.
|
- ROCprofiler-SDK is now used for tracing OMPT API calls.
|
||||||
|
|
||||||
|
#### Known issues
|
||||||
|
|
||||||
|
* Profiling PyTorch and other AI workloads might fail because the required libraries cannot be found in the default linker path. As a workaround, explicitly add the library path to ``LD_LIBRARY_PATH``. For example, when using PyTorch with Python 3.10, add the following to the environment:
|
||||||
|
|
||||||
|
```
|
||||||
|
export LD_LIBRARY_PATH=:/opt/venv/lib/python3.10/site-packages/torch/lib:$LD_LIBRARY_PATH
|
||||||
|
```
|
||||||
|
|
||||||
### **rocPRIM** (4.1.0)
|
### **rocPRIM** (4.1.0)
|
||||||
|
|
||||||
#### Added
|
#### Added
|
||||||
@@ -881,17 +889,12 @@ As of ROCm 7.0, the internal error state is cleared on each call to `hipGetLastE
|
|||||||
|
|
||||||
### **rocSOLVER** (3.31.0)
|
### **rocSOLVER** (3.31.0)
|
||||||
|
|
||||||
#### Added
|
|
||||||
|
|
||||||
* Hybrid computation support for existing routines: STEQR
|
|
||||||
|
|
||||||
#### Optimized
|
#### Optimized
|
||||||
|
|
||||||
Improved the performance of:
|
Improved the performance of:
|
||||||
|
|
||||||
* BDSQR and downstream functions such as GESVD.
|
* LARF, LARFT, GEQR2, and downstream functions such as GEQRF.
|
||||||
* STEQR and downstream functions such as SYEV/HEEV.
|
* STEDC and divide and conquer Eigensolvers.
|
||||||
* LARFT and downstream functions such as GEQR2 and GEQRF.
|
|
||||||
|
|
||||||
### **rocSPARSE** (4.1.0)
|
### **rocSPARSE** (4.1.0)
|
||||||
|
|
||||||
|
|||||||
20
RELEASE.md
20
RELEASE.md
@@ -676,6 +676,10 @@ For a historical overview of ROCm component updates, see the {doc}`ROCm consolid
|
|||||||
|
|
||||||
- Fixed output for `amd-smi xgmi -l --json`.
|
- Fixed output for `amd-smi xgmi -l --json`.
|
||||||
|
|
||||||
|
```{note}
|
||||||
|
See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.1/CHANGELOG.md#amd_smi_lib-for-rocm-710) for details, examples, and in-depth descriptions.
|
||||||
|
```
|
||||||
|
|
||||||
### **Composable Kernel** (1.1.0)
|
### **Composable Kernel** (1.1.0)
|
||||||
|
|
||||||
#### Upcoming changes
|
#### Upcoming changes
|
||||||
@@ -863,6 +867,22 @@ be fixed in a future ROCm release.
|
|||||||
|
|
||||||
Due to partial data corruption of Electrically Erasable Programmable Read-Only Memory (EEPROM) and limited error handling in the AMD GPU Driver(amdgpu), excessive log output might result when querying the reliability, availability, and serviceability (RAS) bad pages. This issue will be fixed in a future AMD GPU Driver(amdgpu) and ROCm release.
|
Due to partial data corruption of Electrically Erasable Programmable Read-Only Memory (EEPROM) and limited error handling in the AMD GPU Driver(amdgpu), excessive log output might result when querying the reliability, availability, and serviceability (RAS) bad pages. This issue will be fixed in a future AMD GPU Driver(amdgpu) and ROCm release.
|
||||||
|
|
||||||
|
### OpenBLAS runtime dependency for hipblaslt-test and hipblaslt-bench
|
||||||
|
|
||||||
|
Running `hipblaslt-test` or `hipblaslt-bench` without installing the OpenBLAS development package results in the following error:
|
||||||
|
```
|
||||||
|
libopenblas.so.0: cannot open shared object file: No such file or directory
|
||||||
|
```
|
||||||
|
As a workaround, first install `libopenblas-dev` or `libopenblas-devel`, depending on the package manager used. The issue will be fixed in a future ROCm release. See [GitHub issue #5639](https://github.com/ROCm/ROCm/issues/5639).
|
||||||
|
|
||||||
|
### Reduced precision in gemm_ex operations for rocBLAS and hipBLAS
|
||||||
|
|
||||||
|
Some `gemm_ex` operations with `half` or `f32_r` data types might yield 16-bit precision results instead of the expected 32-bit precision when matrix dimensions are m=1 or n=1. The issue results from the optimization that enables `_ex` APIs to use lower-precision multiplications. It limits the high-precision matrix operations performed in PyTorch with rocBLAS and hipBLAS. The issue will be fixed in a future ROCm release. See [GitHub issue #5640](https://github.com/ROCm/ROCm/issues/5640).
|
||||||
|
|
||||||
|
### RCCL profiler plugin failure with AllToAll operations
|
||||||
|
|
||||||
|
The RCCL profiler plugin `librccl-profiler.so` might fail with a segmentation fault during `AllToAll` collective operations due to improperly assigned point-to-point task function pointers. This leads to invalid memory access and prevents profiling of `AllToAll` performance. Other operations, like `AllReduce`, are unaffected. It's recommended to avoid using the RCCL profiler plugin with `AllToAll` operations until the fix is available. This issue is resolved in the {fab}`github`[RCCL `develop` branch](https://github.com/ROCm/rccl/tree/develop) and will be part of a future ROCm release. See [GitHub issue #5653](https://github.com/ROCm/ROCm/issues/5653).
|
||||||
|
|
||||||
## ROCm resolved issues
|
## ROCm resolved issues
|
||||||
|
|
||||||
The following are previously known issues resolved in this release. For resolved issues related to
|
The following are previously known issues resolved in this release. For resolved issues related to
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<manifest>
|
<manifest>
|
||||||
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
|
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
|
||||||
<default revision="refs/tags/rocm-7.1.0"
|
<default revision="refs/tags/rocm-7.1.1"
|
||||||
remote="rocm-org"
|
remote="rocm-org"
|
||||||
sync-c="true"
|
sync-c="true"
|
||||||
sync-j="4" />
|
sync-j="4" />
|
||||||
@@ -25,6 +25,7 @@
|
|||||||
<project groups="mathlibs" name="MIVisionX" />
|
<project groups="mathlibs" name="MIVisionX" />
|
||||||
<project groups="mathlibs" name="ROCmValidationSuite" />
|
<project groups="mathlibs" name="ROCmValidationSuite" />
|
||||||
<project groups="mathlibs" name="composable_kernel" />
|
<project groups="mathlibs" name="composable_kernel" />
|
||||||
|
<project groups="mathlibs" name="hipSOLVER" />
|
||||||
<project groups="mathlibs" name="hipTensor" />
|
<project groups="mathlibs" name="hipTensor" />
|
||||||
<project groups="mathlibs" name="hipfort" />
|
<project groups="mathlibs" name="hipfort" />
|
||||||
<project groups="mathlibs" name="rccl" />
|
<project groups="mathlibs" name="rccl" />
|
||||||
@@ -45,6 +46,7 @@
|
|||||||
rocprofiler rocr-runtime roctracer -->
|
rocprofiler rocr-runtime roctracer -->
|
||||||
<project groups="mathlibs" name="rocm-systems" />
|
<project groups="mathlibs" name="rocm-systems" />
|
||||||
<project groups="mathlibs" name="rocPyDecode" />
|
<project groups="mathlibs" name="rocPyDecode" />
|
||||||
|
<project groups="mathlibs" name="rocSOLVER" />
|
||||||
<project groups="mathlibs" name="rocSHMEM" />
|
<project groups="mathlibs" name="rocSHMEM" />
|
||||||
<project groups="mathlibs" name="rocWMMA" />
|
<project groups="mathlibs" name="rocWMMA" />
|
||||||
<project groups="mathlibs" name="rocm-cmake" />
|
<project groups="mathlibs" name="rocm-cmake" />
|
||||||
|
|||||||
@@ -155,8 +155,38 @@ compatibility and system requirements.
|
|||||||
|
|
||||||
.. rubric:: Footnotes
|
.. rubric:: Footnotes
|
||||||
|
|
||||||
|
|
||||||
.. [#os-compatibility] Some operating systems are supported on limited GPUs. For detailed information, see the latest :ref:`supported_distributions`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-operating-systems>`_, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-operating-systems>`_, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-operating-systems>`_.
|
.. [#os-compatibility] Some operating systems are supported on limited GPUs. For detailed information, see the latest :ref:`supported_distributions`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-operating-systems>`_, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-operating-systems>`_, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-operating-systems>`_.
|
||||||
.. [#gpu-compatibility] Some GPUs have limited operating system support. For detailed information, see the latest :ref:`supported_GPUs`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-gpus>`_, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-gpus>`_, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-gpus>`_.
|
.. [#gpu-compatibility] Some GPUs have limited operating system support. For detailed information, see the latest :ref:`supported_GPUs`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-gpus>`_, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-gpus>`_, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-gpus>`_.
|
||||||
|
|
||||||
|
.. [#rhel-10-702] RHEL 10.0 and RHEL 9.6 are supported on all listed :ref:`supported_GPUs` except AMD Radeon PRO V620 GPU.
|
||||||
|
.. [#rhel-94-702] RHEL 9.4 is supported on all AMD Instinct GPUs listed under :ref:`supported_GPUs`.
|
||||||
|
.. [#rhel-700] RHEL 8.10 is supported only on AMD Instinct MI300X, MI300A, MI250X, MI250, MI210, and MI100 GPUs.
|
||||||
|
.. [#sles-710] **For ROCm 7.1.x** - SLES 15 SP7 is supported only on AMD Instinct MI325X, MI300X, MI300A, MI250X, MI250, MI210, and MI100 GPUs.
|
||||||
|
.. [#sles-db-700] **For ROCm 7.0.x** - SLES 15 SP7 and Debian 12 are supported only on AMD Instinct MI300X, MI300A, MI250X, MI250, and MI210 GPUs.
|
||||||
|
.. [#ol-710-mi300x] **For ROCm 7.1.x** - Oracle Linux 10 and 9 are supported only on AMD Instinct MI355X, MI350X, MI325X, and MI300X GPUs. Oracle Linux 8 is supported only on AMD Instinct MI300X GPU.
|
||||||
|
.. [#ol-700-mi300x] **For ROCm 7.0.x** - Oracle Linux 10 and 9 are supported only on AMD Instinct MI355X, MI350X, and MI300X GPUs. Oracle Linux 8 is supported only on AMD Instinct MI300X GPU.
|
||||||
|
.. [#ol-mi300x] **Prior to ROCm 7.0.0** - Oracle Linux is supported only on AMD Instinct MI300X GPUs.
|
||||||
|
.. [#db-710-mi300x] **For ROCm 7.1.x** - Debian 13 is supported only on AMD Instinct MI325X and MI300X GPUs.
|
||||||
|
.. [#db12-710] **For ROCm 7.1.x** - Debian 12 is supported only on AMD Instinct MI325X, MI300X, MI300A, MI250X, MI250, and MI210 GPUs.
|
||||||
|
.. [#db-mi300x] **For ROCm 7.0.2** - Debian 13 is supported only on AMD Instinct MI300X GPUs.
|
||||||
|
.. [#az-mi300x] Starting ROCm 6.4.0, Azure Linux 3.0 is supported only on AMD Instinct MI300X and AMD Radeon PRO V710 GPUs.
|
||||||
|
.. [#rl-700] Rocky Linux 9 is supported only on AMD Instinct MI300X and MI300A GPUs.
|
||||||
|
.. [#single-node] **Prior to ROCm 7.0.0** - Debian 12 is supported only on AMD Instinct MI300X GPUs for single-node functionality.
|
||||||
|
.. [#mi350x-os-710] AMD Instinct MI355X (gfx950) and MI350X (gfx950) GPUs support all listed :ref:`supported_distributions` except RHEL 8.10, SLES 15 SP7, Debian 12, Rocky 9, Azure Linux 3.0, and Oracle Linux 8.
|
||||||
|
.. [#mi350x-os-700] AMD Instinct MI355X (gfx950) and MI350X (gfx950) GPUs only support Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, Oracle Linux 10, and Oracle Linux 9.
|
||||||
|
.. [#RDNA-OS-700] **For ROCm 7.0.x** - AMD Radeon PRO AI PRO R9700 (gfx1201), AMD Radeon RX 9070 XT (gfx1201), AMD Radeon RX 9070 GRE (gfx1201), AMD Radeon RX 9070 (gfx1201), AMD Radeon RX 9060 XT (gfx1200), AMD Radeon RX 9060 (gfx1200), AMD Radeon RX 7800 XT (gfx1101), AMD Radeon RX 7700 XT (gfx1101), AMD Radeon PRO W7700 (gfx1101), and AMD Radeon PRO W6800 (gfx1030) only support Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, and RHEL 9.6.
|
||||||
|
.. [#rd-v710] **For ROCm 7.0.x** - AMD Radeon PRO V710 (gfx1101) GPUs only support Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, and Azure Linux 3.0.
|
||||||
|
.. [#rd-v620] **For ROCm 7.0.x** - AMD Radeon PRO V620 (gfx1030) GPUs only support Ubuntu 24.04.3 and Ubuntu 22.04.5.
|
||||||
|
.. [#mi325x-os-710] **For ROCm 7.1.x** - AMD Instinct MI325X GPUs (gfx942) support all listed :ref:`supported_distributions` except RHEL 8.10, Rocky 9, Azure Linux 3.0, and Oracle Linux 8.
|
||||||
|
.. [#mi325x-os] **For ROCm 7.0.x** - AMD Instinct MI325X GPUs (gfx942) only support Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||||
|
.. [#mi300x-os] **Starting ROCm 7.0.x** - AMD Instinct MI300X GPUs (gfx942) support all listed :ref:`supported_distributions`.
|
||||||
|
.. [#mi300A-os] **Starting ROCm 7.0.x** - AMD Instinct MI300A GPUs (gfx942) support all listed :ref:`supported_distributions` except on Debian 13, Azure Linux 3.0, Oracle Linux 10, Oracle Linux 9, and Oracle Linux 8.
|
||||||
|
.. [#mi200x-os] **For ROCm 7.0.x** - AMD Instinct MI200 Series GPUs (gfx90a) only support Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, and Debian 12.
|
||||||
|
.. [#mi100-710-os] **For ROCm 7.1.x** - AMD Instinct MI100 GPUs (gfx908) only support Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, RHEL 8.10, and SLES 15 SP7.
|
||||||
|
.. [#mi100-os] **For ROCm 7.0.x** - AMD Instinct MI100 GPUs (gfx908) only support Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, and RHEL 8.10.
|
||||||
|
.. [#tf-mi350] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead.
|
||||||
|
|
||||||
.. [#dgl_compat] DGL is supported only on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0.
|
.. [#dgl_compat] DGL is supported only on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0.
|
||||||
.. [#llama-cpp_compat] llama.cpp is supported only on ROCm 7.0.0 and ROCm 6.4.x.
|
.. [#llama-cpp_compat] llama.cpp is supported only on ROCm 7.0.0 and ROCm 6.4.x.
|
||||||
.. [#mi325x_KVM] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0.
|
.. [#mi325x_KVM] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0.
|
||||||
@@ -243,7 +273,7 @@ Expand for full historical view of:
|
|||||||
.. [#tf-mi350-past-60] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead.
|
.. [#tf-mi350-past-60] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead.
|
||||||
.. [#verl_compat-past-60] verl is supported only on ROCm 6.2.0.
|
.. [#verl_compat-past-60] verl is supported only on ROCm 6.2.0.
|
||||||
.. [#stanford-megatron-lm_compat-past-60] Stanford Megatron-LM is supported only on ROCm 6.3.0.
|
.. [#stanford-megatron-lm_compat-past-60] Stanford Megatron-LM is supported only on ROCm 6.3.0.
|
||||||
.. [#dgl_compat-past-60] DGL is supported only on ROCm 6.4.0.
|
.. [#dgl_compat-past-60] DGL is supported only on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0.
|
||||||
.. [#megablocks_compat-past-60] Megablocks is supported only on ROCm 6.3.0.
|
.. [#megablocks_compat-past-60] Megablocks is supported only on ROCm 6.3.0.
|
||||||
.. [#taichi_compat-past-60] Taichi is supported only on ROCm 6.3.2.
|
.. [#taichi_compat-past-60] Taichi is supported only on ROCm 6.3.2.
|
||||||
.. [#ray_compat-past-60] Ray is supported only on ROCm 6.4.1.
|
.. [#ray_compat-past-60] Ray is supported only on ROCm 6.4.1.
|
||||||
|
|||||||
@@ -39,13 +39,13 @@ Support overview
|
|||||||
Version support
|
Version support
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
DGL is supported on `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
|
DGL is supported on `ROCm 7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__,
|
||||||
|
`ROCm 6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__, and `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
|
||||||
|
|
||||||
Supported devices
|
Supported devices
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
- **Officially Supported**: AMD Instinct™ MI300X (through `hipBLASlt <https://rocm.docs.amd.com/projects/hipBLASLt/en/latest/index.html>`__)
|
**Officially Supported**: AMD Instinct™ MI300X, MI250X
|
||||||
- **Partially Supported**: AMD Instinct™ MI250X
|
|
||||||
|
|
||||||
.. _dgl-recommendations:
|
.. _dgl-recommendations:
|
||||||
|
|
||||||
@@ -60,16 +60,35 @@ GAT, GCN, and GraphSage. Using these models, a variety of use cases are supporte
|
|||||||
- 1D (Temporal) and 2D (Image) Classification
|
- 1D (Temporal) and 2D (Image) Classification
|
||||||
- Drug Discovery
|
- Drug Discovery
|
||||||
|
|
||||||
Multiple use cases of DGL have been tested and verified.
|
For use cases and recommendations, refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__,
|
||||||
However, a recommended example follows a drug discovery pipeline using the ``SE3Transformer``.
|
where you can search for DGL examples and best practices to optimize your workloads on AMD GPUs.
|
||||||
Refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`_,
|
|
||||||
where you can search for DGL examples and best practices to optimize your training workflows on AMD GPUs.
|
|
||||||
|
|
||||||
Coverage includes:
|
* Although multiple use cases of DGL have been tested and verified, a few have been
|
||||||
|
outlined in the `DGL in the Real World: Running GNNs on Real Use Cases
|
||||||
|
<https://rocm.blogs.amd.com/artificial-intelligence/dgl_blog2/README.html>`__ blog
|
||||||
|
post, which walks through four real-world graph neural network (GNN) workloads
|
||||||
|
implemented with the Deep Graph Library on ROCm. It covers tasks ranging from
|
||||||
|
heterogeneous e-commerce graphs and multiplex networks (GATNE) to molecular graph
|
||||||
|
regression (GNN-FiLM) and EEG-based neurological diagnosis (EEG-GCNN). For each use
|
||||||
|
case, the authors detail: the dataset and task, how DGL is used, and their experience
|
||||||
|
porting to ROCm. It is shown that DGL codebases often run without modification, with
|
||||||
|
seamless integration of graph operations, message passing, sampling, and convolution.
|
||||||
|
|
||||||
- Single-GPU training/inference
|
* The `Graph Neural Networks (GNNs) at Scale: DGL with ROCm on AMD Hardware
|
||||||
- Multi-GPU training
|
<https://rocm.blogs.amd.com/artificial-intelligence/why-graph-neural/README.html>`__
|
||||||
|
blog post introduces the Deep Graph Library (DGL) and its enablement on the AMD ROCm platform,
|
||||||
|
bringing high-performance graph neural network (GNN) training to AMD GPUs. DGL bridges
|
||||||
|
the gap between dense tensor frameworks and the irregular nature of graph data through a
|
||||||
|
graph-first, message-passing abstraction. Its design ensures scalability, flexibility, and
|
||||||
|
interoperability across frameworks like PyTorch and TensorFlow. AMD’s ROCm integration
|
||||||
|
enables DGL to run efficiently on HIP-based GPUs, supported by prebuilt Docker containers
|
||||||
|
and open-source repositories. This marks a major step in AMD's mission to advance open,
|
||||||
|
scalable AI ecosystems beyond traditional architectures.
|
||||||
|
|
||||||
|
You can pre-process datasets and begin training on AMD GPUs through:
|
||||||
|
|
||||||
|
* Single-GPU training/inference
|
||||||
|
* Multi-GPU training
|
||||||
|
|
||||||
.. _dgl-docker-compat:
|
.. _dgl-docker-compat:
|
||||||
|
|
||||||
@@ -85,7 +104,7 @@ with ROCm backends on Docker Hub. The following Docker image tags and associated
|
|||||||
inventories represent the latest available DGL version from the official Docker Hub.
|
inventories represent the latest available DGL version from the official Docker Hub.
|
||||||
Click the |docker-icon| to view the image on Docker Hub.
|
Click the |docker-icon| to view the image on Docker Hub.
|
||||||
|
|
||||||
.. list-table:: DGL Docker image components
|
.. list-table::
|
||||||
:header-rows: 1
|
:header-rows: 1
|
||||||
:class: docker-image-compatibility
|
:class: docker-image-compatibility
|
||||||
|
|
||||||
@@ -98,43 +117,83 @@ Click the |docker-icon| to view the image on Docker Hub.
|
|||||||
|
|
||||||
* - .. raw:: html
|
* - .. raw:: html
|
||||||
|
|
||||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-8ce2c3bcfaa137ab94a75f9e2ea711894748980f57417739138402a542dd5564"><i class="fab fa-docker fa-lg"></i></a>
|
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu24.04_py3.12_pytorch_2.8.0/images/sha256-943698ddf54c22a7bcad2e5b4ff467752e29e4ba6d0c926789ae7b242cbd92dd"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
|
||||||
|
|
||||||
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
|
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
|
||||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
||||||
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`__
|
- `2.8.0 <https://github.com/pytorch/pytorch/releases/tag/v2.8.0>`__
|
||||||
- 24.04
|
- 24.04
|
||||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
|
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
|
||||||
|
|
||||||
* - .. raw:: html
|
* - .. raw:: html
|
||||||
|
|
||||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-cf1683283b8eeda867b690229c8091c5bbf1edb9f52e8fb3da437c49a612ebe4"><i class="fab fa-docker fa-lg"></i></a>
|
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu24.04_py3.12_pytorch_2.6.0/images/sha256-b2ec286a035eb7d0a6aab069561914d21a3cac462281e9c024501ba5ccedfbf7"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
|
||||||
|
|
||||||
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
|
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
|
||||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
||||||
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
|
- `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
|
||||||
|
- 24.04
|
||||||
|
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
|
||||||
|
|
||||||
|
* - .. raw:: html
|
||||||
|
|
||||||
|
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu22.04_py3.10_pytorch_2.7.1/images/sha256-d27aee16df922ccf0bcd9107bfcb6d20d34235445d456c637e33ca6f19d11a51"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
|
||||||
|
|
||||||
|
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
|
||||||
|
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
||||||
|
- `2.7.1 <https://github.com/pytorch/pytorch/releases/tag/v2.7.1>`__
|
||||||
|
- 22.04
|
||||||
|
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
|
||||||
|
|
||||||
|
* - .. raw:: html
|
||||||
|
|
||||||
|
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm6.4.3_ubuntu24.04_py3.12_pytorch_2.6.0/images/sha256-f3ba6a3c9ec9f6c1cde28449dc9780e0c4c16c4140f4b23f158565fbfd422d6b"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
|
||||||
|
|
||||||
|
- `6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__
|
||||||
|
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
||||||
|
- `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
|
||||||
|
- 24.04
|
||||||
|
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
|
||||||
|
|
||||||
|
* - .. raw:: html
|
||||||
|
|
||||||
|
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-8ce2c3bcfaa137ab94a75f9e2ea711894748980f57417739138402a542dd5564"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
|
||||||
|
|
||||||
|
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
|
||||||
|
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
||||||
|
- `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
|
||||||
|
- 24.04
|
||||||
|
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
|
||||||
|
|
||||||
|
* - .. raw:: html
|
||||||
|
|
||||||
|
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-cf1683283b8eeda867b690229c8091c5bbf1edb9f52e8fb3da437c49a612ebe4"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
|
||||||
|
|
||||||
|
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
|
||||||
|
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
||||||
|
- `2.4.1 <https://github.com/pytorch/pytorch/releases/tag/v2.4.1>`__
|
||||||
- 24.04
|
- 24.04
|
||||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
|
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
|
||||||
|
|
||||||
|
|
||||||
* - .. raw:: html
|
* - .. raw:: html
|
||||||
|
|
||||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-4834f178c3614e2d09e89e32041db8984c456d45dfd20286e377ca8635686554"><i class="fab fa-docker fa-lg"></i></a>
|
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-4834f178c3614e2d09e89e32041db8984c456d45dfd20286e377ca8635686554"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
|
||||||
|
|
||||||
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
|
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
|
||||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
||||||
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
|
- `2.4.1 <https://github.com/pytorch/pytorch/releases/tag/v2.4.1>`__
|
||||||
- 22.04
|
- 22.04
|
||||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
|
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
|
||||||
|
|
||||||
|
|
||||||
* - .. raw:: html
|
* - .. raw:: html
|
||||||
|
|
||||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-88740a2c8ab4084b42b10c3c6ba984cab33dd3a044f479c6d7618e2b2cb05e69"><i class="fab fa-docker fa-lg"></i></a>
|
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-88740a2c8ab4084b42b10c3c6ba984cab33dd3a044f479c6d7618e2b2cb05e69"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
|
||||||
|
|
||||||
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
|
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
|
||||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
||||||
- `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`__
|
- `2.3.0 <https://github.com/pytorch/pytorch/releases/tag/v2.3.0>`__
|
||||||
- 22.04
|
- 22.04
|
||||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
|
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
|
||||||
|
|
||||||
@@ -150,81 +209,102 @@ If you prefer to build it yourself, ensure the following dependencies are instal
|
|||||||
:header-rows: 1
|
:header-rows: 1
|
||||||
|
|
||||||
* - ROCm library
|
* - ROCm library
|
||||||
- ROCm 6.4.0 Version
|
- ROCm 7.0.0 Version
|
||||||
|
- ROCm 6.4.x Version
|
||||||
- Purpose
|
- Purpose
|
||||||
* - `Composable Kernel <https://github.com/ROCm/composable_kernel>`_
|
* - `Composable Kernel <https://github.com/ROCm/composable_kernel>`_
|
||||||
|
- 1.1.0
|
||||||
- 1.1.0
|
- 1.1.0
|
||||||
- Enables faster execution of core operations like matrix multiplication
|
- Enables faster execution of core operations like matrix multiplication
|
||||||
(GEMM), convolutions and transformations.
|
(GEMM), convolutions and transformations.
|
||||||
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`_
|
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`_
|
||||||
|
- 3.0.0
|
||||||
- 2.4.0
|
- 2.4.0
|
||||||
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
|
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
|
||||||
matrix and vector operations.
|
matrix and vector operations.
|
||||||
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
|
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
|
||||||
|
- 1.0.0
|
||||||
- 0.12.0
|
- 0.12.0
|
||||||
- hipBLASLt is an extension of the hipBLAS library, providing additional
|
- hipBLASLt is an extension of the hipBLAS library, providing additional
|
||||||
features like epilogues fused into the matrix multiplication kernel or
|
features like epilogues fused into the matrix multiplication kernel or
|
||||||
use of integer tensor cores.
|
use of integer tensor cores.
|
||||||
* - `hipCUB <https://github.com/ROCm/hipCUB>`_
|
* - `hipCUB <https://github.com/ROCm/hipCUB>`_
|
||||||
|
- 4.0.0
|
||||||
- 3.4.0
|
- 3.4.0
|
||||||
- Provides a C++ template library for parallel algorithms for reduction,
|
- Provides a C++ template library for parallel algorithms for reduction,
|
||||||
scan, sort and select.
|
scan, sort and select.
|
||||||
* - `hipFFT <https://github.com/ROCm/hipFFT>`_
|
* - `hipFFT <https://github.com/ROCm/hipFFT>`_
|
||||||
|
- 1.0.20
|
||||||
- 1.0.18
|
- 1.0.18
|
||||||
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
|
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
|
||||||
* - `hipRAND <https://github.com/ROCm/hipRAND>`_
|
* - `hipRAND <https://github.com/ROCm/hipRAND>`_
|
||||||
|
- 3.0.0
|
||||||
- 2.12.0
|
- 2.12.0
|
||||||
- Provides fast random number generation for GPUs.
|
- Provides fast random number generation for GPUs.
|
||||||
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
|
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
|
||||||
|
- 3.0.0
|
||||||
- 2.4.0
|
- 2.4.0
|
||||||
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and
|
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and
|
||||||
singular value decompositions (SVD).
|
singular value decompositions (SVD).
|
||||||
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_
|
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_
|
||||||
|
- 4.0.1
|
||||||
- 3.2.0
|
- 3.2.0
|
||||||
- Accelerates operations on sparse matrices, such as sparse matrix-vector
|
- Accelerates operations on sparse matrices, such as sparse matrix-vector
|
||||||
or matrix-matrix products.
|
or matrix-matrix products.
|
||||||
* - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`_
|
* - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`_
|
||||||
|
- 0.2.4
|
||||||
- 0.2.3
|
- 0.2.3
|
||||||
- Accelerates operations on sparse matrices, such as sparse matrix-vector
|
- Accelerates operations on sparse matrices, such as sparse matrix-vector
|
||||||
or matrix-matrix products.
|
or matrix-matrix products.
|
||||||
* - `hipTensor <https://github.com/ROCm/hipTensor>`_
|
* - `hipTensor <https://github.com/ROCm/hipTensor>`_
|
||||||
|
- 2.0.0
|
||||||
- 1.5.0
|
- 1.5.0
|
||||||
- Optimizes for high-performance tensor operations, such as contractions.
|
- Optimizes for high-performance tensor operations, such as contractions.
|
||||||
* - `MIOpen <https://github.com/ROCm/MIOpen>`_
|
* - `MIOpen <https://github.com/ROCm/MIOpen>`_
|
||||||
|
- 3.5.0
|
||||||
- 3.4.0
|
- 3.4.0
|
||||||
- Optimizes deep learning primitives such as convolutions, pooling,
|
- Optimizes deep learning primitives such as convolutions, pooling,
|
||||||
normalization, and activation functions.
|
normalization, and activation functions.
|
||||||
* - `MIGraphX <https://github.com/ROCm/AMDMIGraphX>`_
|
* - `MIGraphX <https://github.com/ROCm/AMDMIGraphX>`_
|
||||||
|
- 2.13.0
|
||||||
- 2.12.0
|
- 2.12.0
|
||||||
- Adds graph-level optimizations, ONNX models and mixed precision support
|
- Adds graph-level optimizations, ONNX models and mixed precision support
|
||||||
and enable Ahead-of-Time (AOT) Compilation.
|
and enable Ahead-of-Time (AOT) Compilation.
|
||||||
* - `MIVisionX <https://github.com/ROCm/MIVisionX>`_
|
* - `MIVisionX <https://github.com/ROCm/MIVisionX>`_
|
||||||
|
- 3.3.0
|
||||||
- 3.2.0
|
- 3.2.0
|
||||||
- Optimizes acceleration for computer vision and AI workloads like
|
- Optimizes acceleration for computer vision and AI workloads like
|
||||||
preprocessing, augmentation, and inferencing.
|
preprocessing, augmentation, and inferencing.
|
||||||
* - `rocAL <https://github.com/ROCm/rocAL>`_
|
* - `rocAL <https://github.com/ROCm/rocAL>`_
|
||||||
- :version-ref:`rocAL rocm_version`
|
- 3.3.0
|
||||||
|
- 2.2.0
|
||||||
- Accelerates the data pipeline by offloading intensive preprocessing and
|
- Accelerates the data pipeline by offloading intensive preprocessing and
|
||||||
augmentation tasks. rocAL is part of MIVisionX.
|
augmentation tasks. rocAL is part of MIVisionX.
|
||||||
* - `RCCL <https://github.com/ROCm/rccl>`_
|
* - `RCCL <https://github.com/ROCm/rccl>`_
|
||||||
- 2.2.0
|
- 2.26.6
|
||||||
|
- 2.22.3
|
||||||
- Optimizes for multi-GPU communication for operations like AllReduce and
|
- Optimizes for multi-GPU communication for operations like AllReduce and
|
||||||
Broadcast.
|
Broadcast.
|
||||||
* - `rocDecode <https://github.com/ROCm/rocDecode>`_
|
* - `rocDecode <https://github.com/ROCm/rocDecode>`_
|
||||||
|
- 1.0.0
|
||||||
- 0.10.0
|
- 0.10.0
|
||||||
- Provides hardware-accelerated data decoding capabilities, particularly
|
- Provides hardware-accelerated data decoding capabilities, particularly
|
||||||
for image, video, and other dataset formats.
|
for image, video, and other dataset formats.
|
||||||
* - `rocJPEG <https://github.com/ROCm/rocJPEG>`_
|
* - `rocJPEG <https://github.com/ROCm/rocJPEG>`_
|
||||||
|
- 1.1.0
|
||||||
- 0.8.0
|
- 0.8.0
|
||||||
- Provides hardware-accelerated JPEG image decoding and encoding.
|
- Provides hardware-accelerated JPEG image decoding and encoding.
|
||||||
* - `RPP <https://github.com/ROCm/RPP>`_
|
* - `RPP <https://github.com/ROCm/RPP>`_
|
||||||
|
- 2.0.0
|
||||||
- 1.9.10
|
- 1.9.10
|
||||||
- Speeds up data augmentation, transformation, and other preprocessing steps.
|
- Speeds up data augmentation, transformation, and other preprocessing steps.
|
||||||
* - `rocThrust <https://github.com/ROCm/rocThrust>`_
|
* - `rocThrust <https://github.com/ROCm/rocThrust>`_
|
||||||
|
- 4.0.0
|
||||||
- 3.3.0
|
- 3.3.0
|
||||||
- Provides a C++ template library for parallel algorithms like sorting,
|
- Provides a C++ template library for parallel algorithms like sorting,
|
||||||
reduction, and scanning.
|
reduction, and scanning.
|
||||||
* - `rocWMMA <https://github.com/ROCm/rocWMMA>`_
|
* - `rocWMMA <https://github.com/ROCm/rocWMMA>`_
|
||||||
|
- 2.0.0
|
||||||
- 1.7.0
|
- 1.7.0
|
||||||
- Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
|
- Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
|
||||||
multiplication (GEMM) and accumulation operations with mixed precision
|
multiplication (GEMM) and accumulation operations with mixed precision
|
||||||
@@ -253,26 +333,29 @@ Instead of listing them all, support is grouped into the following categories to
|
|||||||
* DGL NN
|
* DGL NN
|
||||||
* DGL Optim
|
* DGL Optim
|
||||||
* DGL Sparse
|
* DGL Sparse
|
||||||
|
* GraphBolt
|
||||||
|
|
||||||
Unsupported features
|
Unsupported features
|
||||||
================================================================================
|
================================================================================
|
||||||
|
|
||||||
* GraphBolt
|
* TF32 Support (only supported for PyTorch 2.7 and above)
|
||||||
* Partial TF32 Support (MI250X only)
|
|
||||||
* Kineto/ROCTracer integration
|
* Kineto/ROCTracer integration
|
||||||
|
|
||||||
|
|
||||||
Unsupported functions
|
Unsupported functions
|
||||||
================================================================================
|
================================================================================
|
||||||
|
|
||||||
* ``more_nnz``
|
* ``bfs``
|
||||||
* ``format``
|
* ``format``
|
||||||
* ``multiprocess_sparse_adam_state_dict``
|
* ``multiprocess_sparse_adam_state_dict``
|
||||||
* ``record_stream_ndarray``
|
|
||||||
* ``half_spmm``
|
* ``half_spmm``
|
||||||
* ``segment_mm``
|
* ``segment_mm``
|
||||||
* ``gather_mm_idx_b``
|
* ``gather_mm_idx_b``
|
||||||
* ``pgexplainer``
|
|
||||||
* ``sample_labors_prob``
|
* ``sample_labors_prob``
|
||||||
* ``sample_labors_noprob``
|
* ``sample_labors_noprob``
|
||||||
|
* ``sparse_admin``
|
||||||
|
|
||||||
|
Previous versions
|
||||||
|
===============================================================================
|
||||||
|
See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/dgl-history` to find documentation for previous releases
|
||||||
|
of the ``ROCm/dgl`` Docker image.
|
||||||
@@ -45,7 +45,7 @@ llama.cpp is supported on `ROCm 7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
|
|||||||
Supported devices
|
Supported devices
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
**Officially Supported**: AMD Instinct™ MI300X, MI325X, MI210
|
**Officially Supported**: AMD Instinct™ MI325X, MI300X, MI210
|
||||||
|
|
||||||
Use cases and recommendations
|
Use cases and recommendations
|
||||||
================================================================================
|
================================================================================
|
||||||
@@ -109,27 +109,27 @@ Click |docker-icon| to view the image on Docker Hub.
|
|||||||
|
|
||||||
* - .. raw:: html
|
* - .. raw:: html
|
||||||
|
|
||||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu24.04_full/images/sha256-a2ecd635eaa65bb289a9041330128677f3ae88bee6fee0597424b17e38d4903c"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_full/images/sha256-a94f0c7a598cc6504ff9e8371c016d7a2f93e69bf54a36c870f9522567201f10"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||||
- .. raw:: html
|
- .. raw:: html
|
||||||
|
|
||||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu24.04_server/images/sha256-cb46b47df415addb5ceb6e6fdf0be70bf9d7f6863bbe6e10c2441ecb84246d52"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_server/images/sha256-be175932c3c96e882dfbc7e20e0e834f58c89c2925f48b222837ee929dfc47ee"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||||
- .. raw:: html
|
- .. raw:: html
|
||||||
|
|
||||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu24.04_light/images/sha256-8f8536eec4b05c0ff1c022f9fc6c527ad1c89e6c1ca0906e4d39e4de73edbde9"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_light/images/sha256-d8ba0c70603da502c879b1f8010b439c8e7fa9f6cbdac8bbbbbba97cb41ebc9e"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||||
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
|
- `b6652 <https://github.com/ROCm/llama.cpp/tree/release/b6652>`__
|
||||||
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
|
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
|
||||||
- 24.04
|
- 24.04
|
||||||
|
|
||||||
* - .. raw:: html
|
* - .. raw:: html
|
||||||
|
|
||||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu22.04_full/images/sha256-f36de2a3b03ae53e81c85422cb3780368c9891e1ac7884b04403a921fe2ea45d"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_full/images/sha256-37582168984f25dce636cc7288298e06d94472ea35f65346b3541e6422b678ee"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||||
- .. raw:: html
|
- .. raw:: html
|
||||||
|
|
||||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu22.04_server/images/sha256-df15e8ab11a6837cd3736644fec1e047465d49e37d610ab0b79df000371327df"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_server/images/sha256-7e70578e6c3530c6591cc2c26da24a9ee68a20d318e12241de93c83224f83720"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||||
- .. raw:: html
|
- .. raw:: html
|
||||||
|
|
||||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu22.04_light/images/sha256-4ea2d5bb7964f0ee3ea9b30ba7f343edd6ddfab1b1037669ca7eafad2e3c2bd7"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_light/images/sha256-9a5231acf88b4a229677bc2c636ea3fe78a7a80f558bd80910b919855de93ad5"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||||
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
|
- `b6652 <https://github.com/ROCm/llama.cpp/tree/release/b6652>`__
|
||||||
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
|
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
|
||||||
- 22.04
|
- 22.04
|
||||||
|
|
||||||
|
|||||||
@@ -84,6 +84,8 @@ The table below summarizes information about ROCm-enabled deep learning framewor
|
|||||||
<a href="https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html"><i class="fas fa-link fa-lg"></i></a>
|
<a href="https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html"><i class="fas fa-link fa-lg"></i></a>
|
||||||
-
|
-
|
||||||
- `Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html#use-a-prebuilt-docker-image-with-dgl-pre-installed>`__
|
- `Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html#use-a-prebuilt-docker-image-with-dgl-pre-installed>`__
|
||||||
|
- `Wheels package <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html#use-a-wheels-package>`__
|
||||||
|
|
||||||
- .. raw:: html
|
- .. raw:: html
|
||||||
|
|
||||||
<a href="https://github.com/ROCm/dgl"><i class="fab fa-github fa-lg"></i></a>
|
<a href="https://github.com/ROCm/dgl"><i class="fab fa-github fa-lg"></i></a>
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ Quick start examples:
|
|||||||
export VLLM_ROCM_USE_AITER=1
|
export VLLM_ROCM_USE_AITER=1
|
||||||
vllm serve MODEL_NAME
|
vllm serve MODEL_NAME
|
||||||
|
|
||||||
# Enable only AITER Triton Prefill-Decode (split) attention
|
# Enable AITER Fused MoE and enable Triton Prefill-Decode (split) attention
|
||||||
export VLLM_ROCM_USE_AITER=1
|
export VLLM_ROCM_USE_AITER=1
|
||||||
export VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1
|
export VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1
|
||||||
export VLLM_ROCM_USE_AITER_MHA=0
|
export VLLM_ROCM_USE_AITER_MHA=0
|
||||||
@@ -244,14 +244,17 @@ Most users won't need this, but you can override the defaults:
|
|||||||
* - AITER MHA (standard models)
|
* - AITER MHA (standard models)
|
||||||
- ``VLLM_ROCM_USE_AITER=1`` (auto-selects for non-MLA models)
|
- ``VLLM_ROCM_USE_AITER=1`` (auto-selects for non-MLA models)
|
||||||
|
|
||||||
* - AITER Triton Prefill-Decode (split)
|
* - vLLM Triton Unified (default)
|
||||||
|
- ``VLLM_ROCM_USE_AITER=0`` (or unset)
|
||||||
|
|
||||||
|
* - Triton Prefill-Decode (split) without AITER
|
||||||
|
- | ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1``
|
||||||
|
|
||||||
|
* - Triton Prefill-Decode (split) along with AITER Fused-MoE
|
||||||
- | ``VLLM_ROCM_USE_AITER=1``
|
- | ``VLLM_ROCM_USE_AITER=1``
|
||||||
| ``VLLM_ROCM_USE_AITER_MHA=0``
|
| ``VLLM_ROCM_USE_AITER_MHA=0``
|
||||||
| ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1``
|
| ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1``
|
||||||
|
|
||||||
* - vLLM Triton Unified (default)
|
|
||||||
- ``VLLM_ROCM_USE_AITER=0`` (or unset)
|
|
||||||
|
|
||||||
* - AITER Unified Attention
|
* - AITER Unified Attention
|
||||||
- | ``VLLM_ROCM_USE_AITER=1``
|
- | ``VLLM_ROCM_USE_AITER=1``
|
||||||
| ``VLLM_ROCM_USE_AITER_MHA=0``
|
| ``VLLM_ROCM_USE_AITER_MHA=0``
|
||||||
@@ -269,11 +272,11 @@ Most users won't need this, but you can override the defaults:
|
|||||||
--block-size 1 \
|
--block-size 1 \
|
||||||
--tensor-parallel-size 8
|
--tensor-parallel-size 8
|
||||||
|
|
||||||
# Advanced: Use Prefill-Decode split (for short input cases)
|
# Advanced: Use Prefill-Decode split (for short input cases) with AITER Fused-MoE
|
||||||
VLLM_ROCM_USE_AITER=1 \
|
VLLM_ROCM_USE_AITER=1 \
|
||||||
VLLM_ROCM_USE_AITER_MHA=0 \
|
VLLM_ROCM_USE_AITER_MHA=0 \
|
||||||
VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1 \
|
VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1 \
|
||||||
vllm serve meta-llama/Llama-3.3-70B-Instruct
|
vllm serve meta-llama/Llama-4-Scout-17B-16E
|
||||||
|
|
||||||
**Which backend should I choose?**
|
**Which backend should I choose?**
|
||||||
|
|
||||||
@@ -352,14 +355,14 @@ vLLM V1 on ROCm provides these attention implementations:
|
|||||||
|
|
||||||
3. **AITER Triton Prefill–Decode Attention** (hybrid, Instinct MI300X-optimized)
|
3. **AITER Triton Prefill–Decode Attention** (hybrid, Instinct MI300X-optimized)
|
||||||
|
|
||||||
* Enable with ``VLLM_ROCM_USE_AITER=1``, ``VLLM_ROCM_USE_AITER_MHA=0``, and ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1``
|
* Enable with ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1``
|
||||||
* Uses separate kernels for prefill and decode phases:
|
* Uses separate kernels for prefill and decode phases:
|
||||||
|
|
||||||
* **Prefill**: ``context_attention_fwd`` Triton kernel
|
* **Prefill**: ``context_attention_fwd`` Triton kernel
|
||||||
* **Primary decode**: ``torch.ops._rocm_C.paged_attention`` (custom ROCm kernel optimized for head sizes 64/128, block sizes 16/32, GQA 1–16, context ≤131k; sliding window not supported)
|
* **Primary decode**: ``torch.ops._rocm_C.paged_attention`` (custom ROCm kernel optimized for head sizes 64/128, block sizes 16/32, GQA 1–16, context ≤131k; sliding window not supported)
|
||||||
* **Fallback decode**: ``kernel_paged_attention_2d`` Triton kernel when shapes don't meet primary decode requirements
|
* **Fallback decode**: ``kernel_paged_attention_2d`` Triton kernel when shapes don't meet primary decode requirements
|
||||||
|
|
||||||
* Usually better compared to unified Triton kernels (both vLLM and AITER variants)
|
* Usually better compared to unified Triton kernels
|
||||||
* Performance vs AITER MHA varies: AITER MHA is typically faster overall, but Prefill-Decode split may win in short input scenarios
|
* Performance vs AITER MHA varies: AITER MHA is typically faster overall, but Prefill-Decode split may win in short input scenarios
|
||||||
* The custom paged attention decode kernel is controlled by ``VLLM_ROCM_CUSTOM_PAGED_ATTN`` (default **True**)
|
* The custom paged attention decode kernel is controlled by ``VLLM_ROCM_CUSTOM_PAGED_ATTN`` (default **True**)
|
||||||
|
|
||||||
@@ -695,7 +698,9 @@ There are two strategies:
|
|||||||
vLLM engine arguments
|
vLLM engine arguments
|
||||||
=====================
|
=====================
|
||||||
|
|
||||||
Selected arguments that often help on ROCm. See `engine args docs <https://docs.vllm.ai/en/latest/serving/engine_args.html>`_ for the full list.
|
Selected arguments that often help on ROCm. See `Engine Arguments
|
||||||
|
<https://docs.vllm.ai/en/stable/configuration/engine_args.html>`__ in the vLLM
|
||||||
|
documentation for the full list.
|
||||||
|
|
||||||
Configure --max-num-seqs
|
Configure --max-num-seqs
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ See the `GitHub repository <https://github.com/vllm-project/vllm>`_ and `officia
|
|||||||
<https://docs.vllm.ai/>`_ for more information.
|
<https://docs.vllm.ai/>`_ for more information.
|
||||||
|
|
||||||
For guidance on using vLLM with ROCm, refer to `Installation with ROCm
|
For guidance on using vLLM with ROCm, refer to `Installation with ROCm
|
||||||
<https://docs.vllm.ai/en/latest/getting_started/amd-installation.html>`_.
|
<https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html#amd-rocm>`__.
|
||||||
|
|
||||||
vLLM installation
|
vLLM installation
|
||||||
-----------------
|
-----------------
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
rocm-docs-core==1.28.0
|
rocm-docs-core==1.29.0
|
||||||
sphinx-reredirects
|
sphinx-reredirects
|
||||||
sphinx-sitemap
|
sphinx-sitemap
|
||||||
sphinxcontrib.datatemplates==0.11.0
|
sphinxcontrib.datatemplates==0.11.0
|
||||||
|
|||||||
@@ -187,7 +187,7 @@ requests==2.32.5
|
|||||||
# via
|
# via
|
||||||
# pygithub
|
# pygithub
|
||||||
# sphinx
|
# sphinx
|
||||||
rocm-docs-core==1.28.0
|
rocm-docs-core==1.29.0
|
||||||
# via -r requirements.in
|
# via -r requirements.in
|
||||||
rpds-py==0.28.0
|
rpds-py==0.28.0
|
||||||
# via
|
# via
|
||||||
|
|||||||
60
tools/rocm-build/rocm-7.1.1.xml
Normal file
60
tools/rocm-build/rocm-7.1.1.xml
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<manifest>
|
||||||
|
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
|
||||||
|
<default revision="refs/tags/rocm-7.1.1"
|
||||||
|
remote="rocm-org"
|
||||||
|
sync-c="true"
|
||||||
|
sync-j="4" />
|
||||||
|
<!--list of projects for ROCm-->
|
||||||
|
<project name="ROCK-Kernel-Driver" />
|
||||||
|
<project name="amdsmi" />
|
||||||
|
<project name="rocm_bandwidth_test" />
|
||||||
|
<project name="rocm-examples" />
|
||||||
|
<!--HIP Projects-->
|
||||||
|
<project name="HIPIFY" />
|
||||||
|
<!-- The following projects are all associated with the AMDGPU LLVM compiler -->
|
||||||
|
<project name="half" />
|
||||||
|
<project name="llvm-project" />
|
||||||
|
<project name="spirv-llvm-translator" />
|
||||||
|
<!-- gdb projects -->
|
||||||
|
<project name="ROCdbgapi" />
|
||||||
|
<project name="ROCgdb" />
|
||||||
|
<project name="rocr_debug_agent" />
|
||||||
|
<!-- ROCm Libraries -->
|
||||||
|
<project groups="mathlibs" name="AMDMIGraphX" />
|
||||||
|
<project groups="mathlibs" name="MIVisionX" />
|
||||||
|
<project groups="mathlibs" name="ROCmValidationSuite" />
|
||||||
|
<project groups="mathlibs" name="composable_kernel" />
|
||||||
|
<project groups="mathlibs" name="hipSOLVER" />
|
||||||
|
<project groups="mathlibs" name="hipTensor" />
|
||||||
|
<project groups="mathlibs" name="hipTensor" />
|
||||||
|
<project groups="mathlibs" name="hipfort" />
|
||||||
|
<project groups="mathlibs" name="rccl" />
|
||||||
|
<project groups="mathlibs" name="rocAL" />
|
||||||
|
<project groups="mathlibs" name="rocALUTION" />
|
||||||
|
<project groups="mathlibs" name="rocDecode" />
|
||||||
|
<project groups="mathlibs" name="rocJPEG" />
|
||||||
|
<!-- The following components have been migrated to rocm-libraries:
|
||||||
|
hipBLAS-common hipBLAS hipBLASLt hipCUB
|
||||||
|
hipFFT hipRAND hipSPARSE hipSPARSELt
|
||||||
|
MIOpen rocBLAS rocFFT rocPRIM rocRAND
|
||||||
|
rocSPARSE rocThrust Tensile -->
|
||||||
|
<project groups="mathlibs" name="rocm-libraries" />
|
||||||
|
<!-- The following components have been migrated to rocm-systems:
|
||||||
|
aqlprofile clr hip hip-tests hipother
|
||||||
|
rdc rocm-core rocm_smi_lib rocminfo rocprofiler-compute
|
||||||
|
rocprofiler-register rocprofiler-sdk rocprofiler-systems
|
||||||
|
rocprofiler rocr-runtime roctracer -->
|
||||||
|
<project groups="mathlibs" name="rocm-systems" />
|
||||||
|
<project groups="mathlibs" name="rocPyDecode" />
|
||||||
|
<project groups="mathlibs" name="rocSHMEM" />
|
||||||
|
<project groups="mathlibs" name="rocSOLVER" />
|
||||||
|
<project groups="mathlibs" name="rocWMMA" />
|
||||||
|
<project groups="mathlibs" name="rocm-cmake" />
|
||||||
|
<project groups="mathlibs" name="rpp" />
|
||||||
|
<project groups="mathlibs" name="TransferBench" />
|
||||||
|
<!-- Projects for OpenMP-Extras -->
|
||||||
|
<project name="aomp" path="openmp-extras/aomp" />
|
||||||
|
<project name="aomp-extras" path="openmp-extras/aomp-extras" />
|
||||||
|
<project name="flang" path="openmp-extras/flang" />
|
||||||
|
</manifest>
|
||||||
Reference in New Issue
Block a user