Merge remote-tracking branch 'external/develop' into sync-develop-from-external

This commit is contained in:
Alex Xu
2025-11-26 10:09:04 -05:00
21 changed files with 445 additions and 153 deletions

View File

@@ -128,6 +128,9 @@ jobs:
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }} pipModules: ${{ parameters.pipModules }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
parameters:
cmakeVersion: '3.28.6'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
@@ -152,6 +155,7 @@ jobs:
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=Release
-DGPU_TARGETS=${{ job.target }} -DGPU_TARGETS=${{ job.target }}
-DAMDGPU_TARGETS=${{ job.target }} -DAMDGPU_TARGETS=${{ job.target }}
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
-DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include -DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
@@ -192,6 +196,9 @@ jobs:
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }} pipModules: ${{ parameters.pipModules }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
parameters:
cmakeVersion: '3.28.6'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
@@ -217,6 +224,7 @@ jobs:
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=Release
-DGPU_TARGETS=${{ job.target }} -DGPU_TARGETS=${{ job.target }}
-DAMDGPU_TARGETS=${{ job.target }} -DAMDGPU_TARGETS=${{ job.target }}
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
-DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include -DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include

View File

@@ -1,10 +1,29 @@
parameters: parameters:
- name: componentName
type: string
default: amdsmi
- name: checkoutRepo - name: checkoutRepo
type: string type: string
default: 'self' default: 'self'
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -31,7 +50,7 @@ parameters:
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: amdsmi_build_${{ job.os }} - job: ${{ parameters.componentName }}_build_${{ job.os }}
pool: pool:
${{ if eq(job.os, 'ubuntu2404') }}: ${{ if eq(job.os, 'ubuntu2404') }}:
vmImage: 'ubuntu-24.04' vmImage: 'ubuntu-24.04'
@@ -55,6 +74,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
os: ${{ job.os }} os: ${{ job.os }}
@@ -65,50 +85,54 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
os: ${{ job.os }} os: ${{ job.os }}
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
os: ${{ job.os }} os: ${{ job.os }}
componentName: ${{ parameters.componentName }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
# parameters: # parameters:
# aptPackages: ${{ parameters.aptPackages }} # aptPackages: ${{ parameters.aptPackages }}
- ${{ each job in parameters.jobMatrix.testJobs }}: - ${{ if eq(parameters.unifiedBuild, False) }}:
- job: amdsmi_test_${{ job.os }}_${{ job.target }} - ${{ each job in parameters.jobMatrix.testJobs }}:
dependsOn: amdsmi_build_${{ job.os }} - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
condition: dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}
and(succeeded(), condition:
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), and(succeeded(),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])), eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
eq(${{ parameters.aggregatePipeline }}, False) not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
) eq(${{ parameters.aggregatePipeline }}, False)
variables: )
- group: common variables:
- template: /.azuredevops/variables-global.yml - group: common
pool: ${{ job.target }}_test_pool - template: /.azuredevops/variables-global.yml
workspace: pool: ${{ job.target }}_test_pool
clean: all workspace:
steps: clean: all
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml steps:
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
aptPackages: ${{ parameters.aptPackages }} parameters:
packageManager: ${{ job.packageManager }} aptPackages: ${{ parameters.aptPackages }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
os: ${{ job.os }} parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml os: ${{ job.os }}
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
runRocminfo: false parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml runRocminfo: false
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
componentName: amdsmi parameters:
testDir: '$(Agent.BuildDirectory)' componentName: ${{ parameters.componentName }}
testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst' testDir: '$(Agent.BuildDirectory)'
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes' testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst'
os: ${{ job.os }} testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml os: ${{ job.os }}
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
aptPackages: ${{ parameters.aptPackages }} parameters:
environment: test aptPackages: ${{ parameters.aptPackages }}
gpuTarget: ${{ job.target }} environment: test
gpuTarget: ${{ job.target }}

View File

@@ -1,10 +1,29 @@
parameters: parameters:
- name: componentName
type: string
default: hipTensor
- name: checkoutRepo - name: checkoutRepo
type: string type: string
default: 'self' default: 'self'
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -51,7 +70,7 @@ parameters:
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: hipTensor_build_${{ job.target }} - job: ${{ parameters.componentName }}_build_${{ job.target }}
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
@@ -66,12 +85,15 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters: parameters:
checkoutRef: ${{ parameters.checkoutRef }} checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }} dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }} aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
extraBuildFlags: >- extraBuildFlags: >-
@@ -85,9 +107,12 @@ jobs:
-GNinja -GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -95,44 +120,47 @@ jobs:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- ${{ each job in parameters.jobMatrix.testJobs }}: - ${{ if eq(parameters.unifiedBuild, False) }}:
- job: hipTensor_test_${{ job.target }} - ${{ each job in parameters.jobMatrix.testJobs }}:
timeoutInMinutes: 90 - job: ${{ parameters.componentName }}_test_${{ job.target }}
dependsOn: hipTensor_build_${{ job.target }} timeoutInMinutes: 90
condition: dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
and(succeeded(), condition:
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), and(succeeded(),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])), eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
eq(${{ parameters.aggregatePipeline }}, False) not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
) eq(${{ parameters.aggregatePipeline }}, False)
variables: )
- group: common variables:
- template: /.azuredevops/variables-global.yml - group: common
pool: ${{ job.target }}_test_pool - template: /.azuredevops/variables-global.yml
workspace: pool: ${{ job.target }}_test_pool
clean: all workspace:
steps: clean: all
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml steps:
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
aptPackages: ${{ parameters.aptPackages }} parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml aptPackages: ${{ parameters.aptPackages }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
gpuTarget: ${{ job.target }} parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
checkoutRef: ${{ parameters.checkoutRef }} parameters:
dependencyList: ${{ parameters.rocmTestDependencies }} checkoutRef: ${{ parameters.checkoutRef }}
gpuTarget: ${{ job.target }} dependencyList: ${{ parameters.rocmTestDependencies }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml ${{ if parameters.triggerDownstreamJobs }}:
parameters: downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
componentName: hipTensor - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor' - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
testParameters: '-E ".*-extended" --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml' parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml componentName: ${{ parameters.componentName }}
parameters: testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor'
aptPackages: ${{ parameters.aptPackages }} testParameters: '-E ".*-extended" --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
environment: test - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
gpuTarget: ${{ job.target }} parameters:
aptPackages: ${{ parameters.aptPackages }}
environment: test
gpuTarget: ${{ job.target }}

View File

@@ -142,7 +142,7 @@ jobs:
- ${{ if eq(parameters.unifiedBuild, False) }}: - ${{ if eq(parameters.unifiedBuild, False) }}:
- ${{ each job in parameters.jobMatrix.testJobs }}: - ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_${{ job.target }} - job: ${{ parameters.componentName }}_test_${{ job.target }}
timeoutInMinutes: 270 timeoutInMinutes: 350
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }} dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
condition: condition:
and(succeeded(), and(succeeded(),

View File

@@ -62,6 +62,7 @@ parameters:
- llvm-project - llvm-project
- MIOpen - MIOpen
- MIVisionX - MIVisionX
- rocm_smi_lib
- rccl - rccl
- rocALUTION - rocALUTION
- rocBLAS - rocBLAS
@@ -100,6 +101,7 @@ parameters:
- llvm-project - llvm-project
- MIOpen - MIOpen
- MIVisionX - MIVisionX
- rocm_smi_lib
- rccl - rccl
- rocALUTION - rocALUTION
- rocBLAS - rocBLAS
@@ -146,6 +148,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
registerROCmPackages: true registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
parameters: parameters:
@@ -245,5 +248,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test environment: test
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}

View File

@@ -65,6 +65,13 @@ parameters:
- pytest - pytest
- pytest-cov - pytest-cov
- pytest-xdist - pytest-xdist
- name: rocmDependencies
type: object
default:
- clr
- llvm-project
- ROCR-Runtime
- rocprofiler-sdk
- name: rocmTestDependencies - name: rocmTestDependencies
type: object type: object
default: default:
@@ -101,10 +108,12 @@ jobs:
${{ if parameters.buildDependsOn }}: ${{ if parameters.buildDependsOn }}:
dependsOn: dependsOn:
- ${{ each build in parameters.buildDependsOn }}: - ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}_${{ job.target }} - ${{ build }}_${{ job.target }}
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
- name: ROCM_PATH
value: $(Agent.BuildDirectory)/rocm
pool: pool:
vmImage: ${{ variables.BASE_BUILD_POOL }} vmImage: ${{ variables.BASE_BUILD_POOL }}
workspace: workspace:
@@ -119,6 +128,14 @@ jobs:
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }} sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
extraBuildFlags: >- extraBuildFlags: >-

View File

@@ -63,6 +63,7 @@ parameters:
libopenblas-dev: openblas-devel libopenblas-dev: openblas-devel
libopenmpi-dev: openmpi-devel libopenmpi-dev: openmpi-devel
libpci-dev: libpciaccess-devel libpci-dev: libpciaccess-devel
libsimde-dev: simde-devel
libssl-dev: openssl-devel libssl-dev: openssl-devel
# note: libstdc++-devel is in the base packages list # note: libstdc++-devel is in the base packages list
libsystemd-dev: systemd-devel libsystemd-dev: systemd-devel

View File

@@ -35,8 +35,8 @@ parameters:
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
amdsmi: amdsmi:
pipelineId: 99 pipelineId: 376
developBranch: amd-staging developBranch: develop
hasGpuTarget: false hasGpuTarget: false
aomp-extras: aomp-extras:
pipelineId: 111 pipelineId: 111
@@ -115,7 +115,7 @@ parameters:
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
hipTensor: hipTensor:
pipelineId: 105 pipelineId: 374
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
llvm-project: llvm-project:

View File

@@ -140,6 +140,7 @@ EoS
etcd etcd
fas fas
FBGEMM FBGEMM
FiLM
FIFOs FIFOs
FFT FFT
FFTs FFTs
@@ -160,10 +161,12 @@ Fortran
Fuyu Fuyu
GALB GALB
GAT GAT
GATNE
GCC GCC
GCD GCD
GCDs GCDs
GCN GCN
GCNN
GDB GDB
GDDR GDDR
GDR GDR
@@ -182,6 +185,8 @@ Glibc
GLXT GLXT
Gloo Gloo
GMI GMI
GNN
GNNs
GPG GPG
GPR GPR
GPT GPT

View File

@@ -233,7 +233,7 @@ for a complete overview of this release.
* Fixed certain output in `amd-smi monitor` when GPUs are partitioned. It fixes the issue with amd-smi monitor such as: `amd-smi monitor -Vqt`, `amd-smi monitor -g 0 -Vqt -w 1`, and `amd-smi monitor -Vqt --file /tmp/test1`. These commands will now be able to display as normal in partitioned GPU scenarios. * Fixed certain output in `amd-smi monitor` when GPUs are partitioned. It fixes the issue with amd-smi monitor such as: `amd-smi monitor -Vqt`, `amd-smi monitor -g 0 -Vqt -w 1`, and `amd-smi monitor -Vqt --file /tmp/test1`. These commands will now be able to display as normal in partitioned GPU scenarios.
```{note} ```{note}
See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.1/CHANGELOG.md) for details, examples, and in-depth descriptions. See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.1/CHANGELOG.md#amd_smi_lib-for-rocm-710) for details, examples, and in-depth descriptions.
``` ```
### **Composable Kernel** (1.1.0) ### **Composable Kernel** (1.1.0)
@@ -677,7 +677,7 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc
* Enabled `TCP_TCP_LATENCY` counter and associated counter for all GPUs except MI300. * Enabled `TCP_TCP_LATENCY` counter and associated counter for all GPUs except MI300.
* Interactive metric descriptions in TUI analyze mode. * Interactive metric descriptions in TUI analyze mode.
* You can now left click on any metric cell to view detailed descriptions in the dedicated `METRIC DESCRIPTION` tab. * You can now left click on any metric cell to view detailed descriptions in the dedicated `METRIC DESCRIPTION` tab.
* Support for analysis report output as a sqlite database using ``--output-format db`` analysis mode option. * Support for analysis report output as a SQLite database using ``--output-format db`` analysis mode option.
* `Compute Throughput` panel to TUI's `High Level Analysis` category with the following metrics: VALU FLOPs, VALU IOPs, MFMA FLOPs (F8), MFMA FLOPs (BF16), MFMA FLOPs (F16), MFMA FLOPs (F32), MFMA FLOPs (F64), MFMA FLOPs (F6F4) (in gfx950), MFMA IOPs (Int8), SALU Utilization, VALU Utilization, MFMA Utilization, VMEM Utilization, Branch Utilization, IPC * `Compute Throughput` panel to TUI's `High Level Analysis` category with the following metrics: VALU FLOPs, VALU IOPs, MFMA FLOPs (F8), MFMA FLOPs (BF16), MFMA FLOPs (F16), MFMA FLOPs (F32), MFMA FLOPs (F64), MFMA FLOPs (F6F4) (in gfx950), MFMA IOPs (Int8), SALU Utilization, VALU Utilization, MFMA Utilization, VMEM Utilization, Branch Utilization, IPC
* `Memory Throughput` panel to TUI's `High Level Analysis` category with the following metrics: vL1D Cache BW, vL1D Cache Utilization, Theoretical LDS Bandwidth, LDS Utilization, L2 Cache BW, L2 Cache Utilization, L2-Fabric Read BW, L2-Fabric Write BW, sL1D Cache BW, L1I BW, Address Processing Unit Busy, Data-Return Busy, L1I-L2 Bandwidth, sL1D-L2 BW * `Memory Throughput` panel to TUI's `High Level Analysis` category with the following metrics: vL1D Cache BW, vL1D Cache Utilization, Theoretical LDS Bandwidth, LDS Utilization, L2 Cache BW, L2 Cache Utilization, L2-Fabric Read BW, L2-Fabric Write BW, sL1D Cache BW, L1I BW, Address Processing Unit Busy, Data-Return Busy, L1I-L2 Bandwidth, sL1D-L2 BW
@@ -763,7 +763,7 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc
* MI300A/X L2-Fabric 64B read counter may display negative values - The rocprof-compute metric 17.6.1 (Read 64B) can report negative values due to incorrect calculation when TCC_BUBBLE_sum + TCC_EA0_RDREQ_32B_sum exceeds TCC_EA0_RDREQ_sum. * MI300A/X L2-Fabric 64B read counter may display negative values - The rocprof-compute metric 17.6.1 (Read 64B) can report negative values due to incorrect calculation when TCC_BUBBLE_sum + TCC_EA0_RDREQ_32B_sum exceeds TCC_EA0_RDREQ_sum.
* A workaround has been implemented using max(0, calculated_value) to prevent negative display values while the root cause is under investigation. * A workaround has been implemented using max(0, calculated_value) to prevent negative display values while the root cause is under investigation.
* The profile mode crashes when `--format-rocprof-output json` is selected. * The profile mode crashes when `--format-rocprof-output json` is selected.
* As a workaround, this option should either not be provided or should be set to `csv` instead of `json`. This issue does not affect the profiling results since both `csv` and `json` output formats lead to the same profiling data. * As a workaround, this option should either not be provided or should be set to `csv` instead of `json`. This issue does not affect the profiling results since both `csv` and `json` output formats lead to the same profiling data.
### **ROCm Data Center Tool** (1.2.0) ### **ROCm Data Center Tool** (1.2.0)
@@ -804,6 +804,14 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc
- Updated PAPI module to v7.2.0b2. - Updated PAPI module to v7.2.0b2.
- ROCprofiler-SDK is now used for tracing OMPT API calls. - ROCprofiler-SDK is now used for tracing OMPT API calls.
#### Known issues
* Profiling PyTorch and other AI workloads might fail because the required libraries can't be found in the default linker path. As a workaround, explicitly add the library path to ``LD_LIBRARY_PATH``. For example, when using PyTorch with Python 3.10, add the following to the environment:
```
export LD_LIBRARY_PATH=/opt/venv/lib/python3.10/site-packages/torch/lib:$LD_LIBRARY_PATH
```
### **rocPRIM** (4.1.0) ### **rocPRIM** (4.1.0)
#### Added #### Added
@@ -881,17 +889,12 @@ As of ROCm 7.0, the internal error state is cleared on each call to `hipGetLastE
### **rocSOLVER** (3.31.0) ### **rocSOLVER** (3.31.0)
#### Added
* Hybrid computation support for existing routines: STEQR
#### Optimized #### Optimized
Improved the performance of: Improved the performance of:
* BDSQR and downstream functions such as GESVD. * LARF, LARFT, GEQR2, and downstream functions such as GEQRF.
* STEQR and downstream functions such as SYEV/HEEV. * STEDC and divide and conquer Eigensolvers.
* LARFT and downstream functions such as GEQR2 and GEQRF.
### **rocSPARSE** (4.1.0) ### **rocSPARSE** (4.1.0)

View File

@@ -676,6 +676,10 @@ For a historical overview of ROCm component updates, see the {doc}`ROCm consolid
- Fixed output for `amd-smi xgmi -l --json`. - Fixed output for `amd-smi xgmi -l --json`.
```{note}
See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.1/CHANGELOG.md#amd_smi_lib-for-rocm-710) for details, examples, and in-depth descriptions.
```
### **Composable Kernel** (1.1.0) ### **Composable Kernel** (1.1.0)
#### Upcoming changes #### Upcoming changes
@@ -863,6 +867,22 @@ be fixed in a future ROCm release.
Due to partial data corruption of Electrically Erasable Programmable Read-Only Memory (EEPROM) and limited error handling in the AMD GPU Driver(amdgpu), excessive log output might result when querying the reliability, availability, and serviceability (RAS) bad pages. This issue will be fixed in a future AMD GPU Driver(amdgpu) and ROCm release. Due to partial data corruption of Electrically Erasable Programmable Read-Only Memory (EEPROM) and limited error handling in the AMD GPU Driver(amdgpu), excessive log output might result when querying the reliability, availability, and serviceability (RAS) bad pages. This issue will be fixed in a future AMD GPU Driver(amdgpu) and ROCm release.
### OpenBLAS runtime dependency for hipblaslt-test and hipblaslt-bench
Running `hipblaslt-test` or `hipblaslt-bench` without installing the OpenBLAS development package results in the following error:
```
libopenblas.so.0: cannot open shared object file: No such file or directory
```
As a workaround, first install `libopenblas-dev` or `openblas-devel`, depending on the package manager used. The issue will be fixed in a future ROCm release. See [GitHub issue #5639](https://github.com/ROCm/ROCm/issues/5639).
### Reduced precision in gemm_ex operations for rocBLAS and hipBLAS
Some `gemm_ex` operations with `half` or `f32_r` data types might yield 16-bit precision results instead of the expected 32-bit precision when matrix dimensions are m=1 or n=1. The issue results from the optimization that enables `_ex` APIs to use lower precision multiples. It limits the high-precision matrix operations performed in PyTorch with rocBLAS and hipBLAS. The issue will be fixed in a future ROCm release. See [GitHub issue #5640](https://github.com/ROCm/ROCm/issues/5640).
### RCCL profiler plugin failure with AllToAll operations
The RCCL profiler plugin `librccl-profiler.so` might fail with a segmentation fault during `AllToAll` collective operations due to improperly assigned point-to-point task function pointers. This leads to invalid memory access and prevents profiling of `AllToAll` performance. Other operations, like `AllReduce`, are unaffected. It's recommended to avoid using the RCCL profiler plugin with `AllToAll` operations until the fix is available. This issue is resolved in the {fab}`github`[RCCL `develop` branch](https://github.com/ROCm/rccl/tree/develop) and will be part of a future ROCm release. See [GitHub issue #5653](https://github.com/ROCm/ROCm/issues/5653).
## ROCm resolved issues ## ROCm resolved issues
The following are previously known issues resolved in this release. For resolved issues related to The following are previously known issues resolved in this release. For resolved issues related to

View File

@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<manifest> <manifest>
<remote name="rocm-org" fetch="https://github.com/ROCm/" /> <remote name="rocm-org" fetch="https://github.com/ROCm/" />
<default revision="refs/tags/rocm-7.1.0" <default revision="refs/tags/rocm-7.1.1"
remote="rocm-org" remote="rocm-org"
sync-c="true" sync-c="true"
sync-j="4" /> sync-j="4" />
@@ -25,6 +25,7 @@
<project groups="mathlibs" name="MIVisionX" /> <project groups="mathlibs" name="MIVisionX" />
<project groups="mathlibs" name="ROCmValidationSuite" /> <project groups="mathlibs" name="ROCmValidationSuite" />
<project groups="mathlibs" name="composable_kernel" /> <project groups="mathlibs" name="composable_kernel" />
<project groups="mathlibs" name="hipSOLVER" />
<project groups="mathlibs" name="hipTensor" /> <project groups="mathlibs" name="hipTensor" />
<project groups="mathlibs" name="hipfort" /> <project groups="mathlibs" name="hipfort" />
<project groups="mathlibs" name="rccl" /> <project groups="mathlibs" name="rccl" />
@@ -45,6 +46,7 @@
rocprofiler rocr-runtime roctracer --> rocprofiler rocr-runtime roctracer -->
<project groups="mathlibs" name="rocm-systems" /> <project groups="mathlibs" name="rocm-systems" />
<project groups="mathlibs" name="rocPyDecode" /> <project groups="mathlibs" name="rocPyDecode" />
<project groups="mathlibs" name="rocSOLVER" />
<project groups="mathlibs" name="rocSHMEM" /> <project groups="mathlibs" name="rocSHMEM" />
<project groups="mathlibs" name="rocWMMA" /> <project groups="mathlibs" name="rocWMMA" />
<project groups="mathlibs" name="rocm-cmake" /> <project groups="mathlibs" name="rocm-cmake" />

View File

@@ -155,8 +155,38 @@ compatibility and system requirements.
.. rubric:: Footnotes .. rubric:: Footnotes
.. [#os-compatibility] Some operating systems are supported on limited GPUs. For detailed information, see the latest :ref:`supported_distributions`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-operating-systems>`_, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-operating-systems>`_, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-operating-systems>`_. .. [#os-compatibility] Some operating systems are supported on limited GPUs. For detailed information, see the latest :ref:`supported_distributions`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-operating-systems>`_, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-operating-systems>`_, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-operating-systems>`_.
.. [#gpu-compatibility] Some GPUs have limited operating system support. For detailed information, see the latest :ref:`supported_GPUs`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-gpus>`_, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-gpus>`_, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-gpus>`_. .. [#gpu-compatibility] Some GPUs have limited operating system support. For detailed information, see the latest :ref:`supported_GPUs`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-gpus>`_, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-gpus>`_, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-gpus>`_.
.. [#rhel-10-702] RHEL 10.0 and RHEL 9.6 are supported on all listed :ref:`supported_GPUs` except AMD Radeon PRO V620 GPU.
.. [#rhel-94-702] RHEL 9.4 is supported on all AMD Instinct GPUs listed under :ref:`supported_GPUs`.
.. [#rhel-700] RHEL 8.10 is supported only on AMD Instinct MI300X, MI300A, MI250X, MI250, MI210, and MI100 GPUs.
.. [#sles-710] **For ROCm 7.1.x** - SLES 15 SP7 is supported only on AMD Instinct MI325X, MI300X, MI300A, MI250X, MI250, MI210, and MI100 GPUs.
.. [#sles-db-700] **For ROCm 7.0.x** - SLES 15 SP7 and Debian 12 are supported only on AMD Instinct MI300X, MI300A, MI250X, MI250, and MI210 GPUs.
.. [#ol-710-mi300x] **For ROCm 7.1.x** - Oracle Linux 10 and 9 are supported only on AMD Instinct MI355X, MI350X, MI325X, and MI300X GPUs. Oracle Linux 8 is supported only on AMD Instinct MI300X GPU.
.. [#ol-700-mi300x] **For ROCm 7.0.x** - Oracle Linux 10 and 9 are supported only on AMD Instinct MI355X, MI350X, and MI300X GPUs. Oracle Linux 8 is supported only on AMD Instinct MI300X GPU.
.. [#ol-mi300x] **Prior to ROCm 7.0.0** - Oracle Linux is supported only on AMD Instinct MI300X GPUs.
.. [#db-710-mi300x] **For ROCm 7.1.x** - Debian 13 is supported only on AMD Instinct MI325X and MI300X GPUs.
.. [#db12-710] **For ROCm 7.1.x** - Debian 12 is supported only on AMD Instinct MI325X, MI300X, MI300A, MI250X, MI250, and MI210 GPUs.
.. [#db-mi300x] **For ROCm 7.0.2** - Debian 13 is supported only on AMD Instinct MI300X GPUs.
.. [#az-mi300x] Starting ROCm 6.4.0, Azure Linux 3.0 is supported only on AMD Instinct MI300X and AMD Radeon PRO V710 GPUs.
.. [#rl-700] Rocky Linux 9 is supported only on AMD Instinct MI300X and MI300A GPUs.
.. [#single-node] **Prior to ROCm 7.0.0** - Debian 12 is supported only on AMD Instinct MI300X GPUs for single-node functionality.
.. [#mi350x-os-710] AMD Instinct MI355X (gfx950) and MI350X (gfx950) GPUs support all listed :ref:`supported_distributions` except RHEL 8.10, SLES 15 SP7, Debian 12, Rocky 9, Azure Linux 3.0, and Oracle Linux 8.
.. [#mi350x-os-700] AMD Instinct MI355X (gfx950) and MI350X (gfx950) GPUs only support Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, Oracle Linux 10, and Oracle Linux 9.
.. [#RDNA-OS-700] **For ROCm 7.0.x** - AMD Radeon PRO AI PRO R9700 (gfx1201), AMD Radeon RX 9070 XT (gfx1201), AMD Radeon RX 9070 GRE (gfx1201), AMD Radeon RX 9070 (gfx1201), AMD Radeon RX 9060 XT (gfx1200), AMD Radeon RX 9060 (gfx1200), AMD Radeon RX 7800 XT (gfx1101), AMD Radeon RX 7700 XT (gfx1101), AMD Radeon PRO W7700 (gfx1101), and AMD Radeon PRO W6800 (gfx1030) only support Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, and RHEL 9.6.
.. [#rd-v710] **For ROCm 7.0.x** - AMD Radeon PRO V710 (gfx1101) GPUs only support Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, and Azure Linux 3.0.
.. [#rd-v620] **For ROCm 7.0.x** - AMD Radeon PRO V620 (gfx1030) GPUs only support Ubuntu 24.04.3 and Ubuntu 22.04.5.
.. [#mi325x-os-710] **For ROCm 7.1.x** - AMD Instinct MI325X GPUs (gfx942) support all listed :ref:`supported_distributions` except RHEL 8.10, Rocky 9, Azure Linux 3.0, and Oracle Linux 8.
.. [#mi325x-os] **For ROCm 7.0.x** - AMD Instinct MI325X GPUs (gfx942) only support Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
.. [#mi300x-os] **Starting ROCm 7.0.x** - AMD Instinct MI300X GPUs (gfx942) support all listed :ref:`supported_distributions`.
.. [#mi300A-os] **Starting ROCm 7.0.x** - AMD Instinct MI300A GPUs (gfx942) support all listed :ref:`supported_distributions` except Debian 13, Azure Linux 3.0, Oracle Linux 10, Oracle Linux 9, and Oracle Linux 8.
.. [#mi200x-os] **For ROCm 7.0.x** - AMD Instinct MI200 Series GPUs (gfx90a) only support Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, and Debian 12.
.. [#mi100-710-os] **For ROCm 7.1.x** - AMD Instinct MI100 GPUs (gfx908) only support Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, RHEL 8.10, and SLES 15 SP7.
.. [#mi100-os] **For ROCm 7.0.x** - AMD Instinct MI100 GPUs (gfx908) only support Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, and RHEL 8.10.
.. [#tf-mi350] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead.
.. [#dgl_compat] DGL is supported only on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0. .. [#dgl_compat] DGL is supported only on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0.
.. [#llama-cpp_compat] llama.cpp is supported only on ROCm 7.0.0 and ROCm 6.4.x. .. [#llama-cpp_compat] llama.cpp is supported only on ROCm 7.0.0 and ROCm 6.4.x.
.. [#mi325x_KVM] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0. .. [#mi325x_KVM] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0.
@@ -243,7 +273,7 @@ Expand for full historical view of:
.. [#tf-mi350-past-60] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead. .. [#tf-mi350-past-60] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead.
.. [#verl_compat-past-60] verl is supported only on ROCm 6.2.0. .. [#verl_compat-past-60] verl is supported only on ROCm 6.2.0.
.. [#stanford-megatron-lm_compat-past-60] Stanford Megatron-LM is supported only on ROCm 6.3.0. .. [#stanford-megatron-lm_compat-past-60] Stanford Megatron-LM is supported only on ROCm 6.3.0.
.. [#dgl_compat-past-60] DGL is supported only on ROCm 6.4.0. .. [#dgl_compat-past-60] DGL is supported only on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0.
.. [#megablocks_compat-past-60] Megablocks is supported only on ROCm 6.3.0. .. [#megablocks_compat-past-60] Megablocks is supported only on ROCm 6.3.0.
.. [#taichi_compat-past-60] Taichi is supported only on ROCm 6.3.2. .. [#taichi_compat-past-60] Taichi is supported only on ROCm 6.3.2.
.. [#ray_compat-past-60] Ray is supported only on ROCm 6.4.1. .. [#ray_compat-past-60] Ray is supported only on ROCm 6.4.1.

View File

@@ -39,13 +39,13 @@ Support overview
Version support Version support
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
DGL is supported on `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__. DGL is supported on `ROCm 7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__,
`ROCm 6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__, and `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
Supported devices Supported devices
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
- **Officially Supported**: AMD Instinct™ MI300X (through `hipBLASlt <https://rocm.docs.amd.com/projects/hipBLASLt/en/latest/index.html>`__) **Officially Supported**: AMD Instinct™ MI300X, MI250X
- **Partially Supported**: AMD Instinct™ MI250X
.. _dgl-recommendations: .. _dgl-recommendations:
@@ -60,16 +60,35 @@ GAT, GCN, and GraphSage. Using these models, a variety of use cases are supporte
- 1D (Temporal) and 2D (Image) Classification - 1D (Temporal) and 2D (Image) Classification
- Drug Discovery - Drug Discovery
Multiple use cases of DGL have been tested and verified. For use cases and recommendations, refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__,
However, a recommended example follows a drug discovery pipeline using the ``SE3Transformer``. where you can search for DGL examples and best practices to optimize your workloads on AMD GPUs.
Refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`_,
where you can search for DGL examples and best practices to optimize your training workflows on AMD GPUs.
Coverage includes: * Although multiple use cases of DGL have been tested and verified, a few have been
outlined in the `DGL in the Real World: Running GNNs on Real Use Cases
<https://rocm.blogs.amd.com/artificial-intelligence/dgl_blog2/README.html>`__ blog
post, which walks through four real-world graph neural network (GNN) workloads
implemented with the Deep Graph Library on ROCm. It covers tasks ranging from
heterogeneous e-commerce graphs and multiplex networks (GATNE) to molecular graph
regression (GNN-FiLM) and EEG-based neurological diagnosis (EEG-GCNN). For each use
case, the authors detail: the dataset and task, how DGL is used, and their experience
porting to ROCm. It is shown that DGL codebases often run without modification, with
seamless integration of graph operations, message passing, sampling, and convolution.
- Single-GPU training/inference * The `Graph Neural Networks (GNNs) at Scale: DGL with ROCm on AMD Hardware
- Multi-GPU training <https://rocm.blogs.amd.com/artificial-intelligence/why-graph-neural/README.html>`__
blog post introduces the Deep Graph Library (DGL) and its enablement on the AMD ROCm platform,
bringing high-performance graph neural network (GNN) training to AMD GPUs. DGL bridges
the gap between dense tensor frameworks and the irregular nature of graph data through a
graph-first, message-passing abstraction. Its design ensures scalability, flexibility, and
interoperability across frameworks like PyTorch and TensorFlow. AMD's ROCm integration
enables DGL to run efficiently on HIP-based GPUs, supported by prebuilt Docker containers
and open-source repositories. This marks a major step in AMD's mission to advance open,
scalable AI ecosystems beyond traditional architectures.
You can pre-process datasets and begin training on AMD GPUs through:
* Single-GPU training/inference
* Multi-GPU training
.. _dgl-docker-compat: .. _dgl-docker-compat:
@@ -85,7 +104,7 @@ with ROCm backends on Docker Hub. The following Docker image tags and associated
inventories represent the latest available DGL version from the official Docker Hub. inventories represent the latest available DGL version from the official Docker Hub.
Click the |docker-icon| to view the image on Docker Hub. Click the |docker-icon| to view the image on Docker Hub.
.. list-table:: DGL Docker image components .. list-table::
:header-rows: 1 :header-rows: 1
:class: docker-image-compatibility :class: docker-image-compatibility
@@ -98,43 +117,83 @@ Click the |docker-icon| to view the image on Docker Hub.
* - .. raw:: html * - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-8ce2c3bcfaa137ab94a75f9e2ea711894748980f57417739138402a542dd5564"><i class="fab fa-docker fa-lg"></i></a> <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu24.04_py3.12_pytorch_2.8.0/images/sha256-943698ddf54c22a7bcad2e5b4ff467752e29e4ba6d0c926789ae7b242cbd92dd"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__. - `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__ - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`__ - `2.8.0 <https://github.com/pytorch/pytorch/releases/tag/v2.8.0>`__
- 24.04 - 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__ - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
* - .. raw:: html * - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-cf1683283b8eeda867b690229c8091c5bbf1edb9f52e8fb3da437c49a612ebe4"><i class="fab fa-docker fa-lg"></i></a> <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu24.04_py3.12_pytorch_2.6.0/images/sha256-b2ec286a035eb7d0a6aab069561914d21a3cac462281e9c024501ba5ccedfbf7"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__. - `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__ - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__ - `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu22.04_py3.10_pytorch_2.7.1/images/sha256-d27aee16df922ccf0bcd9107bfcb6d20d34235445d456c637e33ca6f19d11a51"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.7.1 <https://github.com/pytorch/pytorch/releases/tag/v2.7.1>`__
- 22.04
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm6.4.3_ubuntu24.04_py3.12_pytorch_2.6.0/images/sha256-f3ba6a3c9ec9f6c1cde28449dc9780e0c4c16c4140f4b23f158565fbfd422d6b"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-8ce2c3bcfaa137ab94a75f9e2ea711894748980f57417739138402a542dd5564"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-cf1683283b8eeda867b690229c8091c5bbf1edb9f52e8fb3da437c49a612ebe4"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.4.1 <https://github.com/pytorch/pytorch/releases/tag/v2.4.1>`__
- 24.04 - 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__ - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
* - .. raw:: html * - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-4834f178c3614e2d09e89e32041db8984c456d45dfd20286e377ca8635686554"><i class="fab fa-docker fa-lg"></i></a> <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-4834f178c3614e2d09e89e32041db8984c456d45dfd20286e377ca8635686554"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__. - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__ - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__ - `2.4.1 <https://github.com/pytorch/pytorch/releases/tag/v2.4.1>`__
- 22.04 - 22.04
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__ - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
* - .. raw:: html * - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-88740a2c8ab4084b42b10c3c6ba984cab33dd3a044f479c6d7618e2b2cb05e69"><i class="fab fa-docker fa-lg"></i></a> <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-88740a2c8ab4084b42b10c3c6ba984cab33dd3a044f479c6d7618e2b2cb05e69"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__. - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__ - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`__ - `2.3.0 <https://github.com/pytorch/pytorch/releases/tag/v2.3.0>`__
- 22.04 - 22.04
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__ - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
@@ -150,81 +209,102 @@ If you prefer to build it yourself, ensure the following dependencies are instal
:header-rows: 1 :header-rows: 1
* - ROCm library * - ROCm library
- ROCm 6.4.0 Version - ROCm 7.0.0 Version
- ROCm 6.4.x Version
- Purpose - Purpose
* - `Composable Kernel <https://github.com/ROCm/composable_kernel>`_ * - `Composable Kernel <https://github.com/ROCm/composable_kernel>`_
- 1.1.0
- 1.1.0 - 1.1.0
- Enables faster execution of core operations like matrix multiplication - Enables faster execution of core operations like matrix multiplication
(GEMM), convolutions and transformations. (GEMM), convolutions and transformations.
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`_ * - `hipBLAS <https://github.com/ROCm/hipBLAS>`_
- 3.0.0
- 2.4.0 - 2.4.0
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for - Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
matrix and vector operations. matrix and vector operations.
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_ * - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
- 1.0.0
- 0.12.0 - 0.12.0
- hipBLASLt is an extension of the hipBLAS library, providing additional - hipBLASLt is an extension of the hipBLAS library, providing additional
features like epilogues fused into the matrix multiplication kernel or features like epilogues fused into the matrix multiplication kernel or
use of integer tensor cores. use of integer tensor cores.
* - `hipCUB <https://github.com/ROCm/hipCUB>`_ * - `hipCUB <https://github.com/ROCm/hipCUB>`_
- 4.0.0
- 3.4.0 - 3.4.0
- Provides a C++ template library for parallel algorithms for reduction, - Provides a C++ template library for parallel algorithms for reduction,
scan, sort and select. scan, sort and select.
* - `hipFFT <https://github.com/ROCm/hipFFT>`_ * - `hipFFT <https://github.com/ROCm/hipFFT>`_
- 1.0.20
- 1.0.18 - 1.0.18
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations. - Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
* - `hipRAND <https://github.com/ROCm/hipRAND>`_ * - `hipRAND <https://github.com/ROCm/hipRAND>`_
- 3.0.0
- 2.12.0 - 2.12.0
- Provides fast random number generation for GPUs. - Provides fast random number generation for GPUs.
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_ * - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
- 3.0.0
- 2.4.0 - 2.4.0
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and - Provides GPU-accelerated solvers for linear systems, eigenvalues, and
singular value decompositions (SVD). singular value decompositions (SVD).
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_ * - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_
- 4.0.1
- 3.2.0 - 3.2.0
- Accelerates operations on sparse matrices, such as sparse matrix-vector - Accelerates operations on sparse matrices, such as sparse matrix-vector
or matrix-matrix products. or matrix-matrix products.
* - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`_ * - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`_
- 0.2.4
- 0.2.3 - 0.2.3
- Accelerates operations on sparse matrices, such as sparse matrix-vector - Accelerates operations on sparse matrices, such as sparse matrix-vector
or matrix-matrix products. or matrix-matrix products.
* - `hipTensor <https://github.com/ROCm/hipTensor>`_ * - `hipTensor <https://github.com/ROCm/hipTensor>`_
- 2.0.0
- 1.5.0 - 1.5.0
- Optimizes for high-performance tensor operations, such as contractions. - Optimizes for high-performance tensor operations, such as contractions.
* - `MIOpen <https://github.com/ROCm/MIOpen>`_ * - `MIOpen <https://github.com/ROCm/MIOpen>`_
- 3.5.0
- 3.4.0 - 3.4.0
- Optimizes deep learning primitives such as convolutions, pooling, - Optimizes deep learning primitives such as convolutions, pooling,
normalization, and activation functions. normalization, and activation functions.
* - `MIGraphX <https://github.com/ROCm/AMDMIGraphX>`_ * - `MIGraphX <https://github.com/ROCm/AMDMIGraphX>`_
- 2.13.0
- 2.12.0 - 2.12.0
- Adds graph-level optimizations, ONNX models and mixed precision support - Adds graph-level optimizations, ONNX models and mixed precision support
and enables Ahead-of-Time (AOT) Compilation. and enables Ahead-of-Time (AOT) Compilation.
* - `MIVisionX <https://github.com/ROCm/MIVisionX>`_ * - `MIVisionX <https://github.com/ROCm/MIVisionX>`_
- 3.3.0
- 3.2.0 - 3.2.0
- Optimizes acceleration for computer vision and AI workloads like - Optimizes acceleration for computer vision and AI workloads like
preprocessing, augmentation, and inferencing. preprocessing, augmentation, and inferencing.
* - `rocAL <https://github.com/ROCm/rocAL>`_ * - `rocAL <https://github.com/ROCm/rocAL>`_
- :version-ref:`rocAL rocm_version` - 3.3.0
- 2.2.0
- Accelerates the data pipeline by offloading intensive preprocessing and - Accelerates the data pipeline by offloading intensive preprocessing and
augmentation tasks. rocAL is part of MIVisionX. augmentation tasks. rocAL is part of MIVisionX.
* - `RCCL <https://github.com/ROCm/rccl>`_ * - `RCCL <https://github.com/ROCm/rccl>`_
- 2.2.0 - 2.26.6
- 2.22.3
- Optimizes for multi-GPU communication for operations like AllReduce and - Optimizes for multi-GPU communication for operations like AllReduce and
Broadcast. Broadcast.
* - `rocDecode <https://github.com/ROCm/rocDecode>`_ * - `rocDecode <https://github.com/ROCm/rocDecode>`_
- 1.0.0
- 0.10.0 - 0.10.0
- Provides hardware-accelerated data decoding capabilities, particularly - Provides hardware-accelerated data decoding capabilities, particularly
for image, video, and other dataset formats. for image, video, and other dataset formats.
* - `rocJPEG <https://github.com/ROCm/rocJPEG>`_ * - `rocJPEG <https://github.com/ROCm/rocJPEG>`_
- 1.1.0
- 0.8.0 - 0.8.0
- Provides hardware-accelerated JPEG image decoding and encoding. - Provides hardware-accelerated JPEG image decoding and encoding.
* - `RPP <https://github.com/ROCm/RPP>`_ * - `RPP <https://github.com/ROCm/RPP>`_
- 2.0.0
- 1.9.10 - 1.9.10
- Speeds up data augmentation, transformation, and other preprocessing steps. - Speeds up data augmentation, transformation, and other preprocessing steps.
* - `rocThrust <https://github.com/ROCm/rocThrust>`_ * - `rocThrust <https://github.com/ROCm/rocThrust>`_
- 4.0.0
- 3.3.0 - 3.3.0
- Provides a C++ template library for parallel algorithms like sorting, - Provides a C++ template library for parallel algorithms like sorting,
reduction, and scanning. reduction, and scanning.
* - `rocWMMA <https://github.com/ROCm/rocWMMA>`_ * - `rocWMMA <https://github.com/ROCm/rocWMMA>`_
- 2.0.0
- 1.7.0 - 1.7.0
- Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix - Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
multiplication (GEMM) and accumulation operations with mixed precision multiplication (GEMM) and accumulation operations with mixed precision
@@ -253,26 +333,29 @@ Instead of listing them all, support is grouped into the following categories to
* DGL NN * DGL NN
* DGL Optim * DGL Optim
* DGL Sparse * DGL Sparse
* GraphBolt
Unsupported features Unsupported features
================================================================================ ================================================================================
* GraphBolt * TF32 Support (only supported for PyTorch 2.7 and above)
* Partial TF32 Support (MI250X only)
* Kineto/ROCTracer integration * Kineto/ROCTracer integration
Unsupported functions Unsupported functions
================================================================================ ================================================================================
* ``more_nnz`` * ``bfs``
* ``format`` * ``format``
* ``multiprocess_sparse_adam_state_dict`` * ``multiprocess_sparse_adam_state_dict``
* ``record_stream_ndarray``
* ``half_spmm`` * ``half_spmm``
* ``segment_mm`` * ``segment_mm``
* ``gather_mm_idx_b`` * ``gather_mm_idx_b``
* ``pgexplainer``
* ``sample_labors_prob`` * ``sample_labors_prob``
* ``sample_labors_noprob`` * ``sample_labors_noprob``
* ``sparse_admin``
Previous versions
===============================================================================
See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/dgl-history` to find documentation for previous releases
of the ``ROCm/dgl`` Docker image.

View File

@@ -45,7 +45,7 @@ llama.cpp is supported on `ROCm 7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
Supported devices Supported devices
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
**Officially Supported**: AMD Instinct™ MI300X, MI325X, MI210 **Officially Supported**: AMD Instinct™ MI325X, MI300X, MI210
Use cases and recommendations Use cases and recommendations
================================================================================ ================================================================================
@@ -109,27 +109,27 @@ Click |docker-icon| to view the image on Docker Hub.
* - .. raw:: html * - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu24.04_full/images/sha256-a2ecd635eaa65bb289a9041330128677f3ae88bee6fee0597424b17e38d4903c"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a> <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_full/images/sha256-a94f0c7a598cc6504ff9e8371c016d7a2f93e69bf54a36c870f9522567201f10"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu24.04_server/images/sha256-cb46b47df415addb5ceb6e6fdf0be70bf9d7f6863bbe6e10c2441ecb84246d52"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a> <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_server/images/sha256-be175932c3c96e882dfbc7e20e0e834f58c89c2925f48b222837ee929dfc47ee"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu24.04_light/images/sha256-8f8536eec4b05c0ff1c022f9fc6c527ad1c89e6c1ca0906e4d39e4de73edbde9"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a> <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_light/images/sha256-d8ba0c70603da502c879b1f8010b439c8e7fa9f6cbdac8bbbbbba97cb41ebc9e"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__ - `b6652 <https://github.com/ROCm/llama.cpp/tree/release/b6652>`__
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__ - `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- 24.04 - 24.04
* - .. raw:: html * - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu22.04_full/images/sha256-f36de2a3b03ae53e81c85422cb3780368c9891e1ac7884b04403a921fe2ea45d"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a> <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_full/images/sha256-37582168984f25dce636cc7288298e06d94472ea35f65346b3541e6422b678ee"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu22.04_server/images/sha256-df15e8ab11a6837cd3736644fec1e047465d49e37d610ab0b79df000371327df"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a> <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_server/images/sha256-7e70578e6c3530c6591cc2c26da24a9ee68a20d318e12241de93c83224f83720"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu22.04_light/images/sha256-4ea2d5bb7964f0ee3ea9b30ba7f343edd6ddfab1b1037669ca7eafad2e3c2bd7"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a> <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_light/images/sha256-9a5231acf88b4a229677bc2c636ea3fe78a7a80f558bd80910b919855de93ad5"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__ - `b6652 <https://github.com/ROCm/llama.cpp/tree/release/b6652>`__
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__ - `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- 22.04 - 22.04

View File

@@ -84,6 +84,8 @@ The table below summarizes information about ROCm-enabled deep learning framewor
<a href="https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html"><i class="fas fa-link fa-lg"></i></a> <a href="https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html"><i class="fas fa-link fa-lg"></i></a>
- -
- `Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html#use-a-prebuilt-docker-image-with-dgl-pre-installed>`__ - `Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html#use-a-prebuilt-docker-image-with-dgl-pre-installed>`__
- `Wheels package <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html#use-a-wheels-package>`__
- .. raw:: html - .. raw:: html
<a href="https://github.com/ROCm/dgl"><i class="fab fa-github fa-lg"></i></a> <a href="https://github.com/ROCm/dgl"><i class="fab fa-github fa-lg"></i></a>

View File

@@ -67,7 +67,7 @@ Quick start examples:
export VLLM_ROCM_USE_AITER=1 export VLLM_ROCM_USE_AITER=1
vllm serve MODEL_NAME vllm serve MODEL_NAME
# Enable only AITER Triton Prefill-Decode (split) attention # Enable AITER Fused MoE and enable Triton Prefill-Decode (split) attention
export VLLM_ROCM_USE_AITER=1 export VLLM_ROCM_USE_AITER=1
export VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1 export VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1
export VLLM_ROCM_USE_AITER_MHA=0 export VLLM_ROCM_USE_AITER_MHA=0
@@ -244,14 +244,17 @@ Most users won't need this, but you can override the defaults:
* - AITER MHA (standard models) * - AITER MHA (standard models)
- ``VLLM_ROCM_USE_AITER=1`` (auto-selects for non-MLA models) - ``VLLM_ROCM_USE_AITER=1`` (auto-selects for non-MLA models)
* - AITER Triton Prefill-Decode (split) * - vLLM Triton Unified (default)
- ``VLLM_ROCM_USE_AITER=0`` (or unset)
* - Triton Prefill-Decode (split) without AITER
- | ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1``
* - Triton Prefill-Decode (split) along with AITER Fused-MoE
- | ``VLLM_ROCM_USE_AITER=1`` - | ``VLLM_ROCM_USE_AITER=1``
| ``VLLM_ROCM_USE_AITER_MHA=0`` | ``VLLM_ROCM_USE_AITER_MHA=0``
| ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1`` | ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1``
* - vLLM Triton Unified (default)
- ``VLLM_ROCM_USE_AITER=0`` (or unset)
* - AITER Unified Attention * - AITER Unified Attention
- | ``VLLM_ROCM_USE_AITER=1`` - | ``VLLM_ROCM_USE_AITER=1``
| ``VLLM_ROCM_USE_AITER_MHA=0`` | ``VLLM_ROCM_USE_AITER_MHA=0``
@@ -269,11 +272,11 @@ Most users won't need this, but you can override the defaults:
--block-size 1 \ --block-size 1 \
--tensor-parallel-size 8 --tensor-parallel-size 8
# Advanced: Use Prefill-Decode split (for short input cases) # Advanced: Use Prefill-Decode split (for short input cases) with AITER Fused-MoE
VLLM_ROCM_USE_AITER=1 \ VLLM_ROCM_USE_AITER=1 \
VLLM_ROCM_USE_AITER_MHA=0 \ VLLM_ROCM_USE_AITER_MHA=0 \
VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1 \ VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1 \
vllm serve meta-llama/Llama-3.3-70B-Instruct vllm serve meta-llama/Llama-4-Scout-17B-16E
**Which backend should I choose?** **Which backend should I choose?**
@@ -352,14 +355,14 @@ vLLM V1 on ROCm provides these attention implementations:
3. **AITER Triton Prefill-Decode Attention** (hybrid, Instinct MI300X-optimized) 3. **AITER Triton Prefill-Decode Attention** (hybrid, Instinct MI300X-optimized)
* Enable with ``VLLM_ROCM_USE_AITER=1``, ``VLLM_ROCM_USE_AITER_MHA=0``, and ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1`` * Enable with ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1``
* Uses separate kernels for prefill and decode phases: * Uses separate kernels for prefill and decode phases:
* **Prefill**: ``context_attention_fwd`` Triton kernel * **Prefill**: ``context_attention_fwd`` Triton kernel
* **Primary decode**: ``torch.ops._rocm_C.paged_attention`` (custom ROCm kernel optimized for head sizes 64/128, block sizes 16/32, GQA 1–16, context ≤131k; sliding window not supported) * **Primary decode**: ``torch.ops._rocm_C.paged_attention`` (custom ROCm kernel optimized for head sizes 64/128, block sizes 16/32, GQA 1–16, context ≤131k; sliding window not supported)
* **Fallback decode**: ``kernel_paged_attention_2d`` Triton kernel when shapes don't meet primary decode requirements * **Fallback decode**: ``kernel_paged_attention_2d`` Triton kernel when shapes don't meet primary decode requirements
* Usually better compared to unified Triton kernels (both vLLM and AITER variants) * Usually better compared to unified Triton kernels
* Performance vs AITER MHA varies: AITER MHA is typically faster overall, but Prefill-Decode split may win in short input scenarios * Performance vs AITER MHA varies: AITER MHA is typically faster overall, but Prefill-Decode split may win in short input scenarios
* The custom paged attention decode kernel is controlled by ``VLLM_ROCM_CUSTOM_PAGED_ATTN`` (default **True**) * The custom paged attention decode kernel is controlled by ``VLLM_ROCM_CUSTOM_PAGED_ATTN`` (default **True**)
@@ -695,7 +698,9 @@ There are two strategies:
vLLM engine arguments vLLM engine arguments
===================== =====================
Selected arguments that often help on ROCm. See `engine args docs <https://docs.vllm.ai/en/latest/serving/engine_args.html>`_ for the full list. Selected arguments that often help on ROCm. See `Engine Arguments
<https://docs.vllm.ai/en/stable/configuration/engine_args.html>`__ in the vLLM
documentation for the full list.
Configure --max-num-seqs Configure --max-num-seqs
^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@@ -22,7 +22,7 @@ See the `GitHub repository <https://github.com/vllm-project/vllm>`_ and `officia
<https://docs.vllm.ai/>`_ for more information. <https://docs.vllm.ai/>`_ for more information.
For guidance on using vLLM with ROCm, refer to `Installation with ROCm For guidance on using vLLM with ROCm, refer to `Installation with ROCm
<https://docs.vllm.ai/en/latest/getting_started/amd-installation.html>`_. <https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html#amd-rocm>`__.
vLLM installation vLLM installation
----------------- -----------------

View File

@@ -1,4 +1,4 @@
rocm-docs-core==1.28.0 rocm-docs-core==1.29.0
sphinx-reredirects sphinx-reredirects
sphinx-sitemap sphinx-sitemap
sphinxcontrib.datatemplates==0.11.0 sphinxcontrib.datatemplates==0.11.0

View File

@@ -187,7 +187,7 @@ requests==2.32.5
# via # via
# pygithub # pygithub
# sphinx # sphinx
rocm-docs-core==1.28.0 rocm-docs-core==1.29.0
# via -r requirements.in # via -r requirements.in
rpds-py==0.28.0 rpds-py==0.28.0
# via # via

View File

@@ -0,0 +1,60 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Source manifest for the ROCm 7.1.1 release.
     Every project below is fetched from the "rocm-org" remote
     (https://github.com/ROCm/) at revision refs/tags/rocm-7.1.1 unless it
     overrides those defaults. List each project only once. -->
<manifest>
    <remote name="rocm-org" fetch="https://github.com/ROCm/" />
    <default revision="refs/tags/rocm-7.1.1"
             remote="rocm-org"
             sync-c="true"
             sync-j="4" />

    <!-- List of projects for ROCm -->
    <project name="ROCK-Kernel-Driver" />
    <project name="amdsmi" />
    <project name="rocm_bandwidth_test" />
    <project name="rocm-examples" />

    <!-- HIP projects -->
    <project name="HIPIFY" />

    <!-- The following projects are all associated with the AMDGPU LLVM compiler -->
    <project name="half" />
    <project name="llvm-project" />
    <project name="spirv-llvm-translator" />

    <!-- gdb projects -->
    <project name="ROCdbgapi" />
    <project name="ROCgdb" />
    <project name="rocr_debug_agent" />

    <!-- ROCm libraries -->
    <project groups="mathlibs" name="AMDMIGraphX" />
    <project groups="mathlibs" name="MIVisionX" />
    <project groups="mathlibs" name="ROCmValidationSuite" />
    <project groups="mathlibs" name="composable_kernel" />
    <project groups="mathlibs" name="hipSOLVER" />
    <project groups="mathlibs" name="hipTensor" />
    <project groups="mathlibs" name="hipfort" />
    <project groups="mathlibs" name="rccl" />
    <project groups="mathlibs" name="rocAL" />
    <project groups="mathlibs" name="rocALUTION" />
    <project groups="mathlibs" name="rocDecode" />
    <project groups="mathlibs" name="rocJPEG" />
    <!-- The following components have been migrated to rocm-libraries:
         hipBLAS-common hipBLAS hipBLASLt hipCUB
         hipFFT hipRAND hipSPARSE hipSPARSELt
         MIOpen rocBLAS rocFFT rocPRIM rocRAND
         rocSPARSE rocThrust Tensile -->
    <project groups="mathlibs" name="rocm-libraries" />
    <!-- The following components have been migrated to rocm-systems:
         aqlprofile clr hip hip-tests hipother
         rdc rocm-core rocm_smi_lib rocminfo rocprofiler-compute
         rocprofiler-register rocprofiler-sdk rocprofiler-systems
         rocprofiler rocr-runtime roctracer -->
    <project groups="mathlibs" name="rocm-systems" />
    <project groups="mathlibs" name="rocPyDecode" />
    <project groups="mathlibs" name="rocSHMEM" />
    <project groups="mathlibs" name="rocSOLVER" />
    <project groups="mathlibs" name="rocWMMA" />
    <project groups="mathlibs" name="rocm-cmake" />
    <project groups="mathlibs" name="rpp" />
    <project groups="mathlibs" name="TransferBench" />

    <!-- Projects for OpenMP-Extras; these check out under openmp-extras/ -->
    <project name="aomp" path="openmp-extras/aomp" />
    <project name="aomp-extras" path="openmp-extras/aomp-extras" />
    <project name="flang" path="openmp-extras/flang" />
</manifest>