From 6ba30f191cf5f8eea22f636d8c93c53cfb23fb7b Mon Sep 17 00:00:00 2001 From: amd-hsivasun Date: Thu, 6 Nov 2025 11:38:07 -0500 Subject: [PATCH 01/25] [Ex CI] rocWMMA increase timeout for test job (#5620) --- .azuredevops/components/rocWMMA.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azuredevops/components/rocWMMA.yml b/.azuredevops/components/rocWMMA.yml index 88cf75a99..d9f15f68a 100644 --- a/.azuredevops/components/rocWMMA.yml +++ b/.azuredevops/components/rocWMMA.yml @@ -142,7 +142,7 @@ jobs: - ${{ if eq(parameters.unifiedBuild, False) }}: - ${{ each job in parameters.jobMatrix.testJobs }}: - job: ${{ parameters.componentName }}_test_${{ job.target }} - timeoutInMinutes: 270 + timeoutInMinutes: 350 dependsOn: ${{ parameters.componentName }}_build_${{ job.target }} condition: and(succeeded(), From ee4cf66d6722099e96ae598662d13d26994e49f0 Mon Sep 17 00:00:00 2001 From: Joseph Macaranas <145489236+jayhawk-commits@users.noreply.github.com> Date: Fri, 7 Nov 2025 00:59:35 -0500 Subject: [PATCH 02/25] [External CI] Add simde-devel in dnf mapping (#5635) --- .azuredevops/templates/steps/dependencies-dnf.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.azuredevops/templates/steps/dependencies-dnf.yml b/.azuredevops/templates/steps/dependencies-dnf.yml index 81d2a045e..44a48b825 100644 --- a/.azuredevops/templates/steps/dependencies-dnf.yml +++ b/.azuredevops/templates/steps/dependencies-dnf.yml @@ -63,6 +63,7 @@ parameters: libopenblas-dev: openblas-devel libopenmpi-dev: openmpi-devel libpci-dev: libpciaccess-devel + libsimde-dev: simde-devel libssl-dev: openssl-devel # note: libstdc++-devel is in the base packages list libsystemd-dev: systemd-devel From 9770e9b6ef3a2e9adfd3ca024493e6f81bbe9646 Mon Sep 17 00:00:00 2001 From: amd-hsivasun Date: Fri, 7 Nov 2025 16:08:46 -0500 Subject: [PATCH 03/25] [Ex CI] hiptensor Enablement (#5636) --- .azuredevops/components/hipTensor.yml | 112 ++++++++++++++++---------- 1 file changed, 70 
insertions(+), 42 deletions(-) diff --git a/.azuredevops/components/hipTensor.yml b/.azuredevops/components/hipTensor.yml index dddddad4e..3a55817a0 100644 --- a/.azuredevops/components/hipTensor.yml +++ b/.azuredevops/components/hipTensor.yml @@ -1,10 +1,29 @@ parameters: +- name: componentName + type: string + default: hipTensor - name: checkoutRepo type: string default: 'self' - name: checkoutRef type: string default: '' +# monorepo related parameters +- name: sparseCheckoutDir + type: string + default: '' +- name: triggerDownstreamJobs + type: boolean + default: false +- name: downstreamAggregateNames + type: string + default: '' +- name: buildDependsOn + type: object + default: null +- name: unifiedBuild + type: boolean + default: false # set to true if doing full build of ROCm stack # and dependencies are pulled from same pipeline - name: aggregatePipeline @@ -51,7 +70,7 @@ parameters: jobs: - ${{ each job in parameters.jobMatrix.buildJobs }}: - - job: hipTensor_build_${{ job.target }} + - job: ${{ parameters.componentName }}_build_${{ job.target }} variables: - group: common - template: /.azuredevops/variables-global.yml @@ -66,12 +85,15 @@ jobs: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml parameters: checkoutRepo: ${{ parameters.checkoutRepo }} + sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml parameters: checkoutRef: ${{ parameters.checkoutRef }} dependencyList: ${{ parameters.rocmDependencies }} gpuTarget: ${{ job.target }} aggregatePipeline: ${{ parameters.aggregatePipeline }} + ${{ if parameters.triggerDownstreamJobs }}: + downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml parameters: extraBuildFlags: >- @@ -85,9 +107,12 @@ jobs: -GNinja - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml parameters: + componentName: ${{ parameters.componentName }} + 
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }} gpuTarget: ${{ job.target }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml parameters: + componentName: ${{ parameters.componentName }} gpuTarget: ${{ job.target }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml @@ -95,44 +120,47 @@ jobs: aptPackages: ${{ parameters.aptPackages }} gpuTarget: ${{ job.target }} -- ${{ each job in parameters.jobMatrix.testJobs }}: - - job: hipTensor_test_${{ job.target }} - timeoutInMinutes: 90 - dependsOn: hipTensor_build_${{ job.target }} - condition: - and(succeeded(), - eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), - not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])), - eq(${{ parameters.aggregatePipeline }}, False) - ) - variables: - - group: common - - template: /.azuredevops/variables-global.yml - pool: ${{ job.target }}_test_pool - workspace: - clean: all - steps: - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml - parameters: - aptPackages: ${{ parameters.aptPackages }} - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml - parameters: - gpuTarget: ${{ job.target }} - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - parameters: - checkoutRef: ${{ parameters.checkoutRef }} - dependencyList: ${{ parameters.rocmTestDependencies }} - gpuTarget: ${{ job.target }} - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml - parameters: - componentName: hipTensor - testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor' - testParameters: '-E ".*-extended" --extra-verbose 
--output-on-failure --force-new-ctest-process --output-junit test_output.xml' - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml - parameters: - aptPackages: ${{ parameters.aptPackages }} - environment: test - gpuTarget: ${{ job.target }} +- ${{ if eq(parameters.unifiedBuild, False) }}: + - ${{ each job in parameters.jobMatrix.testJobs }}: + - job: ${{ parameters.componentName }}_test_${{ job.target }} + timeoutInMinutes: 90 + dependsOn: ${{ parameters.componentName }}_build_${{ job.target }} + condition: + and(succeeded(), + eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), + not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')), + eq(${{ parameters.aggregatePipeline }}, False) + ) + variables: + - group: common + - template: /.azuredevops/variables-global.yml + pool: ${{ job.target }}_test_pool + workspace: + clean: all + steps: + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml + parameters: + aptPackages: ${{ parameters.aptPackages }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml + parameters: + gpuTarget: ${{ job.target }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml + parameters: + checkoutRef: ${{ parameters.checkoutRef }} + dependencyList: ${{ parameters.rocmTestDependencies }} + gpuTarget: ${{ job.target }} + ${{ if parameters.triggerDownstreamJobs }}: + downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml + parameters: + componentName: ${{ parameters.componentName }} + testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor' + testParameters: '-E ".*-extended" 
--extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml' + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml + parameters: + aptPackages: ${{ parameters.aptPackages }} + environment: test + gpuTarget: ${{ job.target }} From 148d6670ad3138986c6531af34869c07fecaea58 Mon Sep 17 00:00:00 2001 From: Pratik Basyal Date: Fri, 7 Nov 2025 17:48:36 -0500 Subject: [PATCH 04/25] rocBLAS and HipBLASLt known issue added 7.1.0 (#5634) * rocBLAS and HipBLASLt known issue added * Title warning fixed * Jeff's feedback added * Leo's feedback incorporated * Minor feedback * MI325X PLDM udpate * Leo's feedback added * PyTorch profiling issue added * Changelog synced * JAX section removed * Ram's feedback added --- CHANGELOG.md | 18 ++++++----- RELEASE.md | 31 ++++++++++++++----- .../tensorflow-compatibility.rst | 6 ++-- 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b379f383b..8bfd882db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,10 +48,6 @@ for a complete overview of this release. * Fixed certain output in `amd-smi monitor` when GPUs are partitioned. It fixes the issue with amd-smi monitor such as: `amd-smi monitor -Vqt`, `amd-smi monitor -g 0 -Vqt -w 1`, and `amd-smi monitor -Vqt --file /tmp/test1`. These commands will now be able to display as normal in partitioned GPU scenarios. -```{note} -See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.1/CHANGELOG.md) for details, examples, and in-depth descriptions. -``` - ### **Composable Kernel** (1.1.0) #### Added @@ -493,7 +489,7 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc * Enabled `TCP_TCP_LATENCY` counter and associated counter for all GPUs except MI300. * Interactive metric descriptions in TUI analyze mode. * You can now left click on any metric cell to view detailed descriptions in the dedicated `METRIC DESCRIPTION` tab. 
-* Support for analysis report output as a sqlite database using ``--output-format db`` analysis mode option. +* Support for analysis report output as a SQLite database using ``--output-format db`` analysis mode option. * `Compute Throughput` panel to TUI's `High Level Analysis` category with the following metrics: VALU FLOPs, VALU IOPs, MFMA FLOPs (F8), MFMA FLOPs (BF16), MFMA FLOPs (F16), MFMA FLOPs (F32), MFMA FLOPs (F64), MFMA FLOPs (F6F4) (in gfx950), MFMA IOPs (Int8), SALU Utilization, VALU Utilization, MFMA Utilization, VMEM Utilization, Branch Utilization, IPC * `Memory Throughput` panel to TUI's `High Level Analysis` category with the following metrics: vL1D Cache BW, vL1D Cache Utilization, Theoretical LDS Bandwidth, LDS Utilization, L2 Cache BW, L2 Cache Utilization, L2-Fabric Read BW, L2-Fabric Write BW, sL1D Cache BW, L1I BW, Address Processing Unit Busy, Data-Return Busy, L1I-L2 Bandwidth, sL1D-L2 BW @@ -579,7 +575,7 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc * MI300A/X L2-Fabric 64B read counter may display negative values - The rocprof-compute metric 17.6.1 (Read 64B) can report negative values due to incorrect calculation when TCC_BUBBLE_sum + TCC_EA0_RDREQ_32B_sum exceeds TCC_EA0_RDREQ_sum. * A workaround has been implemented using max(0, calculated_value) to prevent negative display values while the root cause is under investigation. * The profile mode crashes when `--format-rocprof-output json` is selected. - * As a workaround, this option should either not be provided or should be set to `csv` instead of `json`. This issue does not affect the profiling results since both `csv` and `json` output formats lead to the same profiling data. + * As a workaround, this option should either not be provided or should be set to `csv` instead of `json`. This issue does not affect the profiling results since both `csv` and `json` output formats lead to the same profiling data. 
### **ROCm Data Center Tool** (1.2.0) @@ -620,6 +616,14 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc - Updated PAPI module to v7.2.0b2. - ROCprofiler-SDK is now used for tracing OMPT API calls. +#### Known issues + +* PyTorch and other Python applications might fail to profile device activities when it is unable to find the libraries in the default linker path. As a workaround, you need to explicitly add the library path to ``LD_LIBRARY_PATH``. For PyTorch use: + +``` +export LD_LIBRARY_PATH=:/opt/venv/lib/python3.10/site-packages/torch/lib:$LD_LIBRARY_PATH +``` + ### **rocPRIM** (4.1.0) #### Added @@ -699,7 +703,7 @@ As of ROCm 7.0, the internal error state is cleared on each call to `hipGetLastE #### Added -* Hybrid computation support for existing routines: STEQR +* Hybrid computation support for existing STEQR routines. #### Optimized diff --git a/RELEASE.md b/RELEASE.md index 5f897ad33..eaf238122 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -53,6 +53,10 @@ For more information about supported: * Operating systems, see [Supported operating systems](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-operating-systems) and [ROCm installation for Linux](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/). +```{note} +Starting ROCm 7.1.0, Upstream Inter-Process Communication (IPC) works with Checkpoint Restore in User space (CRIU) feature, but it requires the most up-to-date kernel and CRIU plugin. +``` + #### Virtualization support ROCm 7.1.0 adds Guest OS support for RHEL 10.0 in KVM SR-IOV for AMD Instinct MI355X and MI350X GPUs. @@ -119,8 +123,7 @@ firmware, AMD GPU drivers, and the ROCm user space software. MI325X - 01.25.05.01
- 01.25.04.02 + 01.25.04.02[2] 30.20.0
@@ -174,6 +177,7 @@ firmware, AMD GPU drivers, and the ROCm user space software.

[1]: PLDM bundle 01.25.05.00 will be available by November 2025.

+

[2]: If using KVM SR-IOV, it’s recommended not to use AMD GPU Driver (amdgpu) 30.20.0 with PLDM bundle 01.25.04.02.

#### AMD SMI improvement: Set power cap @@ -317,11 +321,6 @@ matrix](../../docs/compatibility/compatibility-matrix.rst) for the complete list Torch-MIGraphX integrates the AMD graph inference engine with the PyTorch ecosystem. It provides a `mgx_module` object that may be invoked in the same manner as any other torch module, but utilizes the MIGraphX inference engine internally. Although Torch-MIGraphX has been available in previous releases, installable WHL files are now officially published. -#### JAX - -* JAX customers can now use Llama-2 with JAX efficiently. -* The latest public JAX repo is {fab}`github` [rocm-jax](https://github.com/ROCm/rocm-jax/tree/master). - #### TensorFlow ROCm 7.1.0 enables support for TensorFlow 2.20.0. @@ -1181,7 +1180,7 @@ For a historical overview of ROCm component updates, see the {doc}`ROCm consolid * Enabled `TCP_TCP_LATENCY` counter and associated counter for all GPUs except MI300. * Interactive metric descriptions in TUI analyze mode. * You can now left click on any metric cell to view detailed descriptions in the dedicated `METRIC DESCRIPTION` tab. -* Support for analysis report output as a sqlite database using ``--output-format db`` analysis mode option. +* Support for analysis report output as a SQLite database using ``--output-format db`` analysis mode option. 
* `Compute Throughput` panel to TUI's `High Level Analysis` category with the following metrics: VALU FLOPs, VALU IOPs, MFMA FLOPs (F8), MFMA FLOPs (BF16), MFMA FLOPs (F16), MFMA FLOPs (F32), MFMA FLOPs (F64), MFMA FLOPs (F6F4) (in gfx950), MFMA IOPs (Int8), SALU Utilization, VALU Utilization, MFMA Utilization, VMEM Utilization, Branch Utilization, IPC * `Memory Throughput` panel to TUI's `High Level Analysis` category with the following metrics: vL1D Cache BW, vL1D Cache Utilization, Theoretical LDS Bandwidth, LDS Utilization, L2 Cache BW, L2 Cache Utilization, L2-Fabric Read BW, L2-Fabric Write BW, sL1D Cache BW, L1I BW, Address Processing Unit Busy, Data-Return Busy, L1I-L2 Bandwidth, sL1D-L2 BW @@ -1308,6 +1307,14 @@ For a historical overview of ROCm component updates, see the {doc}`ROCm consolid - Updated PAPI module to v7.2.0b2. - ROCprofiler-SDK is now used for tracing OMPT API calls. +#### Known issues + +* PyTorch and other Python applications might fail to profile device activities when it is unable to find the libraries in the default linker path. As a workaround, you need to explicitly add the library path to ``LD_LIBRARY_PATH``. For PyTorch use: + +``` +export LD_LIBRARY_PATH=:/opt/venv/lib/python3.10/site-packages/torch/lib:$LD_LIBRARY_PATH +``` + ### **rocPRIM** (4.1.0) #### Added @@ -1498,6 +1505,14 @@ ROCgdb might fail when running the `step-schedlock-spurious-waves.exp` test case Due to a missing `rocm-core` dependency from the ROCm Bandwidth Test, you can't cleanly uninstall ROCm Bandwidth Test using the `amdgpu-install` script. As a workaround, uninstall ROCm Bandwidth Test manually, using the native package managers. For more information, see [Installation via native package manager](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/install-methods/package-manager-index.html). The issue will be fixed in a future ROCm release. See [GitHub issue #5611](https://github.com/ROCm/ROCm/issues/5611). 
+### OpenBLAS runtime dependency for hipblaslt-test and hipblaslt-bench + +Running `hipblaslt-test` or `hipblaslt-bench` without installing the OpenBLAS development package results in the following error: +``` +libopenblas.so.0: cannot open shared object file: No such file or directory +``` +As a workaround, first install `libopenblas-dev` or `libopenblas-deve`, depending on the package manager used. The issue will be fixed in a future ROCm release. + ## ROCm resolved issues The following are previously known issues resolved in this release. For resolved issues related to diff --git a/docs/compatibility/ml-compatibility/tensorflow-compatibility.rst b/docs/compatibility/ml-compatibility/tensorflow-compatibility.rst index 485980d13..532e3b86b 100644 --- a/docs/compatibility/ml-compatibility/tensorflow-compatibility.rst +++ b/docs/compatibility/ml-compatibility/tensorflow-compatibility.rst @@ -136,7 +136,7 @@ The following section maps supported data types and GPU-accelerated TensorFlow features to their minimum supported ROCm and TensorFlow versions. Data types -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +----------------- The data type of a tensor is specified using the ``dtype`` attribute or argument, and TensorFlow supports a wide range of data types for different use @@ -254,7 +254,7 @@ are as follows: - 1.7 Features -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +----------------- This table provides an overview of key features in TensorFlow and their availability in ROCm. @@ -346,7 +346,7 @@ availability in ROCm. - 1.9.2 Distributed library features -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +------------------------------------- Enables developers to scale computations across multiple devices on a single machine or across multiple machines. 
From 4881887e2cb95cdac29bd1e5142a08113efc5ac1 Mon Sep 17 00:00:00 2001 From: Pratik Basyal Date: Fri, 7 Nov 2025 19:45:33 -0500 Subject: [PATCH 05/25] rocBLAS precision known issue added [Develop] (#5641) * rocBLAS precision known issue added * IPC note removed * Review feedback added --- CHANGELOG.md | 2 +- RELEASE.md | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bfd882db..1b49379b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -618,7 +618,7 @@ for a complete overview of this release. #### Known issues -* PyTorch and other Python applications might fail to profile device activities when it is unable to find the libraries in the default linker path. As a workaround, you need to explicitly add the library path to ``LD_LIBRARY_PATH``. For PyTorch use: +* Profiling PyTorch and other AI workloads might fail because it is unable to find the libraries in the default linker path. As a workaround, you need to explicitly add the library path to ``LD_LIBRARY_PATH``. For example, when using PyTorch with Python 3.10, add the following to the environment: ``` export LD_LIBRARY_PATH=:/opt/venv/lib/python3.10/site-packages/torch/lib:$LD_LIBRARY_PATH diff --git a/RELEASE.md b/RELEASE.md index eaf238122..cd805b2b7 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -53,10 +53,6 @@ For more information about supported: * Operating systems, see [Supported operating systems](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-operating-systems) and [ROCm installation for Linux](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/). -```{note} -Starting ROCm 7.1.0, Upstream Inter-Process Communication (IPC) works with Checkpoint Restore in User space (CRIU) feature, but it requires the most up-to-date kernel and CRIU plugin. 
-``` - #### Virtualization support ROCm 7.1.0 adds Guest OS support for RHEL 10.0 in KVM SR-IOV for AMD Instinct MI355X and MI350X GPUs. @@ -1309,7 +1305,7 @@ For a historical overview of ROCm component updates, see the {doc}`ROCm consolid #### Known issues -* PyTorch and other Python applications might fail to profile device activities when it is unable to find the libraries in the default linker path. As a workaround, you need to explicitly add the library path to ``LD_LIBRARY_PATH``. For PyTorch use: +* Profiling PyTorch and other AI workloads might fail because it is unable to find the libraries in the default linker path. As a workaround, you need to explicitly add the library path to ``LD_LIBRARY_PATH``. For example, when using PyTorch with Python 3.10, add the following to the environment: ``` export LD_LIBRARY_PATH=:/opt/venv/lib/python3.10/site-packages/torch/lib:$LD_LIBRARY_PATH @@ -1511,7 +1507,11 @@ Running `hipblaslt-test` or `hipblaslt-bench` without installing the OpenBLAS de ``` libopenblas.so.0: cannot open shared object file: No such file or directory ``` -As a workaround, first install `libopenblas-dev` or `libopenblas-deve`, depending on the package manager used. The issue will be fixed in a future ROCm release. +As a workaround, first install `libopenblas-dev` or `openblas-devel`, depending on the package manager used. The issue will be fixed in a future ROCm release. See [GitHub issue #5639](https://github.com/ROCm/ROCm/issues/5639). + +### Reduced precision in gemm_ex operations for rocBLAS and hipBLAS + +Some `gemm_ex` operations with `half` or `f32_r` data types might yield 16-bit precision results instead of the expected 32-bit precision when matrix dimensions are m=1 or n=1. The issue results from the optimization that enables `_ex` APIs to use lower precision multiples. It limits the high-precision matrix operations performed in PyTorch with rocBLAS and hipBLAS. The issue will be fixed in a future ROCm release. 
See [GitHub issue #5640](https://github.com/ROCm/ROCm/issues/5640). ## ROCm resolved issues From 420bbfa12658f249fb62c7982c548d999b4bd110 Mon Sep 17 00:00:00 2001 From: Pratik Basyal Date: Sat, 8 Nov 2025 09:08:21 -0500 Subject: [PATCH 06/25] 7.1.0 MI325X PLDM note updated (#5644) * PLDM note updated * Footnote update * Note added to compatibility * Lint error fixed --- RELEASE.md | 10 +++++----- .../compatibility-matrix-historical-6.0.csv | 2 +- docs/compatibility/compatibility-matrix.rst | 4 +++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index cd805b2b7..a7ef63f2e 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -117,12 +117,12 @@ firmware, AMD GPU drivers, and the ROCm user space software. 30.10 - MI325X + MI325X[2] - 01.25.04.02[2] + 01.25.04.02 - 30.20.0
+ 30.20.0[*]
30.10.2
30.10.1
30.10
@@ -173,7 +173,7 @@ firmware, AMD GPU drivers, and the ROCm user space software.

[1]: PLDM bundle 01.25.05.00 will be available by November 2025.

-

[2]: If using KVM SR-IOV, it’s recommended not to use AMD GPU Driver (amdgpu) 30.20.0 with PLDM bundle 01.25.04.02.

+

[2]: For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0.

#### AMD SMI improvement: Set power cap @@ -1485,7 +1485,7 @@ issues related to individual components, review the [Detailed component changes] Applications using the MIGraphX Python API will fail when running on Python 3.13 and return the error message `AttributeError: module 'migraphx' has no attribute 'parse_onnx'`. The issue doesn't occur when you manually build MIGraphX. For detailed instructions, see [Building from source](https://rocm.docs.amd.com/projects/AMDMIGraphX/en/latest/install/building_migraphx.html). As a workaround, change the Python version to the one found in the installed location: ``` -ls -l /opt/rocm-7.0.0/lib/libmigraphx_py_*.so +ls -l /opt/rocm-7.1.0/lib/libmigraphx_py_*.so ``` The issue will be resolved in a future ROCm release. See [GitHub issue #5500](https://github.com/ROCm/ROCm/issues/5500). diff --git a/docs/compatibility/compatibility-matrix-historical-6.0.csv b/docs/compatibility/compatibility-matrix-historical-6.0.csv index f0c6ff263..dd2cb27fe 100644 --- a/docs/compatibility/compatibility-matrix-historical-6.0.csv +++ b/docs/compatibility/compatibility-matrix-historical-6.0.csv @@ -53,7 +53,7 @@ ROCm Version,7.1.0,7.0.2,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6 CUB,2.8.5,2.6.0,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1 ,,,,,,,,,,,,,,,,,,,,, DRIVER & USER SPACE [#kfd_support-past-60]_,.. 
_kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,, - :doc:`AMD GPU Driver `,"30.20.0, 30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x","30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x","30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x, 6.2.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x" + :doc:`AMD GPU Driver `,"30.20.0 [#mi325x_KVM-past-60]_, 30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x","30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x","30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x, 6.2.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x" ,,,,,,,,,,,,,,,,,,,,, ML & COMPUTER VISION,.. 
_mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,, :doc:`Composable Kernel `,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0 diff --git a/docs/compatibility/compatibility-matrix.rst b/docs/compatibility/compatibility-matrix.rst index 972ca02c6..b08216b86 100644 --- a/docs/compatibility/compatibility-matrix.rst +++ b/docs/compatibility/compatibility-matrix.rst @@ -70,7 +70,7 @@ compatibility and system requirements. CUB,2.8.5,2.6.0,2.5.0 ,,, DRIVER & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,, - :doc:`AMD GPU Driver `,"30.20.0, 30.10.2, |br| 30.10.1 [#driver_patch]_, 30.10, 6.4.x","30.10.2, 30.10.1 [#driver_patch]_, |br| 30.10, 6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x" + :doc:`AMD GPU Driver `,"30.20.0 [#mi325x_KVM]_, 30.10.2, |br| 30.10.1 [#driver_patch]_, 30.10, 6.4.x","30.10.2, 30.10.1 [#driver_patch]_, |br| 30.10, 6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x" ,,, ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,, :doc:`Composable Kernel `,1.1.0,1.1.0,1.1.0 @@ -185,6 +185,7 @@ compatibility and system requirements. .. [#tf-mi350] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead. .. [#dgl_compat] DGL is supported only on ROCm 6.4.0. .. [#llama-cpp_compat] llama.cpp is supported only on ROCm 7.0.0 and ROCm 6.4.x. +.. [#mi325x_KVM] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0. .. [#driver_patch] AMD GPU Driver (amdgpu) 30.10.1 is a quality release that resolves an issue identified in the 30.10 release. There are no other significant changes or feature additions in ROCm 7.0.1 from ROCm 7.0.0. AMD GPU Driver (amdgpu) 30.10.1 is compatible with ROCm 7.0.1 and ROCm 7.0.0. .. 
[#kfd_support] As of ROCm 6.4.0, forward and backward compatibility between the AMD GPU Driver (amdgpu) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The supported user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and AMD GPU Driver support matrix `_. .. [#ROCT-rocr] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package. @@ -309,6 +310,7 @@ Expand for full historical view of: .. [#ray_compat-past-60] Ray is supported only on ROCm 6.4.1. .. [#llama-cpp_compat-past-60] llama.cpp is supported only on ROCm 7.0.0 and 6.4.x. .. [#flashinfer_compat-past-60] FlashInfer is supported only on ROCm 6.4.1. + .. [#mi325x_KVM-past-60] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0. .. [#driver_patch-past-60] AMD GPU Driver (amdgpu) 30.10.1 is a quality release that resolves an issue identified in the 30.10 release. There are no other significant changes or feature additions in ROCm 7.0.1 from ROCm 7.0.0. AMD GPU Driver (amdgpu) 30.10.1 is compatible with ROCm 7.0.1 and ROCm 7.0.0. .. [#kfd_support-past-60] As of ROCm 6.4.0, forward and backward compatibility between the AMD GPU Driver (amdgpu) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The supported user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and AMD GPU Driver support matrix `_. .. [#ROCT-rocr-past-60] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package. 
From c8531ac7ea0a860f68336f13d29969428e5f425b Mon Sep 17 00:00:00 2001 From: amd-hsivasun Date: Mon, 10 Nov 2025 13:32:10 -0500 Subject: [PATCH 07/25] [Ex CI] Update pipeline Id for hipTensor to monorepo (#5638) --- .azuredevops/templates/steps/dependencies-rocm.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azuredevops/templates/steps/dependencies-rocm.yml b/.azuredevops/templates/steps/dependencies-rocm.yml index 99a08b72b..cc8aa57f2 100644 --- a/.azuredevops/templates/steps/dependencies-rocm.yml +++ b/.azuredevops/templates/steps/dependencies-rocm.yml @@ -115,7 +115,7 @@ parameters: developBranch: develop hasGpuTarget: true hipTensor: - pipelineId: 105 + pipelineId: 374 developBranch: develop hasGpuTarget: true llvm-project: From 39de859bd11d338c6455dc682664d93e9dc8f570 Mon Sep 17 00:00:00 2001 From: Alex Xu Date: Mon, 10 Nov 2025 14:10:01 -0500 Subject: [PATCH 08/25] update rocm-docs-core to 1.29.0 --- docs/sphinx/requirements.in | 2 +- docs/sphinx/requirements.txt | 30 +++++++++++++++--------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in index 4b700d8b7..e395875a0 100644 --- a/docs/sphinx/requirements.in +++ b/docs/sphinx/requirements.in @@ -1,4 +1,4 @@ -rocm-docs-core==1.27.0 +rocm-docs-core==1.28.0 sphinx-reredirects sphinx-sitemap sphinxcontrib.datatemplates==0.11.0 diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt index 832b43108..8d602d549 100644 --- a/docs/sphinx/requirements.txt +++ b/docs/sphinx/requirements.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile docs/sphinx/requirements.in +# pip-compile requirements.in # accessible-pygments==0.0.5 # via pydata-sphinx-theme @@ -29,7 +29,7 @@ cffi==2.0.0 # via # cryptography # pynacl -charset-normalizer==3.4.3 +charset-normalizer==3.4.4 # via requests click==8.3.0 # via @@ -37,7 +37,7 @@ click==8.3.0 # 
sphinx-external-toc comm==0.2.3 # via ipykernel -cryptography==46.0.2 +cryptography==46.0.3 # via pyjwt debugpy==1.8.17 # via ipykernel @@ -64,7 +64,7 @@ gitpython==3.1.45 # via rocm-docs-core greenlet==3.2.4 # via sqlalchemy -idna==3.10 +idna==3.11 # via requests imagesize==1.4.1 # via sphinx @@ -72,7 +72,7 @@ importlib-metadata==8.7.0 # via # jupyter-cache # myst-nb -ipykernel==6.30.1 +ipykernel==7.1.0 # via myst-nb ipython==8.37.0 # via @@ -94,7 +94,7 @@ jupyter-client==8.6.3 # via # ipykernel # nbclient -jupyter-core==5.8.1 +jupyter-core==5.9.1 # via # ipykernel # jupyter-client @@ -106,7 +106,7 @@ markdown-it-py==3.0.0 # myst-parser markupsafe==3.0.3 # via jinja2 -matplotlib-inline==0.1.7 +matplotlib-inline==0.2.1 # via # ipykernel # ipython @@ -137,11 +137,11 @@ parso==0.8.5 # via jedi pexpect==4.9.0 # via ipython -platformdirs==4.4.0 +platformdirs==4.5.0 # via jupyter-core prompt-toolkit==3.0.52 # via ipython -psutil==7.1.0 +psutil==7.1.3 # via ipykernel ptyprocess==0.7.0 # via pexpect @@ -163,7 +163,7 @@ pygments==2.19.2 # sphinx pyjwt[crypto]==2.10.1 # via pygithub -pynacl==1.6.0 +pynacl==1.6.1 # via pygithub python-dateutil==2.9.0.post0 # via jupyter-client @@ -179,7 +179,7 @@ pyzmq==27.1.0 # via # ipykernel # jupyter-client -referencing==0.36.2 +referencing==0.37.0 # via # jsonschema # jsonschema-specifications @@ -187,9 +187,9 @@ requests==2.32.5 # via # pygithub # sphinx -rocm-docs-core==1.27.0 +rocm-docs-core==1.28.0 # via -r requirements.in -rpds-py==0.27.1 +rpds-py==0.28.0 # via # jsonschema # referencing @@ -249,13 +249,13 @@ sphinxcontrib-runcmd==0.2.0 # via sphinxcontrib-datatemplates sphinxcontrib-serializinghtml==2.0.0 # via sphinx -sqlalchemy==2.0.43 +sqlalchemy==2.0.44 # via jupyter-cache stack-data==0.6.3 # via ipython tabulate==0.9.0 # via jupyter-cache -tomli==2.2.1 +tomli==2.3.0 # via sphinx tornado==6.5.2 # via From 04c7374f419887df46bd7b6777ab43b5a2e58b6a Mon Sep 17 00:00:00 2001 From: anisha-amd Date: Mon, 10 Nov 2025 15:26:54 -0500 
Subject: [PATCH 09/25] Docs: frameworks 25.10 - compatibility - DGL and llama.cpp (#5648) --- .wordlist.txt | 5 + .../compatibility-matrix-historical-6.0.csv | 4 +- docs/compatibility/compatibility-matrix.rst | 4 +- .../ml-compatibility/dgl-compatibility.rst | 147 ++++++++++++++---- .../llama-cpp-compatibility.rst | 18 +-- docs/how-to/deep-learning-rocm.rst | 2 + 6 files changed, 135 insertions(+), 45 deletions(-) diff --git a/.wordlist.txt b/.wordlist.txt index aed9dc1cc..38e475b40 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -139,6 +139,7 @@ EoS etcd fas FBGEMM +FiLM FIFOs FFT FFTs @@ -159,10 +160,12 @@ Fortran Fuyu GALB GAT +GATNE GCC GCD GCDs GCN +GCNN GDB GDDR GDR @@ -181,6 +184,8 @@ Glibc GLXT Gloo GMI +GNN +GNNs GPG GPR GPT diff --git a/docs/compatibility/compatibility-matrix-historical-6.0.csv b/docs/compatibility/compatibility-matrix-historical-6.0.csv index dd2cb27fe..b96b1c4cc 100644 --- a/docs/compatibility/compatibility-matrix-historical-6.0.csv +++ b/docs/compatibility/compatibility-matrix-historical-6.0.csv @@ -35,11 +35,11 @@ ROCm Version,7.1.0,7.0.2,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6 :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.6.0,0.6.0,0.6.0,0.4.35,0.4.35,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26 :doc:`verl <../compatibility/ml-compatibility/verl-compatibility>` [#verl_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.3.0.post0,N/A,N/A,N/A,N/A,N/A,N/A :doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>` [#stanford-megatron-lm_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,85f95ae,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A - :doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,2.4.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A + :doc:`DGL 
<../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat-past-60]_,N/A,N/A,2.4.0,2.4.0,N/A,N/A,2.4.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A :doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>` [#megablocks_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.7.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A :doc:`Taichi <../compatibility/ml-compatibility/taichi-compatibility>` [#taichi_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,1.8.0b1,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A :doc:`Ray <../compatibility/ml-compatibility/ray-compatibility>` [#ray_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,2.48.0.post0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A - :doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat-past-60]_,N/A,N/A,b6356,b6356,b6356,b6356,b5997,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A + :doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat-past-60]_,N/A,N/A,b6652,b6356,b6356,b6356,b5997,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A :doc:`FlashInfer <../compatibility/ml-compatibility/flashinfer-compatibility>` [#flashinfer_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,v0.2.5,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A `ONNX Runtime `_,1.22.0,1.22.0,1.22.0,1.20.0,1.20.0,1.20.0,1.20.0,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1 ,,,,,,,,,,,,,,,,,,,,, diff --git a/docs/compatibility/compatibility-matrix.rst b/docs/compatibility/compatibility-matrix.rst index b08216b86..ba4602327 100644 --- a/docs/compatibility/compatibility-matrix.rst +++ b/docs/compatibility/compatibility-matrix.rst @@ -183,7 +183,7 @@ compatibility and system requirements. .. [#mi100-710-os] **For ROCM 7.1.x** - AMD Instinct MI100 GPUs (gfx908) only supports Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, RHEL 8.10, and SLES 15 SP7. .. 
[#mi100-os] **For ROCm 7.0.x** - AMD Instinct MI100 GPUs (gfx908) only supports Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, and RHEL 8.10. .. [#tf-mi350] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead. -.. [#dgl_compat] DGL is supported only on ROCm 6.4.0. +.. [#dgl_compat] DGL is supported only on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0. .. [#llama-cpp_compat] llama.cpp is supported only on ROCm 7.0.0 and ROCm 6.4.x. .. [#mi325x_KVM] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0. .. [#driver_patch] AMD GPU Driver (amdgpu) 30.10.1 is a quality release that resolves an issue identified in the 30.10 release. There are no other significant changes or feature additions in ROCm 7.0.1 from ROCm 7.0.0. AMD GPU Driver (amdgpu) 30.10.1 is compatible with ROCm 7.0.1 and ROCm 7.0.0. @@ -304,7 +304,7 @@ Expand for full historical view of: .. [#tf-mi350-past-60] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead. .. [#verl_compat-past-60] verl is supported only on ROCm 6.2.0. .. [#stanford-megatron-lm_compat-past-60] Stanford Megatron-LM is supported only on ROCm 6.3.0. - .. [#dgl_compat-past-60] DGL is supported only on ROCm 6.4.0. + .. [#dgl_compat-past-60] DGL is supported only on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0. .. [#megablocks_compat-past-60] Megablocks is supported only on ROCm 6.3.0. .. [#taichi_compat-past-60] Taichi is supported only on ROCm 6.3.2. .. [#ray_compat-past-60] Ray is supported only on ROCm 6.4.1. 
diff --git a/docs/compatibility/ml-compatibility/dgl-compatibility.rst b/docs/compatibility/ml-compatibility/dgl-compatibility.rst index 3c18ce100..fad6924df 100644 --- a/docs/compatibility/ml-compatibility/dgl-compatibility.rst +++ b/docs/compatibility/ml-compatibility/dgl-compatibility.rst @@ -39,13 +39,13 @@ Support overview Version support -------------------------------------------------------------------------------- -DGL is supported on `ROCm 6.4.0 `__. +DGL is supported on `ROCm 7.0.0 `__, +`ROCm 6.4.3 `__, and `ROCm 6.4.0 `__. Supported devices -------------------------------------------------------------------------------- -- **Officially Supported**: AMD Instinct™ MI300X (through `hipBLASlt `__) -- **Partially Supported**: AMD Instinct™ MI250X +**Officially Supported**: AMD Instinct™ MI300X, MI250X .. _dgl-recommendations: @@ -60,16 +60,35 @@ GAT, GCN, and GraphSage. Using these models, a variety of use cases are supporte - 1D (Temporal) and 2D (Image) Classification - Drug Discovery -Multiple use cases of DGL have been tested and verified. -However, a recommended example follows a drug discovery pipeline using the ``SE3Transformer``. -Refer to the `AMD ROCm blog `_, -where you can search for DGL examples and best practices to optimize your training workflows on AMD GPUs. +For use cases and recommendations, refer to the `AMD ROCm blog `__, +where you can search for DGL examples and best practices to optimize your workloads on AMD GPUs. -Coverage includes: +* Although multiple use cases of DGL have been tested and verified, a few have been + outlined in the `DGL in the Real World: Running GNNs on Real Use Cases + `__ blog + post, which walks through four real-world graph neural network (GNN) workloads + implemented with the Deep Graph Library on ROCm. It covers tasks ranging from + heterogeneous e-commerce graphs and multiplex networks (GATNE) to molecular graph + regression (GNN-FiLM) and EEG-based neurological diagnosis (EEG-GCNN). 
For each use + case, the authors detail: the dataset and task, how DGL is used, and their experience + porting to ROCm. It is shown that DGL codebases often run without modification, with + seamless integration of graph operations, message passing, sampling, and convolution. -- Single-GPU training/inference -- Multi-GPU training +* The `Graph Neural Networks (GNNs) at Scale: DGL with ROCm on AMD Hardware + `__ + blog post introduces the Deep Graph Library (DGL) and its enablement on the AMD ROCm platform, + bringing high-performance graph neural network (GNN) training to AMD GPUs. DGL bridges + the gap between dense tensor frameworks and the irregular nature of graph data through a + graph-first, message-passing abstraction. Its design ensures scalability, flexibility, and + interoperability across frameworks like PyTorch and TensorFlow. AMD’s ROCm integration + enables DGL to run efficiently on HIP-based GPUs, supported by prebuilt Docker containers + and open-source repositories. This marks a major step in AMD's mission to advance open, + scalable AI ecosystems beyond traditional architectures. +You can pre-process datasets and begin training on AMD GPUs through: + +* Single-GPU training/inference +* Multi-GPU training .. _dgl-docker-compat: @@ -85,7 +104,7 @@ with ROCm backends on Docker Hub. The following Docker image tags and associated inventories represent the latest available DGL version from the official Docker Hub. Click the |docker-icon| to view the image on Docker Hub. -.. list-table:: DGL Docker image components +.. list-table:: :header-rows: 1 :class: docker-image-compatibility @@ -98,43 +117,83 @@ Click the |docker-icon| to view the image on Docker Hub. * - .. raw:: html - + rocm/dgl - - `6.4.0 `__. + - `7.0.0 `__ - `2.4.0 `__ - - `2.6.0 `__ + - `2.8.0 `__ - 24.04 - `3.12.9 `__ * - .. raw:: html - + rocm/dgl - - `6.4.0 `__. + - `7.0.0 `__ - `2.4.0 `__ - - `2.4.1 `__ + - `2.6.0 `__ + - 24.04 + - `3.12.9 `__ + + * - .. 
raw:: html + + rocm/dgl + + - `7.0.0 `__ + - `2.4.0 `__ + - `2.7.1 `__ + - 22.04 + - `3.10.16 `__ + + * - .. raw:: html + + rocm/dgl + + - `6.4.3 `__ + - `2.4.0 `__ + - `2.6.0 `__ + - 24.04 + - `3.12.9 `__ + + * - .. raw:: html + + rocm/dgl + + - `6.4.0 `__ + - `2.4.0 `__ + - `2.6.0 `__ + - 24.04 + - `3.12.9 `__ + + * - .. raw:: html + + rocm/dgl + + - `6.4.0 `__ + - `2.4.0 `__ + - `2.4.1 `__ - 24.04 - `3.12.9 `__ * - .. raw:: html - + rocm/dgl - - `6.4.0 `__. + - `6.4.0 `__ - `2.4.0 `__ - - `2.4.1 `__ + - `2.4.1 `__ - 22.04 - `3.10.16 `__ * - .. raw:: html - + rocm/dgl - - `6.4.0 `__. + - `6.4.0 `__ - `2.4.0 `__ - - `2.3.0 `__ + - `2.3.0 `__ - 22.04 - `3.10.16 `__ @@ -150,81 +209,102 @@ If you prefer to build it yourself, ensure the following dependencies are instal :header-rows: 1 * - ROCm library - - ROCm 6.4.0 Version + - ROCm 7.0.0 Version + - ROCm 6.4.x Version - Purpose * - `Composable Kernel `_ + - 1.1.0 - 1.1.0 - Enables faster execution of core operations like matrix multiplication (GEMM), convolutions and transformations. * - `hipBLAS `_ + - 3.0.0 - 2.4.0 - Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for matrix and vector operations. * - `hipBLASLt `_ + - 1.0.0 - 0.12.0 - hipBLASLt is an extension of the hipBLAS library, providing additional features like epilogues fused into the matrix multiplication kernel or use of integer tensor cores. * - `hipCUB `_ + - 4.0.0 - 3.4.0 - Provides a C++ template library for parallel algorithms for reduction, scan, sort and select. * - `hipFFT `_ + - 1.0.20 - 1.0.18 - Provides GPU-accelerated Fast Fourier Transform (FFT) operations. * - `hipRAND `_ + - 3.0.0 - 2.12.0 - Provides fast random number generation for GPUs. * - `hipSOLVER `_ + - 3.0.0 - 2.4.0 - Provides GPU-accelerated solvers for linear systems, eigenvalues, and singular value decompositions (SVD). * - `hipSPARSE `_ + - 4.0.1 - 3.2.0 - Accelerates operations on sparse matrices, such as sparse matrix-vector or matrix-matrix products. 
* - `hipSPARSELt `_ + - 0.2.4 - 0.2.3 - Accelerates operations on sparse matrices, such as sparse matrix-vector or matrix-matrix products. * - `hipTensor `_ + - 2.0.0 - 1.5.0 - Optimizes for high-performance tensor operations, such as contractions. * - `MIOpen `_ + - 3.5.0 - 3.4.0 - Optimizes deep learning primitives such as convolutions, pooling, normalization, and activation functions. * - `MIGraphX `_ + - 2.13.0 - 2.12.0 - Adds graph-level optimizations, ONNX models and mixed precision support and enable Ahead-of-Time (AOT) Compilation. * - `MIVisionX `_ + - 3.3.0 - 3.2.0 - Optimizes acceleration for computer vision and AI workloads like preprocessing, augmentation, and inferencing. * - `rocAL `_ - - :version-ref:`rocAL rocm_version` + - 3.3.0 + - 2.2.0 - Accelerates the data pipeline by offloading intensive preprocessing and augmentation tasks. rocAL is part of MIVisionX. * - `RCCL `_ - - 2.2.0 + - 2.26.6 + - 2.22.3 - Optimizes for multi-GPU communication for operations like AllReduce and Broadcast. * - `rocDecode `_ + - 1.0.0 - 0.10.0 - Provides hardware-accelerated data decoding capabilities, particularly for image, video, and other dataset formats. * - `rocJPEG `_ + - 1.1.0 - 0.8.0 - Provides hardware-accelerated JPEG image decoding and encoding. * - `RPP `_ + - 2.0.0 - 1.9.10 - Speeds up data augmentation, transformation, and other preprocessing steps. * - `rocThrust `_ + - 4.0.0 - 3.3.0 - Provides a C++ template library for parallel algorithms like sorting, reduction, and scanning. 
* - `rocWMMA `_ + - 2.0.0 - 1.7.0 - Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix multiplication (GEMM) and accumulation operations with mixed precision @@ -253,26 +333,29 @@ Instead of listing them all, support is grouped into the following categories to * DGL NN * DGL Optim * DGL Sparse - +* GraphBolt Unsupported features ================================================================================ -* GraphBolt -* Partial TF32 Support (MI250X only) +* TF32 Support (only supported for PyTorch 2.7 and above) * Kineto/ROCTracer integration Unsupported functions ================================================================================ -* ``more_nnz`` +* ``bfs`` * ``format`` * ``multiprocess_sparse_adam_state_dict`` -* ``record_stream_ndarray`` * ``half_spmm`` * ``segment_mm`` * ``gather_mm_idx_b`` -* ``pgexplainer`` * ``sample_labors_prob`` * ``sample_labors_noprob`` +* ``sparse_admin`` + +Previous versions +=============================================================================== +See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/dgl-history` to find documentation for previous releases +of the ``ROCm/dgl`` Docker image. 
\ No newline at end of file diff --git a/docs/compatibility/ml-compatibility/llama-cpp-compatibility.rst b/docs/compatibility/ml-compatibility/llama-cpp-compatibility.rst index b79baf253..9c958afdf 100644 --- a/docs/compatibility/ml-compatibility/llama-cpp-compatibility.rst +++ b/docs/compatibility/ml-compatibility/llama-cpp-compatibility.rst @@ -45,7 +45,7 @@ llama.cpp is supported on `ROCm 7.0.0 `__ Supported devices -------------------------------------------------------------------------------- -**Officially Supported**: AMD Instinct™ MI300X, MI325X, MI210 +**Officially Supported**: AMD Instinct™ MI325X, MI300X, MI210 Use cases and recommendations ================================================================================ @@ -109,27 +109,27 @@ Click |docker-icon| to view the image on Docker Hub. * - .. raw:: html - rocm/llama.cpp + rocm/llama.cpp - .. raw:: html - rocm/llama.cpp + rocm/llama.cpp - .. raw:: html - rocm/llama.cpp - - `b6356 `__ + rocm/llama.cpp + - `b6652 `__ - `7.0.0 `__ - 24.04 * - .. raw:: html - rocm/llama.cpp + rocm/llama.cpp - .. raw:: html - rocm/llama.cpp + rocm/llama.cpp - .. raw:: html - rocm/llama.cpp - - `b6356 `__ + rocm/llama.cpp + - `b6652 `__ - `7.0.0 `__ - 22.04 diff --git a/docs/how-to/deep-learning-rocm.rst b/docs/how-to/deep-learning-rocm.rst index 8a535ea54..b19b459df 100644 --- a/docs/how-to/deep-learning-rocm.rst +++ b/docs/how-to/deep-learning-rocm.rst @@ -84,6 +84,8 @@ The table below summarizes information about ROCm-enabled deep learning framewor - - `Docker image `__ + - `Wheels package `__ + - .. 
raw:: html From e05cdca54f1858f4c0be69d1fdb176fac06ecf31 Mon Sep 17 00:00:00 2001 From: peterjunpark Date: Tue, 11 Nov 2025 09:00:07 -0500 Subject: [PATCH 10/25] Fix references to vLLM docs (#5651) --- .../rocm-for-ai/inference-optimization/vllm-optimization.rst | 4 +++- docs/how-to/rocm-for-ai/inference/deploy-your-model.rst | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/how-to/rocm-for-ai/inference-optimization/vllm-optimization.rst b/docs/how-to/rocm-for-ai/inference-optimization/vllm-optimization.rst index 7a6219e0a..93ef8c5df 100644 --- a/docs/how-to/rocm-for-ai/inference-optimization/vllm-optimization.rst +++ b/docs/how-to/rocm-for-ai/inference-optimization/vllm-optimization.rst @@ -695,7 +695,9 @@ There are two strategies: vLLM engine arguments ===================== -Selected arguments that often help on ROCm. See `engine args docs `_ for the full list. +Selected arguments that often help on ROCm. See `Engine Arguments +`__ in the vLLM +documentation for the full list. Configure --max-num-seqs ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/how-to/rocm-for-ai/inference/deploy-your-model.rst b/docs/how-to/rocm-for-ai/inference/deploy-your-model.rst index 65d6ac909..d54c686c1 100644 --- a/docs/how-to/rocm-for-ai/inference/deploy-your-model.rst +++ b/docs/how-to/rocm-for-ai/inference/deploy-your-model.rst @@ -22,7 +22,7 @@ See the `GitHub repository `_ and `officia `_ for more information. For guidance on using vLLM with ROCm, refer to `Installation with ROCm -`_. +`__. 
vLLM installation ----------------- From eb956cfc5c12b79e5582a9160e4f93e162d82439 Mon Sep 17 00:00:00 2001 From: peterjunpark Date: Tue, 11 Nov 2025 09:22:11 -0500 Subject: [PATCH 11/25] Fixed wording related to VLLM_V1_USE_PREFILL_DECODE_ATTENTION (#5605) Co-authored-by: Hongxia Yang --- .../vllm-optimization.rst | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/docs/how-to/rocm-for-ai/inference-optimization/vllm-optimization.rst b/docs/how-to/rocm-for-ai/inference-optimization/vllm-optimization.rst index 93ef8c5df..2e9dc7a20 100644 --- a/docs/how-to/rocm-for-ai/inference-optimization/vllm-optimization.rst +++ b/docs/how-to/rocm-for-ai/inference-optimization/vllm-optimization.rst @@ -67,7 +67,7 @@ Quick start examples: export VLLM_ROCM_USE_AITER=1 vllm serve MODEL_NAME - # Enable only AITER Triton Prefill-Decode (split) attention + # Enable AITER Fused MoE and enable Triton Prefill-Decode (split) attention export VLLM_ROCM_USE_AITER=1 export VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1 export VLLM_ROCM_USE_AITER_MHA=0 @@ -244,14 +244,17 @@ Most users won't need this, but you can override the defaults: * - AITER MHA (standard models) - ``VLLM_ROCM_USE_AITER=1`` (auto-selects for non-MLA models) - * - AITER Triton Prefill-Decode (split) + * - vLLM Triton Unified (default) + - ``VLLM_ROCM_USE_AITER=0`` (or unset) + + * - Triton Prefill-Decode (split) without AITER + - | ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1`` + + * - Triton Prefill-Decode (split) along with AITER Fused-MoE - | ``VLLM_ROCM_USE_AITER=1`` | ``VLLM_ROCM_USE_AITER_MHA=0`` | ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1`` - * - vLLM Triton Unified (default) - - ``VLLM_ROCM_USE_AITER=0`` (or unset) - * - AITER Unified Attention - | ``VLLM_ROCM_USE_AITER=1`` | ``VLLM_ROCM_USE_AITER_MHA=0`` @@ -269,11 +272,11 @@ Most users won't need this, but you can override the defaults: --block-size 1 \ --tensor-parallel-size 8 - # Advanced: Use Prefill-Decode split (for short input cases) + # 
Advanced: Use Prefill-Decode split (for short input cases) with AITER Fused-MoE VLLM_ROCM_USE_AITER=1 \ VLLM_ROCM_USE_AITER_MHA=0 \ VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1 \ - vllm serve meta-llama/Llama-3.3-70B-Instruct + vllm serve meta-llama/Llama-4-Scout-17B-16E **Which backend should I choose?** @@ -352,14 +355,14 @@ vLLM V1 on ROCm provides these attention implementations: 3. **AITER Triton Prefill–Decode Attention** (hybrid, Instinct MI300X-optimized) - * Enable with ``VLLM_ROCM_USE_AITER=1``, ``VLLM_ROCM_USE_AITER_MHA=0``, and ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1`` + * Enable with ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1`` * Uses separate kernels for prefill and decode phases: * **Prefill**: ``context_attention_fwd`` Triton kernel * **Primary decode**: ``torch.ops._rocm_C.paged_attention`` (custom ROCm kernel optimized for head sizes 64/128, block sizes 16/32, GQA 1–16, context ≤131k; sliding window not supported) * **Fallback decode**: ``kernel_paged_attention_2d`` Triton kernel when shapes don't meet primary decode requirements - * Usually better compared to unified Triton kernels (both vLLM and AITER variants) + * Usually better compared to unified Triton kernels * Performance vs AITER MHA varies: AITER MHA is typically faster overall, but Prefill-Decode split may win in short input scenarios * The custom paged attention decode kernel is controlled by ``VLLM_ROCM_CUSTOM_PAGED_ATTN`` (default **True**) From 09c6a9fdefc34acf778b0ab07424f0aae347e45d Mon Sep 17 00:00:00 2001 From: Pratik Basyal Date: Tue, 11 Nov 2025 16:54:36 -0500 Subject: [PATCH 12/25] 710 RCCL Known Issues and CRIU note update (#5647) * RCCL ALltoALL known issue added * CRIU note added * Minor change * Review feedback and AMDSMI detailed changelog link added * Github issue link added --- CHANGELOG.md | 4 ++++ RELEASE.md | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b49379b1..dc1f665a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 
+48,10 @@ for a complete overview of this release. * Fixed certain output in `amd-smi monitor` when GPUs are partitioned. It fixes the issue with amd-smi monitor such as: `amd-smi monitor -Vqt`, `amd-smi monitor -g 0 -Vqt -w 1`, and `amd-smi monitor -Vqt --file /tmp/test1`. These commands will now be able to display as normal in partitioned GPU scenarios. +```{note} +See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.1/CHANGELOG.md#amd_smi_lib-for-rocm-710) for details, examples, and in-depth descriptions. +``` + ### **Composable Kernel** (1.1.0) #### Added diff --git a/RELEASE.md b/RELEASE.md index a7ef63f2e..bfe73e6e1 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -735,6 +735,10 @@ For a historical overview of ROCm component updates, see the {doc}`ROCm consolid * Fixed certain output in `amd-smi monitor` when GPUs are partitioned. It fixes the issue with amd-smi monitor such as: `amd-smi monitor -Vqt`, `amd-smi monitor -g 0 -Vqt -w 1`, and `amd-smi monitor -Vqt --file /tmp/test1`. These commands will now be able to display as normal in partitioned GPU scenarios. +```{note} +See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.1/CHANGELOG.md#amd_smi_lib-for-rocm-710) for details, examples, and in-depth descriptions. +``` + ### **Composable Kernel** (1.1.0) #### Added @@ -1513,6 +1517,10 @@ As a workaround, first install `libopenblas-dev` or `libopenblas-deve`, dependin Some `gemm_ex` operations with `half` or `f32_r` data types might yield 16-bit precision results instead of the expected 32-bit precision when matrix dimensions are m=1 or n=1. The issue results from the optimization that enables `_ex` APIs to use lower precision multiples. It limits the high-precision matrix operations performed in PyTorch with rocBLAS and hipBLAS. The issue will be fixed in a future ROCm release. See [GitHub issue #5640](https://github.com/ROCm/ROCm/issues/5640). 
+### RCCL profiler plugin failure with AllToAll operations + +The RCCL profiler plugin `librccl-profiler.so` might fail with a segmentation fault during `AllToAll` collective operations due to improperly assigned point-to-point task function pointers. This leads to invalid memory access and prevents profiling of `AllToAll` performance. Other operations, like `AllReduce`, are unaffected. It's recommended to avoid using the RCCL profiler plugin with `AllToAll` operations until the fix is available. This issue is resolved in the {fab}`github`[RCCL `develop` branch](https://github.com/ROCm/rccl/tree/develop) and will be part of a future ROCm release. See [GitHub issue #5653](https://github.com/ROCm/ROCm/issues/5653). + ## ROCm resolved issues The following are previously known issues resolved in this release. For resolved issues related to From 80ced9552625fa5c38efc2e1ea8d6ca92cb93484 Mon Sep 17 00:00:00 2001 From: Pratik Basyal Date: Thu, 13 Nov 2025 10:18:15 -0500 Subject: [PATCH 13/25] Changelog updated (#5660) --- CHANGELOG.md | 9 ++------- RELEASE.md | 9 ++------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc1f665a9..827cdf9a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -705,17 +705,12 @@ As of ROCm 7.0, the internal error state is cleared on each call to `hipGetLastE ### **rocSOLVER** (3.31.0) -#### Added - -* Hybrid computation support for existing STEQR routines. - #### Optimized Improved the performance of: -* BDSQR and downstream functions such as GESVD. -* STEQR and downstream functions such as SYEV/HEEV. -* LARFT and downstream functions such as GEQR2 and GEQRF. +* LARF, LARFT, GEQR2, and downstream functions such as GEQRF. +* STEDC and divide and conquer Eigensolvers. 
### **rocSPARSE** (4.1.0) diff --git a/RELEASE.md b/RELEASE.md index bfe73e6e1..0a2ea0b78 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1392,17 +1392,12 @@ As of ROCm 7.0, the internal error state is cleared on each call to `hipGetLastE ### **rocSOLVER** (3.31.0) -#### Added - -* Hybrid computation support for existing STEQR routines. - #### Optimized Improved the performance of: -* BDSQR and downstream functions such as GESVD. -* STEQR and downstream functions such as SYEV/HEEV. -* LARFT and downstream functions such as GEQR2 and GEQRF. +* LARF, LARFT, GEQR2, and downstream functions such as GEQRF. +* STEDC and divide and conquer Eigensolvers. ### **rocSPARSE** (4.1.0) From d1ce815d8dd26af36b14387d786e6ed4c0988903 Mon Sep 17 00:00:00 2001 From: amd-hsivasun Date: Thu, 13 Nov 2025 16:08:02 -0500 Subject: [PATCH 14/25] [Ex CI] Add rocprofiler-sdk dep to build for rocprofiler-compute (#5664) --- .azuredevops/components/rocprofiler-compute.yml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/.azuredevops/components/rocprofiler-compute.yml b/.azuredevops/components/rocprofiler-compute.yml index bccb51f67..1bd8a206c 100644 --- a/.azuredevops/components/rocprofiler-compute.yml +++ b/.azuredevops/components/rocprofiler-compute.yml @@ -65,6 +65,10 @@ parameters: - pytest - pytest-cov - pytest-xdist +- name: rocmDependencies + type: object + default: + - rocprofiler-sdk - name: rocmTestDependencies type: object default: @@ -101,7 +105,7 @@ jobs: ${{ if parameters.buildDependsOn }}: dependsOn: - ${{ each build in parameters.buildDependsOn }}: - - ${{ build }}_${{ job.os }}_${{ job.target }} + - ${{ build }}_${{ job.target }} variables: - group: common - template: /.azuredevops/variables-global.yml @@ -119,6 +123,14 @@ jobs: parameters: checkoutRepo: ${{ parameters.checkoutRepo }} sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml + parameters: + checkoutRef: ${{ 
parameters.checkoutRef }} + dependencyList: ${{ parameters.rocmDependencies }} + gpuTarget: ${{ job.target }} + aggregatePipeline: ${{ parameters.aggregatePipeline }} + ${{ if parameters.triggerDownstreamJobs }}: + downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml parameters: extraBuildFlags: >- From 636d4cc736414afc5211ec8a75fbe18250d380cc Mon Sep 17 00:00:00 2001 From: Carrie Fallows Date: Thu, 13 Nov 2025 20:56:45 -0500 Subject: [PATCH 15/25] Adding dependencies to rocmDependencies in rocprof-compute yaml. Now needed for building because of rocprofiler-sdk dependency. Signed-off-by: Carrie Fallows --- .azuredevops/components/rocprofiler-compute.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.azuredevops/components/rocprofiler-compute.yml b/.azuredevops/components/rocprofiler-compute.yml index 1bd8a206c..2bd4f0978 100644 --- a/.azuredevops/components/rocprofiler-compute.yml +++ b/.azuredevops/components/rocprofiler-compute.yml @@ -68,6 +68,9 @@ parameters: - name: rocmDependencies type: object default: + - clr + - llvm-project + - ROCR-Runtime - rocprofiler-sdk - name: rocmTestDependencies type: object From 99c1590057e64de644b8516d8d12b09cb984c7b1 Mon Sep 17 00:00:00 2001 From: amd-hsivasun Date: Fri, 14 Nov 2025 12:19:06 -0500 Subject: [PATCH 16/25] [Ex CI] Added ROCM_PATH env var to rocprofiler-compute (#5666) --- .azuredevops/components/rocprofiler-compute.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.azuredevops/components/rocprofiler-compute.yml b/.azuredevops/components/rocprofiler-compute.yml index 1bd8a206c..e7acb7147 100644 --- a/.azuredevops/components/rocprofiler-compute.yml +++ b/.azuredevops/components/rocprofiler-compute.yml @@ -109,6 +109,8 @@ jobs: variables: - group: common - template: /.azuredevops/variables-global.yml + - name: ROCM_PATH + value: $(Agent.BuildDirectory)/rocm pool: vmImage: ${{ variables.BASE_BUILD_POOL }} workspace: From 
66b8b96c72c60589424e98d8a1adbaa57845ca3c Mon Sep 17 00:00:00 2001 From: Adel Johar Date: Thu, 6 Nov 2025 13:25:13 +0100 Subject: [PATCH 17/25] [Ex CI] Add missing dependencies for rccl and mivisionx --- .azuredevops/components/rocm-examples.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.azuredevops/components/rocm-examples.yml b/.azuredevops/components/rocm-examples.yml index 9ed999bf5..3f895bda6 100644 --- a/.azuredevops/components/rocm-examples.yml +++ b/.azuredevops/components/rocm-examples.yml @@ -62,6 +62,7 @@ parameters: - llvm-project - MIOpen - MIVisionX + - rocm_smi_lib - rccl - rocALUTION - rocBLAS @@ -100,6 +101,7 @@ parameters: - llvm-project - MIOpen - MIVisionX + - rocm_smi_lib - rccl - rocALUTION - rocBLAS @@ -146,6 +148,7 @@ jobs: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: aptPackages: ${{ parameters.aptPackages }} + pipModules: ${{ parameters.pipModules }} registerROCmPackages: true - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml parameters: @@ -245,5 +248,6 @@ jobs: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml parameters: aptPackages: ${{ parameters.aptPackages }} + pipModules: ${{ parameters.pipModules }} environment: test gpuTarget: ${{ job.target }} From 8d51d0e80304115d64a786db6dcc4107b0c6996a Mon Sep 17 00:00:00 2001 From: Adel Johar Date: Thu, 6 Nov 2025 13:44:44 +0100 Subject: [PATCH 18/25] [Ex CI] Add CXX override for MIGraphX --- .azuredevops/components/AMDMIGraphX.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.azuredevops/components/AMDMIGraphX.yml b/.azuredevops/components/AMDMIGraphX.yml index 4aaada249..1c5df8226 100644 --- a/.azuredevops/components/AMDMIGraphX.yml +++ b/.azuredevops/components/AMDMIGraphX.yml @@ -152,6 +152,7 @@ jobs: -DCMAKE_BUILD_TYPE=Release -DGPU_TARGETS=${{ job.target }} -DAMDGPU_TARGETS=${{ job.target }} + -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++ 
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm -DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include @@ -217,6 +218,7 @@ jobs: -DCMAKE_BUILD_TYPE=Release -DGPU_TARGETS=${{ job.target }} -DAMDGPU_TARGETS=${{ job.target }} + -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++ -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm -DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include From 99f0875e70468586013c1280afd50f397fa3bd54 Mon Sep 17 00:00:00 2001 From: amd-hsivasun Date: Thu, 20 Nov 2025 13:52:01 -0500 Subject: [PATCH 19/25] [Ex CI] amdsmi monorepo enablement (#5677) * [Ex CI] amdsmi monorepo enablement * Fix amdsmi yaml --- .azuredevops/components/amdsmi.yml | 104 ++++++++++++++++++----------- 1 file changed, 64 insertions(+), 40 deletions(-) diff --git a/.azuredevops/components/amdsmi.yml b/.azuredevops/components/amdsmi.yml index a5a4e3cad..b00d32460 100644 --- a/.azuredevops/components/amdsmi.yml +++ b/.azuredevops/components/amdsmi.yml @@ -1,10 +1,29 @@ parameters: +- name: componentName + type: string + default: amdsmi - name: checkoutRepo type: string default: 'self' - name: checkoutRef type: string default: '' +# monorepo related parameters +- name: sparseCheckoutDir + type: string + default: '' +- name: triggerDownstreamJobs + type: boolean + default: false +- name: downstreamAggregateNames + type: string + default: '' +- name: buildDependsOn + type: object + default: null +- name: unifiedBuild + type: boolean + default: false # set to true if doing full build of ROCm stack # and dependencies are pulled from same pipeline - name: aggregatePipeline @@ -31,7 +50,7 @@ parameters: jobs: - ${{ each job in parameters.jobMatrix.buildJobs }}: - - job: amdsmi_build_${{ job.os }} + - job: ${{ parameters.componentName }}_build_${{ job.os }} pool: ${{ if 
eq(job.os, 'ubuntu2404') }}: vmImage: 'ubuntu-24.04' @@ -55,6 +74,7 @@ jobs: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml parameters: checkoutRepo: ${{ parameters.checkoutRepo }} + sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml parameters: os: ${{ job.os }} @@ -65,50 +85,54 @@ jobs: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml parameters: os: ${{ job.os }} + componentName: ${{ parameters.componentName }} + sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml parameters: os: ${{ job.os }} + componentName: ${{ parameters.componentName }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml # parameters: # aptPackages: ${{ parameters.aptPackages }} -- ${{ each job in parameters.jobMatrix.testJobs }}: - - job: amdsmi_test_${{ job.os }}_${{ job.target }} - dependsOn: amdsmi_build_${{ job.os }} - condition: - and(succeeded(), - eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), - not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])), - eq(${{ parameters.aggregatePipeline }}, False) - ) - variables: - - group: common - - template: /.azuredevops/variables-global.yml - pool: ${{ job.target }}_test_pool - workspace: - clean: all - steps: - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml - parameters: - aptPackages: ${{ parameters.aptPackages }} - packageManager: ${{ job.packageManager }} - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml - parameters: - os: ${{ job.os }} - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - parameters: - runRocminfo: false - - template: ${{ 
variables.CI_TEMPLATE_PATH }}/steps/test.yml - parameters: - componentName: amdsmi - testDir: '$(Agent.BuildDirectory)' - testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst' - testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes' - os: ${{ job.os }} - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml - parameters: - aptPackages: ${{ parameters.aptPackages }} - environment: test - gpuTarget: ${{ job.target }} +- ${{ if eq(parameters.unifiedBuild, False) }}: + - ${{ each job in parameters.jobMatrix.testJobs }}: + - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }} + dependsOn: ${{ parameters.componentName }}_build_${{ job.os }} + condition: + and(succeeded(), + eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), + not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')), + eq(${{ parameters.aggregatePipeline }}, False) + ) + variables: + - group: common + - template: /.azuredevops/variables-global.yml + pool: ${{ job.target }}_test_pool + workspace: + clean: all + steps: + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml + parameters: + aptPackages: ${{ parameters.aptPackages }} + packageManager: ${{ job.packageManager }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml + parameters: + os: ${{ job.os }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml + parameters: + runRocminfo: false + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml + parameters: + componentName: ${{ parameters.componentName }} + testDir: '$(Agent.BuildDirectory)' + testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst' + testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes' + os: ${{ job.os }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml 
+ parameters: + aptPackages: ${{ parameters.aptPackages }} + environment: test + gpuTarget: ${{ job.target }} From 411334716cfc4b2f640eeedbc5626e65f9b48815 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 20 Nov 2025 13:54:33 -0500 Subject: [PATCH 20/25] Bump rocm-docs-core from 1.28.0 to 1.29.0 in /docs/sphinx (#5659) Bumps [rocm-docs-core](https://github.com/ROCm/rocm-docs-core) from 1.28.0 to 1.29.0. - [Release notes](https://github.com/ROCm/rocm-docs-core/releases) - [Changelog](https://github.com/ROCm/rocm-docs-core/blob/develop/CHANGELOG.md) - [Commits](https://github.com/ROCm/rocm-docs-core/compare/v1.28.0...v1.29.0) --- updated-dependencies: - dependency-name: rocm-docs-core dependency-version: 1.29.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- docs/sphinx/requirements.in | 2 +- docs/sphinx/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in index e395875a0..e7beea08a 100644 --- a/docs/sphinx/requirements.in +++ b/docs/sphinx/requirements.in @@ -1,4 +1,4 @@ -rocm-docs-core==1.28.0 +rocm-docs-core==1.29.0 sphinx-reredirects sphinx-sitemap sphinxcontrib.datatemplates==0.11.0 diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt index 8d602d549..90bb60995 100644 --- a/docs/sphinx/requirements.txt +++ b/docs/sphinx/requirements.txt @@ -187,7 +187,7 @@ requests==2.32.5 # via # pygithub # sphinx -rocm-docs-core==1.28.0 +rocm-docs-core==1.29.0 # via -r requirements.in rpds-py==0.28.0 # via From 4c04da05c3a689e1f7d173eeb1efb48b46285a74 Mon Sep 17 00:00:00 2001 From: amd-hsivasun Date: Thu, 20 Nov 2025 18:05:17 -0500 Subject: [PATCH 21/25] [Ex CI] Update pipeline ID for amdsmi to monorepo (#5685) --- 
.azuredevops/templates/steps/dependencies-rocm.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.azuredevops/templates/steps/dependencies-rocm.yml b/.azuredevops/templates/steps/dependencies-rocm.yml index cc8aa57f2..cb50641da 100644 --- a/.azuredevops/templates/steps/dependencies-rocm.yml +++ b/.azuredevops/templates/steps/dependencies-rocm.yml @@ -35,8 +35,8 @@ parameters: developBranch: develop hasGpuTarget: true amdsmi: - pipelineId: 99 - developBranch: amd-staging + pipelineId: 376 + developBranch: develop hasGpuTarget: false aomp-extras: pipelineId: 111 From 807ec6afcfea412083227dfa1dd7778f9cb5264f Mon Sep 17 00:00:00 2001 From: amd-hsivasun Date: Thu, 20 Nov 2025 18:05:24 -0500 Subject: [PATCH 22/25] [Ex CI] Update AMDMIGraphX CMake version (#5683) --- .azuredevops/components/AMDMIGraphX.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.azuredevops/components/AMDMIGraphX.yml b/.azuredevops/components/AMDMIGraphX.yml index 1c5df8226..7e6460468 100644 --- a/.azuredevops/components/AMDMIGraphX.yml +++ b/.azuredevops/components/AMDMIGraphX.yml @@ -128,6 +128,9 @@ jobs: parameters: aptPackages: ${{ parameters.aptPackages }} pipModules: ${{ parameters.pipModules }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml + parameters: + cmakeVersion: '3.28.6' - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml parameters: @@ -193,6 +196,9 @@ jobs: parameters: aptPackages: ${{ parameters.aptPackages }} pipModules: ${{ parameters.pipModules }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml + parameters: + cmakeVersion: '3.28.6' - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml parameters: From 702d8e4c8e1c17d9660033b1c47382540f492aff Mon Sep 17 00:00:00 2001 From: Pratik Basyal Date: Mon, 24 Nov 2025 11:52:38 
-0500 Subject: [PATCH 23/25] Update MIGraphX build-from-source link (#5691) --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 0a2ea0b78..5fdec3193 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1481,7 +1481,7 @@ issues related to individual components, review the [Detailed component changes] ### MIGraphX Python API will fail when running on Python 3.13 -Applications using the MIGraphX Python API will fail when running on Python 3.13 and return the error message `AttributeError: module 'migraphx' has no attribute 'parse_onnx'`. The issue doesn't occur when you manually build MIGraphX. For detailed instructions, see [Building from source](https://rocm.docs.amd.com/projects/AMDMIGraphX/en/latest/install/building_migraphx.html). As a workaround, change the Python version to the one found in the installed location: +Applications using the MIGraphX Python API will fail when running on Python 3.13 and return the error message `AttributeError: module 'migraphx' has no attribute 'parse_onnx'`. The issue doesn't occur when you manually build MIGraphX. For detailed instructions, see [Building from source](https://rocm.docs.amd.com/projects/AMDMIGraphX/en/latest/install/install-migraphx.html#build-migraphx-from-source). 
As a workaround, change the Python version to the one found in the installed location: ``` ls -l /opt/rocm-7.1.0/lib/libmigraphx_py_*.so From 096d91e190c206e1b9af2a2ccb8c1e5df92be428 Mon Sep 17 00:00:00 2001 From: srayasam-amd Date: Wed, 26 Nov 2025 16:08:03 +0530 Subject: [PATCH 24/25] Updating rocm version to 7.1.1 GA (#5697) * 7.1.1 GA update * 7.1.1 GA update * Update rocm-7.1.1.xml * Update default.xml --- default.xml | 4 ++- tools/rocm-build/rocm-7.1.1.xml | 60 +++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 tools/rocm-build/rocm-7.1.1.xml diff --git a/default.xml b/default.xml index 3f1a47817..dfb3c4d10 100644 --- a/default.xml +++ b/default.xml @@ -1,7 +1,7 @@ - @@ -25,6 +25,7 @@ + @@ -45,6 +46,7 @@ rocprofiler rocr-runtime roctracer --> + diff --git a/tools/rocm-build/rocm-7.1.1.xml b/tools/rocm-build/rocm-7.1.1.xml new file mode 100644 index 000000000..0f0fdbd26 --- /dev/null +++ b/tools/rocm-build/rocm-7.1.1.xml @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 4490c57c6a72802fa4e38c0a27cdd1b95776fc3b Mon Sep 17 00:00:00 2001 From: Alex Xu Date: Wed, 26 Nov 2025 10:33:02 -0500 Subject: [PATCH 25/25] resolve merge conflict --- docs/compatibility/compatibility-matrix.rst | 30 --------------------- 1 file changed, 30 deletions(-) diff --git a/docs/compatibility/compatibility-matrix.rst b/docs/compatibility/compatibility-matrix.rst index 1b937abec..d156df1e9 100644 --- a/docs/compatibility/compatibility-matrix.rst +++ b/docs/compatibility/compatibility-matrix.rst @@ -155,38 +155,8 @@ compatibility and system requirements. .. rubric:: Footnotes -<<<<<<< HEAD .. [#os-compatibility] Some operating systems are supported on limited GPUs. For detailed information, see the latest :ref:`supported_distributions`. For version specific information, see `ROCm 7.1.1 `_, `ROCm 7.1.0 `_, and `ROCm 6.4.0 `_. .. 
[#gpu-compatibility] Some GPUs have limited operating system support. For detailed information, see the latest :ref:`supported_GPUs`. For version specific information, see `ROCm 7.1.1 `_, `ROCm 7.1.0 `_, and `ROCm 6.4.0 `_. -======= -.. [#rhel-10-702] RHEL 10.0 and RHEL 9.6 are supported on all listed :ref:`supported_GPUs` except AMD Radeon PRO V620 GPU. -.. [#rhel-94-702] RHEL 9.4 is supported on all AMD Instinct GPUs listed under :ref:`supported_GPUs`. -.. [#rhel-700] RHEL 8.10 is supported only on AMD Instinct MI300X, MI300A, MI250X, MI250, MI210, and MI100 GPUs. -.. [#sles-710] **For ROCm 7.1.x** - SLES 15 SP7 is supported only on AMD Instinct MI325X, MI300X, MI300A, MI250X, MI250, MI210, and MI100 GPUs. -.. [#sles-db-700] **For ROCm 7.0.x** - SLES 15 SP7 and Debian 12 are supported only on AMD Instinct MI300X, MI300A, MI250X, MI250, and MI210 GPUs. -.. [#ol-710-mi300x] **For ROCm 7.1.x** - Oracle Linux 10 and 9 are supported only on AMD Instinct MI355X, MI350X, MI325X, and MI300X GPUs. Oracle Linux 8 is supported only on AMD Instinct MI300X GPU. -.. [#ol-700-mi300x] **For ROCm 7.0.x** - Oracle Linux 10 and 9 are supported only on AMD Instinct MI355X, MI350X, and MI300X GPUs. Oracle Linux 8 is supported only on AMD Instinct MI300X GPU. -.. [#ol-mi300x] **Prior ROCm 7.0.0** - Oracle Linux is supported only on AMD Instinct MI300X GPUs. -.. [#db-710-mi300x] **For ROCm 7.1.x** - Debian 13 is supported only on AMD Instinct MI325X and MI300X GPUs. -.. [#db12-710] **For ROCm 7.1.x** - Debian 12 is supported only on AMD Instinct MI325X, MI300X, MI300A, MI250X, MI250, and MI210 GPUs. -.. [#db-mi300x] **For ROCm 7.0.2** - Debian 13 is supported only on AMD Instinct MI300X GPUs. -.. [#az-mi300x] Starting ROCm 6.4.0, Azure Linux 3.0 is supported only on AMD Instinct MI300X and AMD Radeon PRO V710 GPUs. -.. [#rl-700] Rocky Linux 9 is supported only on AMD Instinct MI300X and MI300A GPUs. -.. 
[#single-node] **Prior to ROCm 7.0.0** - Debian 12 is supported only on AMD Instinct MI300X GPUs for single-node functionality. -.. [#mi350x-os-710] AMD Instinct MI355X (gfx950) and MI350X (gfx950) GPUs supports all listed :ref:`supported_distributions` except RHEL 8.10, SLES 15 SP7, Debian 12, Rocky 9, Azure Linux 3.0, and Oracle Linux 8. -.. [#mi350x-os-700] AMD Instinct MI355X (gfx950) and MI350X (gfx950) GPUs only supports Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, Oracle Linux 10, and Oracle Linux 9. -.. [#RDNA-OS-700] **For ROCm 7.0.x** - AMD Radeon PRO AI PRO R9700 (gfx1201), AMD Radeon RX 9070 XT (gfx1201), AMD Radeon RX 9070 GRE (gfx1201), AMD Radeon RX 9070 (gfx1201), AMD Radeon RX 9060 XT (gfx1200), AMD Radeon RX 9060 (gfx1200), AMD Radeon RX 7800 XT (gfx1101), AMD Radeon RX 7700 XT (gfx1101), AMD Radeon PRO W7700 (gfx1101), and AMD Radeon PRO W6800 (gfx1030) only supports Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, and RHEL 9.6. -.. [#rd-v710] **For ROCm 7.0.x** - AMD Radeon PRO V710 (gfx1101) GPUs only supports Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, and Azure Linux 3.0. -.. [#rd-v620] **For ROCm 7.0.x** - AMD Radeon PRO V620 (gfx1030) GPUs only supports Ubuntu 24.04.3 and Ubuntu 22.04.5. -.. [#mi325x-os-710] **For ROCm 7.1.x** - AMD Instinct MI325X GPUs (gfx942) supports all listed :ref:`supported_distributions` except RHEL 8.10, Rocky 9, Azure Linux 3.0, and Oracle Linux 8. -.. [#mi325x-os] **For ROCm 7.0.x** - AMD Instinct MI325X GPUs (gfx942) only supports Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4. -.. [#mi300x-os] **Starting ROCm 7.0.x** - AMD Instinct MI300X GPUs (gfx942) supports all listed :ref:`supported_distributions`. -.. [#mi300A-os] **Starting ROCm 7.0.x** - AMD Instinct MI300A GPUs (gfx942) supports all listed :ref:`supported_distributions` except on Debian 13, Azure Linux 3.0, Oracle Linux 10, Oracle Linux 9, and Oracle Linux 8. -.. 
[#mi200x-os] **For ROCm 7.0.x** - AMD Instinct MI200 Series GPUs (gfx90a) only supports Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, and Debian 12. -.. [#mi100-710-os] **For ROCM 7.1.x** - AMD Instinct MI100 GPUs (gfx908) only supports Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, RHEL 8.10, and SLES 15 SP7. -.. [#mi100-os] **For ROCm 7.0.x** - AMD Instinct MI100 GPUs (gfx908) only supports Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, and RHEL 8.10. -.. [#tf-mi350] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead. ->>>>>>> external/develop .. [#dgl_compat] DGL is supported only on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0. .. [#llama-cpp_compat] llama.cpp is supported only on ROCm 7.0.0 and ROCm 6.4.x. .. [#mi325x_KVM] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0.