Apply suggestion from @davidd-amd

Update yamlcpp.yml
2026-01-09 22:58:17 -05:00 · 2025-10-15 22:47:02 -06:00 · 2025-10-15 22:46:54 -06:00 · 2025-10-15 22:40:55 -06:00 · 2025-10-15 22:31:10 -06:00 · 2025-10-15 22:29:03 -06:00
75 changed files with 5141 additions and 3594 deletions
--- a/.azuredevops/components/HIPIFY.yml
+++ b/.azuredevops/components/HIPIFY.yml
@@ -79,7 +79,7 @@ jobs:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
    - task: Bash@3
      displayName: Add lit to PATH
      inputs:
--- a/.azuredevops/components/MIOpen.yml
+++ b/.azuredevops/components/MIOpen.yml
@@ -131,7 +131,7 @@ jobs:
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
@@ -212,7 +212,7 @@ jobs:
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
--- a/.azuredevops/components/ROCR-Runtime.yml
+++ b/.azuredevops/components/ROCR-Runtime.yml
@@ -37,6 +37,7 @@ parameters:
    - libdrm-dev
    - libelf-dev
    - libnuma-dev
+    - libsimde-dev
    - ninja-build
    - pkg-config
 - name: rocmDependencies
--- a/.azuredevops/components/aqlprofile.yml
+++ b/.azuredevops/components/aqlprofile.yml
@@ -0,0 +1,183 @@
+# Job template for the aqlprofile component: emits one build job per
+# jobMatrix.buildJobs entry and, unless unifiedBuild is true, one test job
+# per jobMatrix.testJobs entry.
+parameters:
+- name: componentName
+  type: string
+  default: aqlprofile
+- name: checkoutRepo
+  type: string
+  default: 'self'
+- name: checkoutRef
+  type: string
+  default: ''
+# monorepo related parameters
+- name: sparseCheckoutDir
+  type: string
+  default: ''
+- name: triggerDownstreamJobs
+  type: boolean
+  default: false
+- name: downstreamAggregateNames
+  type: string
+  default: ''
+- name: buildDependsOn
+  type: object
+  default: null
+- name: unifiedBuild
+  type: boolean
+  default: false
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
+- name: aptPackages
+  type: object
+  default:
+    - cmake
+    - git
+    - ninja-build
+    - python3-pip
+- name: rocmDependencies
+  type: object
+  default:
+    - clr
+    - llvm-project
+    - ROCR-Runtime
+- name: rocmTestDependencies
+  type: object
+  default:
+    - clr
+    - llvm-project
+    - ROCR-Runtime
+    - rocprofiler-register
+
+- name: jobMatrix
+  type: object
+  default:
+    buildJobs:
+      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
+      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+    testJobs:
+      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
+      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+
+jobs:
+# Build jobs: install dependencies, build and install via CMake, then publish artifacts.
+- ${{ each job in parameters.jobMatrix.buildJobs }}:
+  - job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
+    ${{ if parameters.buildDependsOn }}:
+      dependsOn:
+        - ${{ each build in parameters.buildDependsOn }}:
+          - ${{ build }}_${{ job.os }}
+    variables:
+    - group: common
+    - template: /.azuredevops/variables-global.yml
+    pool: ${{ variables.MEDIUM_BUILD_POOL }}
+    workspace:
+      clean: all
+    steps:
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        packageManager: ${{ job.packageManager }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+      parameters:
+        checkoutRepo: ${{ parameters.checkoutRepo }}
+        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
+      parameters:
+        dependencyList:
+          - gtest
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        checkoutRef: ${{ parameters.checkoutRef }}
+        dependencyList: ${{ parameters.rocmDependencies }}
+        gpuTarget: ${{ job.target }}
+        os: ${{ job.os }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
+        ${{ if parameters.triggerDownstreamJobs }}:
+          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+      parameters:
+        os: ${{ job.os }}
+        consolidateBuildAndInstall: true
+        extraBuildFlags: >-
+          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
+          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
+          -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/aqlprofile/cmake_modules
+          -DAQLPROFILE_BUILD_TESTS=ON
+          -DGPU_TARGETS=${{ job.target }}
+          -GNinja
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+      parameters:
+        componentName: ${{ parameters.componentName }}
+        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
+        gpuTarget: ${{ job.target }}
+        os: ${{ job.os }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+      parameters:
+        componentName: ${{ parameters.componentName }}
+        gpuTarget: ${{ job.target }}
+        os: ${{ job.os }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+    - ${{ if eq(job.os, 'ubuntu2204') }}:
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+        parameters:
+          aptPackages: ${{ parameters.aptPackages }}
+          gpuTarget: ${{ job.target }}
+
+# Test jobs are only generated when unifiedBuild is false.
+- ${{ if eq(parameters.unifiedBuild, False) }}:
+  - ${{ each job in parameters.jobMatrix.testJobs }}:
+    - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
+      dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
+      # Run only if the build succeeded, ENABLE_<TARGET>_TESTS is 'true', this
+      # component is not listed in DISABLED_<TARGET>_TESTS, and the pipeline is
+      # not an aggregate pipeline.
+      condition:
+        and(succeeded(),
+          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
+          eq(${{ parameters.aggregatePipeline }}, False)
+        )
+      variables:
+      - group: common
+      - template: /.azuredevops/variables-global.yml
+      pool: ${{ job.target }}_test_pool
+      workspace:
+        clean: all
+      steps:
+      - checkout: none
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+        parameters:
+          aptPackages: ${{ parameters.aptPackages }}
+          packageManager: ${{ job.packageManager }}
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+        parameters:
+          preTargetFilter: ${{ parameters.componentName }}
+          gpuTarget: ${{ job.target }}
+          os: ${{ job.os }}
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+        parameters:
+          checkoutRef: ${{ parameters.checkoutRef }}
+          dependencyList: ${{ parameters.rocmTestDependencies }}
+          gpuTarget: ${{ job.target }}
+          os: ${{ job.os }}
+          ${{ if parameters.triggerDownstreamJobs }}:
+            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+        parameters:
+          componentName: ${{ parameters.componentName }}
+          testDir: $(Agent.BuildDirectory)/rocm/share/hsa-amd-aqlprofile/
+          testExecutable: ./run_tests.sh
+          testParameters: ''
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+        parameters:
+          aptPackages: ${{ parameters.aptPackages }}
+          environment: test
+          gpuTarget: ${{ job.target }}
--- a/.azuredevops/components/hipBLASLt.yml
+++ b/.azuredevops/components/hipBLASLt.yml
@@ -77,6 +77,7 @@ parameters:
    - clr
    - hipBLAS-common
    - llvm-project
+    - rocm-cmake
    - rocminfo
    - rocm_smi_lib
    - rocprofiler-register
@@ -144,7 +145,7 @@ jobs:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
--- a/.azuredevops/components/hipSPARSELt.yml
+++ b/.azuredevops/components/hipSPARSELt.yml
@@ -54,6 +54,7 @@ parameters:
    - hipSPARSE
    - llvm-project
    - rocBLAS
+    - rocm-cmake
    - rocm_smi_lib
    - rocminfo
    - rocprofiler-register
@@ -67,6 +68,7 @@ parameters:
    - llvm-project
    - hipBLAS-common
    - hipBLASLt
+    - rocm-cmake
    - rocBLAS
    - rocminfo
    - rocprofiler-register
@@ -110,7 +112,7 @@ jobs:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
--- a/.azuredevops/components/hipTensor.yml
+++ b/.azuredevops/components/hipTensor.yml
@@ -77,6 +77,7 @@ jobs:
        extraBuildFlags: >-
          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/rocm/llvm
          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
+          -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
          -DROCM_PATH=$(Agent.BuildDirectory)/rocm
          -DCMAKE_BUILD_TYPE=Release
          -DHIPTENSOR_BUILD_TESTS=ON
--- a/.azuredevops/components/hipfort.yml
+++ b/.azuredevops/components/hipfort.yml
@@ -71,7 +71,7 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
--- a/.azuredevops/components/origami.yml
+++ b/.azuredevops/components/origami.yml
@@ -39,6 +39,9 @@ parameters:
    - python3
    - python3-dev
    - python3-pip
+    - libgtest-dev
+    - libboost-filesystem-dev
+    - libboost-program-options-dev
 - name: pipModules
  type: object
  default:
@@ -107,8 +110,12 @@ jobs:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
+      parameters:
+        dependencyList:
+          - gtest
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
@@ -125,7 +132,7 @@ jobs:
      parameters:
        os: ${{ job.os }}
        extraBuildFlags: >-
-          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
+          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
          -DORIGAMI_BUILD_SHARED_LIBS=ON
          -DORIGAMI_ENABLE_PYTHON=ON
@@ -206,7 +213,15 @@ jobs:
          ${{ if parameters.triggerDownstreamJobs }}:
            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+        parameters:
+          componentName: ${{ parameters.componentName }}
+          os: ${{ job.os }}
+          testDir: '$(Agent.BuildDirectory)/rocm/bin'
+          testExecutable: './origami-tests'
+          testParameters: '--yaml origami-tests.yaml --gtest_output=xml:./test_output.xml --gtest_color=yes'
      - script: |
+          set -e
          export PYTHONPATH=$(Agent.BuildDirectory)/s/build/python:$PYTHONPATH

          echo "--- Running origami_test.py ---"
--- a/.azuredevops/components/rccl.yml
+++ b/.azuredevops/components/rccl.yml
@@ -70,7 +70,7 @@ parameters:
 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
  - job: rccl_build_${{ job.target }}
-    timeoutInMinutes: 90
+    timeoutInMinutes: 120
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
@@ -83,7 +83,7 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
--- a/.azuredevops/components/rdc.yml
+++ b/.azuredevops/components/rdc.yml
@@ -1,10 +1,29 @@
 parameters:
+- name: componentName
+  type: string
+  default: rdc
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
+# monorepo related parameters
+- name: sparseCheckoutDir
+  type: string
+  default: ''
+- name: triggerDownstreamJobs
+  type: boolean
+  default: false
+- name: downstreamAggregateNames
+  type: string
+  default: ''
+- name: buildDependsOn
+  type: object
+  default: null
+- name: unifiedBuild
+  type: boolean
+  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -33,6 +52,7 @@ parameters:
    - clr
    - hipBLAS-common
    - hipBLASLt
+    - hipRAND
    - llvm-project
    - rocBLAS
    - rocm-cmake
@@ -43,6 +63,7 @@ parameters:
    - rocprofiler
    - rocprofiler-register
    - rocprofiler-sdk
+    - rocRAND
    - ROCR-Runtime
 - name: rocmTestDependencies
  type: object
@@ -74,7 +95,11 @@ parameters:

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: rdc_build_${{ job.target }}
+  - job: ${{ parameters.componentName }}_build_${{ job.target }}
+    ${{ if parameters.buildDependsOn }}:
+      dependsOn:
+        - ${{ each build in parameters.buildDependsOn }}:
+          - ${{ build }}_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
@@ -85,16 +110,22 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+      parameters:
+        cmakeVersion: '3.25.0'
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
+        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
+        ${{ if parameters.triggerDownstreamJobs }}:
+          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
  # Build grpc
    - task: Bash@3
      displayName: 'git clone grpc'
@@ -104,6 +135,7 @@ jobs:
        workingDirectory: $(Build.SourcesDirectory)
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
+        componentName: ${{ parameters.componentName }}
        cmakeBuildDir: $(Build.SourcesDirectory)/grpc/build
        cmakeSourceDir: $(Build.SourcesDirectory)/grpc
        installDir: $(Build.SourcesDirectory)/bin
@@ -117,6 +149,7 @@ jobs:
          -GNinja
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
+        componentName: ${{ parameters.componentName }}
        extraBuildFlags: >-
          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
          -DGRPC_ROOT="$(Build.SourcesDirectory)/bin"
@@ -126,9 +159,12 @@ jobs:
          -DAMDGPU_TARGETS=${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
+        componentName: ${{ parameters.componentName }}
+        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
+        componentName: ${{ parameters.componentName }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -136,60 +172,64 @@ jobs:
        aptPackages: ${{ parameters.aptPackages }}
        gpuTarget: ${{ job.target }}

- ${{ each job in parameters.jobMatrix.testJobs }}:
-  - job: rdc_test_${{ job.target }}
-    dependsOn: rdc_build_${{ job.target }}
-    condition:
-      and(succeeded(),
-        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
-        eq(${{ parameters.aggregatePipeline }}, False)
-      )
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    - name: ROCM_PATH
-      value: $(Agent.BuildDirectory)/rocm
-    - name: ROCM_DIR
-      value: $(Agent.BuildDirectory)/rocm
-    pool: ${{ job.target }}_test_pool
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-      parameters:
-        gpuTarget: ${{ job.target }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-      parameters:
-        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmTestDependencies }}
-        gpuTarget: ${{ job.target }}
-    - task: Bash@3
-      displayName: Setup test environment
-      inputs:
-        targetType: inline
-        script: |
-          sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rdcd /usr/sbin/rdcd
-          echo $(Agent.BuildDirectory)/rocm/lib/rdc/grpc/lib | sudo tee /etc/ld.so.conf.d/grpc.conf
-          sudo ldconfig -v
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
-    - task: Bash@3
-      displayName: Test rdc
-      inputs:
-        targetType: inline
-        script: >-
-          $(Agent.BuildDirectory)/rocm/share/rdc/rdctst_tests/rdctst
-          --batch_mode
-          --start_rdcd
-          --unauth_comm
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        environment: test
-        gpuTarget: ${{ job.target }}
-        extraPaths: /home/user/workspace/rocm/bin
+- ${{ if eq(parameters.unifiedBuild, False) }}:  # test jobs are only generated when unifiedBuild is false
+  - ${{ each job in parameters.jobMatrix.testJobs }}:
+    - job: ${{ parameters.componentName }}_test_${{ job.target }}
+      dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}  # matching build job for the same GPU target
+      condition:
+        and(succeeded(),
+          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
+          eq(${{ parameters.aggregatePipeline }}, False)
+        )
+      variables:
+      - group: common
+      - template: /.azuredevops/variables-global.yml
+      - name: ROCM_PATH
+        value: $(Agent.BuildDirectory)/rocm
+      - name: ROCM_DIR
+        value: $(Agent.BuildDirectory)/rocm
+      pool: ${{ job.target }}_test_pool
+      workspace:
+        clean: all
+      steps:
+      - checkout: none  # no source checkout in the test job
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+        parameters:
+          aptPackages: ${{ parameters.aptPackages }}
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+        parameters:
+          gpuTarget: ${{ job.target }}
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+        parameters:
+          checkoutRef: ${{ parameters.checkoutRef }}
+          dependencyList: ${{ parameters.rocmTestDependencies }}
+          gpuTarget: ${{ job.target }}
+          ${{ if parameters.triggerDownstreamJobs }}:  # only forwarded when downstream jobs are triggered
+            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
+      - task: Bash@3
+        displayName: Setup test environment  # symlink rdcd into /usr/sbin and register rocm/lib/rdc/grpc/lib with ldconfig
+        inputs:
+          targetType: inline
+          script: |
+            sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rdcd /usr/sbin/rdcd
+            echo $(Agent.BuildDirectory)/rocm/lib/rdc/grpc/lib | sudo tee /etc/ld.so.conf.d/grpc.conf
+            sudo ldconfig -v
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+      - task: Bash@3
+        displayName: Test rdc  # runs the rdctst binary with the flags folded into one command line below
+        inputs:
+          targetType: inline
+          script: >-
+            $(Agent.BuildDirectory)/rocm/share/rdc/rdctst_tests/rdctst
+            --batch_mode
+            --start_rdcd
+            --unauth_comm
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+        parameters:
+          aptPackages: ${{ parameters.aptPackages }}
+          environment: test
+          gpuTarget: ${{ job.target }}
+          extraPaths: /home/user/workspace/rocm/bin
--- a/.azuredevops/components/rocBLAS.yml
+++ b/.azuredevops/components/rocBLAS.yml
@@ -70,6 +70,7 @@ parameters:
    - hipBLAS-common
    - hipBLASLt
    - llvm-project
+    - rocm-cmake
    - rocminfo
    - rocprofiler-register
    - rocm_smi_lib
@@ -154,7 +155,7 @@ jobs:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
--- a/.azuredevops/components/rocPRIM.yml
+++ b/.azuredevops/components/rocPRIM.yml
@@ -210,7 +210,7 @@ jobs:
        parameters:
          componentName: ${{ parameters.componentName }}
          testDir: '$(Agent.BuildDirectory)/rocm/bin/rocprim'
-          extraTestParameters: '-I ${{ job.shard }},,${{ job.shardCount }} -E device_merge_inplace'
+          extraTestParameters: '-I ${{ job.shard }},,${{ job.shardCount }}'
          os: ${{ job.os }}
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
        parameters:
--- a/.azuredevops/components/rocm-examples.yml
+++ b/.azuredevops/components/rocm-examples.yml
@@ -14,9 +14,13 @@ parameters:
  type: object
  default:
    - cmake
+    - libdw-dev
    - libglfw3-dev
    - libmsgpack-dev
+    - libopencv-dev
    - libtbb-dev
+    - libtiff-dev
+    - libva-amdgpu-dev
    - ninja-build
    - python3-pip
 - name: rocmDependencies
@@ -33,16 +37,22 @@ parameters:
    - hipRAND
    - hipSOLVER
    - hipSPARSE
+    - hipTensor
    - llvm-project
+    - MIOpen
    - rocBLAS
    - rocFFT
+    - rocJPEG
    - rocPRIM
    - rocprofiler-register
+    - rocprofiler-sdk
    - ROCR-Runtime
    - rocRAND
    - rocSOLVER
    - rocSPARSE
    - rocThrust
+    - rocWMMA
+    - rpp
 - name: rocmTestDependencies
  type: object
  default:
@@ -57,18 +67,24 @@ parameters:
    - hipRAND
    - hipSOLVER
    - hipSPARSE
+    - hipTensor
    - llvm-project
+    - MIOpen
    - rocBLAS
    - rocFFT
    - rocminfo
    - rocPRIM
+    - rocJPEG
    - rocprofiler-register
+    - rocprofiler-sdk
    - ROCR-Runtime
    - rocRAND
    - rocSOLVER
    - rocSPARSE
    - rocThrust
    - roctracer
+    - rocWMMA
+    - rpp

 - name: jobMatrix
  type: object
@@ -97,6 +113,10 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
+        registerROCmPackages: true
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+      parameters:
+        cmakeVersion: '3.25.0'
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
@@ -158,6 +178,10 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
+        registerROCmPackages: true
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+      parameters:
+        cmakeVersion: '3.25.0'
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
--- a/.azuredevops/components/rocm-libraries.yml
+++ b/.azuredevops/components/rocm-libraries.yml
@@ -43,9 +43,14 @@ parameters:
    - ninja-build
    - python3-pip
    - python3-venv
+    - googletest
+    - libgtest-dev
+    - libgmock-dev
+    - libboost-filesystem-dev
 - name: pipModules
  type: object
  default:
+    - msgpack
    - joblib
    - "packaging>=22.0"
    - pytest
@@ -102,7 +107,7 @@ jobs:
    workspace:
      clean: all
    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
@@ -147,6 +152,13 @@ jobs:
          echo "##vso[task.prependpath]$USER_BASE/bin"
          echo "##vso[task.setvariable variable=PytestCmakePath]$USER_BASE/share/Pytest/cmake"
        displayName: Set cmake configure paths
+    - task: Bash@3
+      displayName: Add ROCm binaries to PATH
+      inputs:
+        targetType: inline
+        script: |
+          echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
+          echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        os: ${{ job.os }}
--- a/.azuredevops/components/rocprofiler-sdk.yml
+++ b/.azuredevops/components/rocprofiler-sdk.yml
@@ -213,6 +213,7 @@ jobs:
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
        parameters:
          componentName: ${{ parameters.componentName }}
+          testDir: $(Agent.BuildDirectory)/s/build
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
        parameters:
          aptPackages: ${{ parameters.aptPackages }}
--- a/.azuredevops/components/rocprofiler-systems.yml
+++ b/.azuredevops/components/rocprofiler-systems.yml
@@ -226,8 +226,11 @@ jobs:
            echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
        parameters:
+          cmakeSourceDir: $(Agent.BuildDirectory)/s/projects/rocprofiler-systems
    # build flags reference: https://rocm.docs.amd.com/projects/omnitrace/en/latest/install/install.html
          extraBuildFlags: >-
+            -DCMAKE_INSTALL_PREFIX=$(Agent.BuildDirectory)/rocprofiler-systems
+            -DROCPROFSYS_USE_PYTHON=ON
            -DROCPROFSYS_BUILD_TESTING=ON
            -DROCPROFSYS_BUILD_DYNINST=ON
            -DROCPROFSYS_BUILD_LIBUNWIND=ON
@@ -245,11 +248,13 @@ jobs:
        displayName: Set up rocprofiler-systems env
        inputs:
          targetType: inline
-          script: source share/rocprofiler-systems/setup-env.sh
-          workingDirectory: build
+          script: source $(Agent.BuildDirectory)/rocprofiler-systems/share/rocprofiler-systems/setup-env.sh
+          workingDirectory: $(Agent.BuildDirectory)/rocprofiler-systems/share/rocprofiler-systems
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
        parameters:
          componentName: ${{ parameters.componentName }}
+          testDir: $(Agent.BuildDirectory)/s/build/tests/
+          testParameters: '--output-on-failure'
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
        parameters:
          gpuTarget: ${{ job.target }}
--- a/.azuredevops/dependencies/cli11.yml
+++ b/.azuredevops/dependencies/cli11.yml
@@ -0,0 +1,63 @@
+parameters:  # job template: clone CLI11, build it with CMake and upload the result, once per jobMatrix.buildJobs entry
+- name: checkoutRepo  # NOTE(review): checkoutRepo and checkoutRef are declared but not referenced anywhere in this template
+  type: string
+  default: 'self'
+- name: checkoutRef
+  type: string
+  default: ''
+- name: cli11Version  # branch or tag handed to `git clone -b` in the clone step
+  type: string
+  default: ''  # callers are expected to override this; the empty default leaves `-b` without a value
+- name: aptPackages
+  type: object
+  default:
+    - cmake
+    - git
+    - ninja-build
+
+- name: jobMatrix
+  type: object
+  default:
+    buildJobs:
+      - { os: ubuntu2204, packageManager: apt}
+      - { os: almalinux8, packageManager: dnf}
+
+jobs:
+- ${{ each job in parameters.jobMatrix.buildJobs }}:
+  - job: cli11_${{ job.os }}
+    variables:
+    - group: common
+    - template: /.azuredevops/variables-global.yml
+    pool:
+      vmImage: 'ubuntu-22.04'  # every matrix entry uses this hosted image
+    ${{ if eq(job.os, 'almalinux8') }}:  # almalinux8 entries additionally run inside the manylinux228 container
+      container:
+        image: rocmexternalcicd.azurecr.io/manylinux228:latest
+        endpoint: ContainerService3
+    workspace:
+      clean: all
+    steps:
+    - checkout: none  # sources come from the explicit git clone step below
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        packageManager: ${{ job.packageManager }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+    - task: Bash@3
+      displayName: Clone cli11 ${{ parameters.cli11Version }}
+      inputs:
+        targetType: inline
+        script: git clone https://github.com/CLIUtils/CLI11.git -b ${{ parameters.cli11Version }}
+        workingDirectory: $(Agent.BuildDirectory)  # the clone lands in $(Agent.BuildDirectory)/CLI11, used as cmakeSourceDir below
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+      parameters:
+        os: ${{ job.os }}
+        cmakeBuildDir: $(Agent.BuildDirectory)/CLI11/build
+        cmakeSourceDir: $(Agent.BuildDirectory)/CLI11
+        useAmdclang: false
+        extraBuildFlags: >-
+          -DCMAKE_BUILD_TYPE=Release
+          -GNinja
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+      parameters:
+        os: ${{ job.os }}
--- a/.azuredevops/dependencies/yamlcpp.yml
+++ b/.azuredevops/dependencies/yamlcpp.yml
@@ -0,0 +1,82 @@
+# Azure Pipelines job template: clones yaml-cpp from GitHub, configures and
+# builds it with CMake/Ninja for every entry in jobMatrix.buildJobs, then runs
+# the artifact-upload step template.
+# NOTE(review): checkoutRepo and checkoutRef are declared but never referenced
+# in this template.
+parameters:
+- name: checkoutRepo
+  type: string
+  default: 'self'
+- name: checkoutRef
+  type: string
+  default: ''
+# Branch or tag of yaml-cpp to clone. When left empty, the repository's default
+# branch is cloned.
+- name: yamlcppVersion
+  type: string
+  default: ''
+# Packages forwarded to the dependencies-other step template.
+- name: aptPackages
+  type: object
+  default:
+    - cmake
+    - git
+    - ninja-build
+
+# One job is generated per buildJobs entry.
+- name: jobMatrix
+  type: object
+  default:
+    buildJobs:
+      - { os: ubuntu2204, packageManager: apt}
+      - { os: almalinux8, packageManager: dnf}
+
+jobs:
+- ${{ each job in parameters.jobMatrix.buildJobs }}:
+  - job: yamlcpp_${{ job.os }}
+    variables:
+    - group: common
+    - template: /.azuredevops/variables-global.yml
+    pool:
+      vmImage: 'ubuntu-22.04'
+    # almalinux8 jobs additionally run inside the manylinux228 container.
+    ${{ if eq(job.os, 'almalinux8') }}:
+      container:
+        image: rocmexternalcicd.azurecr.io/manylinux228:latest
+        endpoint: ContainerService3
+    workspace:
+      clean: all
+    steps:
+    # The pipeline repository is not checked out; sources come from the clone below.
+    - checkout: none
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        packageManager: ${{ job.packageManager }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+    - task: Bash@3
+      displayName: Clone yaml-cpp ${{ parameters.yamlcppVersion }}
+      inputs:
+        targetType: inline
+        # Pass -b only when a version was supplied: an empty value would
+        # otherwise leave a bare `-b`, which git rejects.
+        script: |
+          version='${{ parameters.yamlcppVersion }}'
+          git clone https://github.com/jbeder/yaml-cpp.git ${version:+-b "$version"}
+        workingDirectory: $(Agent.BuildDirectory)
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+      parameters:
+        os: ${{ job.os }}
+        cmakeBuildDir: $(Agent.BuildDirectory)/yaml-cpp/build
+        cmakeSourceDir: $(Agent.BuildDirectory)/yaml-cpp
+        useAmdclang: false
+        # Shared library and parse tools are switched off; install targets are on.
+        extraBuildFlags: >-
+          -DCMAKE_BUILD_TYPE=Release
+          -DYAML_CPP_BUILD_TOOLS=OFF
+          -DYAML_BUILD_SHARED_LIBS=OFF
+          -DYAML_CPP_INSTALL=ON
+          -GNinja
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+      parameters:
+        os: ${{ job.os }}
--- a/.azuredevops/tag-builds/cli11.yml
+++ b/.azuredevops/tag-builds/cli11.yml
@@ -0,0 +1,26 @@
+# Builds CLI11 at the requested branch or tag via the shared cli11 dependency
+# template. CI and PR triggers are disabled.
+variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+
+parameters:
+  # Branch or tag of CLI11 to build.
+  - name: cli11Version
+    type: string
+    default: 'main'
+
+resources:
+  repositories:
+    - repository: pipelines_repo
+      type: github
+      endpoint: ROCm
+      name: ROCm/ROCm
+
+trigger: none
+pr: none
+
+jobs:
+  - template: ${{ variables.CI_DEPENDENCIES_PATH }}/cli11.yml
+    parameters:
+      cli11Version: ${{ parameters.cli11Version }}
--- a/.azuredevops/tag-builds/yaml-cpp.yml
+++ b/.azuredevops/tag-builds/yaml-cpp.yml
@@ -0,0 +1,26 @@
+# Builds yaml-cpp at the requested branch or tag via the shared yamlcpp
+# dependency template. CI and PR triggers are disabled.
+variables:
+- group: common
+- template: /.azuredevops/variables-global.yml
+
+parameters:
+# Branch or tag of yaml-cpp to build.
+- name: yamlcppVersion
+  type: string
+  default: "0.8.0"
+
+resources:
+  repositories:
+  - repository: pipelines_repo
+    type: github
+    endpoint: ROCm
+    name: ROCm/ROCm
+
+trigger: none
+pr: none
+
+jobs:
+  - template: ${{ variables.CI_DEPENDENCIES_PATH }}/yamlcpp.yml
+    parameters:
+      yamlcppVersion: ${{ parameters.yamlcppVersion }}
--- a/.azuredevops/templates/steps/dependencies-cmake-custom.yml
+++ b/.azuredevops/templates/steps/dependencies-cmake-custom.yml
@@ -1,10 +1,15 @@
+parameters:
+  - name: cmakeVersion
+    type: string
+    default: '3.31.0'
+
 steps:
 - task: Bash@3
-  displayName: Install CMake 3.31
+  displayName: Install CMake ${{ parameters.cmakeVersion }}
  inputs:
    targetType: inline
    script: |
-      CMAKE_VERSION=3.31.0
+      CMAKE_VERSION=${{ parameters.cmakeVersion }}
      CMAKE_ROOT="$(Pipeline.Workspace)/cmake"

      echo "Downloading CMake $CMAKE_VERSION..."
--- a/.azuredevops/templates/steps/dependencies-rocm.yml
+++ b/.azuredevops/templates/steps/dependencies-rocm.yml
@@ -46,6 +46,10 @@ parameters:
      pipelineId: 115
      developBranch: aomp-dev
      hasGpuTarget: false
+    aqlprofile:
+      pipelineId: 365
+      developBranch: develop
+      hasGpuTarget: false
    clr:
      pipelineId: 335
      developBranch: develop
@@ -126,13 +130,17 @@ parameters:
      pipelineId: 80
      developBranch: develop
      hasGpuTarget: true
+    origami:
+      pipelineId: 364
+      developBranch: develop
+      hasGpuTarget: true
    rccl:
      pipelineId: 107
      developBranch: develop
      hasGpuTarget: true
    rdc:
-      pipelineId: 100
-      developBranch: amd-staging
+      pipelineId: 360
+      developBranch: develop
      hasGpuTarget: false
    rocAL:
      pipelineId: 151
@@ -219,8 +227,8 @@ parameters:
      developBranch: develop
      hasGpuTarget: true
    rocprofiler-systems:
-      pipelineId: 255
-      developBranch: amd-staging
+      pipelineId: 345
+      developBranch: develop
      hasGpuTarget: true
    rocPyDecode:
      pipelineId: 239
--- a/.wordlist.txt
+++ b/.wordlist.txt
@@ -43,6 +43,7 @@ Blit
 Blockwise
 Bluefield
 Bootloader
+Broadcom
 CAS
 CCD
 CDNA
@@ -146,6 +147,8 @@ Filesystem
 FindDb
 Flang
 FlashAttention
+FlashInfer’s
+FlashInfer
 FluxBenchmark
 Fortran
 Fuyu
@@ -310,6 +313,7 @@ Mooncake
 Mpops
 Multicore
 Multithreaded
+MXFP
 MyEnvironment
 MyST
 NANOO
@@ -480,6 +484,7 @@ TCI
 TCIU
 TCP
 TCR
+TVM
 THREADGROUPS
 threadgroups
 TensorRT
@@ -670,6 +675,7 @@ detections
 dev
 devicelibs
 devsel
+dgl
 dimensionality
 disambiguates
 distro
@@ -709,6 +715,7 @@ githooks
 github
 globals
 gnupg
+gpu
 grayscale
 gx
 gzip
@@ -763,6 +770,7 @@ invariants
 invocating
 ipo
 jax
+json
 kdb
 kfd
 kv
@@ -963,6 +971,7 @@ tabindex
 targetContainer
 td
 tensorfloat
+tf
 th
 tokenization
 tokenize
@@ -975,6 +984,7 @@ toolset
 toolsets
 torchtitan
 torchvision
+tp
 tqdm
 tracebacks
 txt
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,9 +4,123 @@ This page is a historical overview of changes made to ROCm components. This
 consolidated changelog documents key modifications and improvements across
 different versions of the ROCm software stack and its components.

+## ROCm 7.0.2
+
+See the [ROCm 7.0.2 release notes](https://rocm.docs.amd.com/en/docs-7.0.2/about/release-notes.html#rocm-7-0-2-release-notes)
+for a complete overview of this release.
+
+### **AMD SMI** (26.0.2)
+
+#### Added
+
+* Added `bad_page_threshold_exceeded` field to `amd-smi static --ras`, which compares retired pages count against bad page threshold. This field displays `True` if retired pages exceed the threshold, `False` if within threshold, or `N/A` if threshold data is unavailable. Note that `sudo` is required to have the `bad_page_threshold_exceeded` field populated.
+
+#### Removed
+
+* Removed gpuboard and baseboard temperature enums in the amdsmi Python library.
+    * `AmdSmiTemperatureType` had issues with referencing the correct attribute. As such, the following duplicate enums have been removed:
+        - `AmdSmiTemperatureType.GPUBOARD_NODE_FIRST`
+        - `AmdSmiTemperatureType.GPUBOARD_VR_FIRST`
+        - `AmdSmiTemperatureType.BASEBOARD_FIRST`
+
+#### Resolved issues
+
+* Fixed `attribute error` in `amd-smi monitor` on Linux Guest systems, where the violations argument caused CLI to break.
+* Fixed certain output in `amd-smi monitor` when GPUs are partitioned.  
+  * This fixes `amd-smi monitor` commands such as `amd-smi monitor -Vqt`, `amd-smi monitor -g 0 -Vqt -w 1`, and `amd-smi monitor -Vqt --file /tmp/test1`. These commands now display output as expected in partitioned GPU scenarios.
+
+* Fixed an issue where using `amd-smi ras --folder <folder_name>` was forcing the created folder's name to be lowercase. This fix also allows all string input options to be case insensitive.
+
+* Fixed an issue of some processes not being detected by AMD SMI despite making use of KFD resources. This fix, with the addition of KFD Fallback for process detection, ensures that all KFD processes will be detected.
+
+* Multiple CPER issues were fixed.  
+  - Issue of being unable to query for additional CPERs after 20 were generated on a single device.
+  - Issue where the RAS HBM CRC read was failing due to an incorrect AFID value.
+  - Issue where RAS injections were not consistently producing related CPERs.
+
+### **HIP** (7.0.2)
+
+#### Added
+
+* Support for the `hipMemAllocationTypeUncached` flag, enabling developers to allocate uncached memory. This flag is now supported in the following APIs:
+    - `hipMemGetAllocationGranularity` determines the recommended allocation granularity for uncached memory.
+    - `hipMemCreate` allocates memory with uncached properties.
+
+#### Resolved issues
+
+* A compilation failure affecting applications that compile kernels using `hiprtc` with the compiler option `std=c++11`.
+* A permission-related error that occurred during the execution of `hipLaunchHostFunc`. This API is now supported and permitted to run during stream capture, aligning its behavior with CUDA.
+* A numerical error during graph capture of kernels that rely on a remainder in `globalWorkSize`, in frameworks like MIOpen and PyTorch, where the grid size is not a multiple of the block size. To ensure correct replay behavior, HIP runtime now stores this remainder in `hip::GraphKernelNode` during `hipExtModuleLaunchKernel` capture, enabling accurate execution and preventing corruption.
+* A page fault that occurred during viewport rendering while running the file `undo.blend` in Blender. The issue was resolved by the HIP runtime, which reused the same context during image creation.
+* Resolved a segmentation fault in `gpu_metrics`, which is used in threshold logic for command submission patches to GPU device(s) during CPU synchronization.
+
+### **hipBLAS** (3.0.2)
+ 
+#### Added
+ 
+* Enabled support for gfx1150, gfx1151, gfx1200, and gfx1201 AMD hardware.
+
+### **RCCL** (2.26.6)
+
+#### Added
+
+* Enabled double-buffering in `reduceCopyPacks` to trigger pipelining, especially to overlap bf16 arithmetic.
+* Added `--force-reduce-pipeline` as an option that can be passed to the `install.sh` script. Passing this option will enable software-triggered pipelining for `bfloat16` reductions (that is, `all_reduce`, `reduce_scatter`, and `reduce`).
+
+### **rocBLAS** (5.0.2)
+ 
+#### Added
+ 
+* Enabled gfx1150 and gfx1151.
+* The `ROCBLAS_USE_HIPBLASLT_BATCHED` variable to independently control the batched hipblaslt backend. Set `ROCBLAS_USE_HIPBLASLT_BATCHED=0` to disable batched GEMM use of the hipblaslt backend.
+
+#### Resolved issues
+ 
+* Set the imaginary portion of the main diagonal of the output matrix to zero in syrk and herk.
+
+### **ROCdbgapi** (0.77.4)
+
+#### Added
+
+* ROCdbgapi documentation link in the README.md file.
+
+### **ROCm Systems Profiler** (1.1.1)
+
+#### Resolved issues
+
+* Fixed an issue where ROC-TX ranges were displayed as two separate events instead of a single spanning event.
+
+### **rocPRIM** (4.0.1)
+
+#### Resolved issues
+
+* Fixed compilation issue when using `rocprim::texture_cache_iterator`.
+* Fixed a HIP version check used to determine whether `hipStreamLegacy` is supported. This resolves runtime errors that occur when `hipStreamLegacy` is used in ROCm 7.0.0 and later.
+
+### **rocSPARSE** (4.0.3)
+
+#### Resolved issues
+
+* Fixed an issue causing premature deallocation of internal buffers while still in use.
+
+### **rocSOLVER** (3.30.1)
+
+#### Optimized
+
+Improved the performance of:
+
+* LARFT and downstream functions such as GEQRF and ORMTR.
+* LARF and downstream functions such as GEQR2.
+* ORMTR and downstream functions such as SYEVD.
+* GEQR2 and downstream functions such as GEQRF.
+
+## ROCm 7.0.1
+
+ROCm 7.0.1 is a quality release that resolves an existing issue. There are no component changes from the previous ROCm 7.0.0 release. See the [ROCm 7.0.1 release notes](https://rocm.docs.amd.com/en/docs-7.0.1/about/release-notes.html#rocm-7-0-1-release-notes) for a complete overview of this release.
+
 ## ROCm 7.0.0

-See the [ROCm 7.0.0 release notes](https://rocm-stg.amd.com/en/latest/about/release-notes.html#rocm-7-0-0-release-notes)
+See the [ROCm 7.0.0 release notes](https://rocm.docs.amd.com/en/docs-7.0.0/about/release-notes.html#rocm-7-0-0-release-notes)
 for a complete overview of this release.

 ### **AMD SMI** (26.0.0)
@@ -798,11 +912,15 @@ HIP runtime has the following functional improvements which improves runtime per
 * Compatibility with NCCL 2.25.1.
 * Compatibility with NCCL 2.26.6.

+#### Optimized
+* Improved the performance of the `FP8` Sum operation by upcasting to `FP16`.
+
 #### Resolved issues

 * Resolved an issue when using more than 64 channels when multiple collectives are used in the same `ncclGroup()` call.
 * Fixed unit test failures in tests ending with the `ManagedMem` and `ManagedMemGraph` suffixes.
 * Fixed a suboptimal algorithmic switching point for AllReduce on the AMD Instinct MI300X.
+* Fixed broken functionality within the LL protocol on gfx950 by disabling inlining of LLGenericOp kernels.
 * Fixed the known issue "When splitting a communicator using `ncclCommSplit` in some GPU configurations, MSCCL initialization can cause a segmentation fault" with a design change to use `comm` instead of `rank` for `mscclStatus`. The global map for `comm` to `mscclStatus` is still not thread safe but should be explicitly handled by mutexes for read-write operations. This is tested for correctness, but there is a plan to use a thread-safe map data structure in an upcoming release.

 ### **rocAL** (2.3.0)
--- a/RELEASE.md
+++ b/RELEASE.md
--- a/default.xml
+++ b/default.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <manifest>
    <remote name="rocm-org" fetch="https://github.com/ROCm/" />
-    <default revision="refs/tags/rocm-7.0.1"
+    <default revision="refs/tags/rocm-7.0.2"
     remote="rocm-org"
     sync-c="true"
     sync-j="4" />
@@ -41,7 +41,6 @@
    <project groups="mathlibs" name="MIVisionX" />
    <project groups="mathlibs" name="ROCmValidationSuite" />
    <project groups="mathlibs" name="composable_kernel" />
-    <project groups="mathlibs" name="hipSOLVER" />
    <project groups="mathlibs" name="hipTensor" />
    <project groups="mathlibs" name="hipfort" />
    <project groups="mathlibs" name="rccl" />
@@ -57,7 +56,6 @@
    <project groups="mathlibs" name="rocm-libraries" />
    <project groups="mathlibs" name="rocPyDecode" />
    <project groups="mathlibs" name="rocSHMEM" />
-    <project groups="mathlibs" name="rocSOLVER" />
    <project groups="mathlibs" name="rocWMMA" />
    <project groups="mathlibs" name="rocm-cmake" />
    <project groups="mathlibs" name="rpp" />
--- a/docs/compatibility/compatibility-matrix-historical-6.0.csv
+++ b/docs/compatibility/compatibility-matrix-historical-6.0.csv
@@ -1,136 +1,137 @@
-ROCm Version,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
-      :ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.3,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
-      ,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
-      ,,,,,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
-      ,"RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
-      ,RHEL 8.10 [#rhel-700-past-60]_,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
-      ,SLES 15 SP7 [#sles-db-700-past-60]_,"SLES 15 SP7, SP6","SLES 15 SP7, SP6",SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
-      ,,,,,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
-      ,"Oracle Linux 9, 8 [#ol-700-mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
-      ,Debian 12 [#sles-db-700-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
-      ,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-630-past-60]_,Azure Linux 3.0 [#az-mi300x-630-past-60]_,,,,,,,,,,,,
-,Rocky Linux 9 [#rl-700-past-60]_,,,,,,,,,,,,,,,,,,
-      ,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
-      :doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA4,,,,,,,,,,,,,,,,,,
-,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
-      ,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
-      ,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
-      ,RDNA4,RDNA4,RDNA4,RDNA4,,,,,,,,,,,,,,,
-      ,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
-      ,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
-      ,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
-      :doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx950 [#mi350x-os-past-60]_,,,,,,,,,,,,,,,,,,
-,gfx1201 [#RDNA-OS-700-past-60]_,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
-      ,gfx1200 [#RDNA-OS-700-past-60]_,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
-,gfx1101 [#RDNA-OS-700-past-60]_ [#rd-v710-past-60]_,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
-      ,gfx1100 [#RDNA-OS-700-past-60]_,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
-      ,gfx1030 [#RDNA-OS-700-past-60]_ [#rd-v620-past-60]_,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
-      ,gfx942 [#mi325x-os-past-60]_ [#mi300x-os-past-60]_ [#mi300A-os-past-60]_,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
-      ,gfx90a [#mi200x-os-past-60]_,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
-      ,gfx908 [#mi100-os-past-60]_,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
-,,,,,,,,,,,,,,,,,,,
-      FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
-      :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.7, 2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
-      :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.19.1, 2.18.1","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
-      :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.6.0,0.4.35,0.4.35,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
-      :doc:`verl <../compatibility/ml-compatibility/verl-compatibility>` [#verl_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.3.0.post0,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>` [#stanford-megatron-lm_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,85f95ae,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat-past-60]_,N/A,N/A,N/A,N/A,2.4.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>` [#megablocks_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.7.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`Taichi <../compatibility/ml-compatibility/taichi-compatibility>` [#taichi_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,1.8.0b1,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
-:doc:`Ray <../compatibility/ml-compatibility/ray-compatibility>` [#ray_compat-past-60]_,N/A,N/A,N/A,2.48.0.post0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
-:doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat-past-60]_,N/A,N/A,N/A,N/A,b5997,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
-      `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.22.0,1.20.0,1.20.0,1.20.0,1.20.0,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
-,,,,,,,,,,,,,,,,,,,
-      ,,,,,,,,,,,,,,,,,,,
-      THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
-      `UCC <https://github.com/ROCm/ucc>`_,>=1.4.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
-      `UCX <https://github.com/ROCm/ucx>`_,>=1.17.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
-      ,,,,,,,,,,,,,,,,,,,
-      THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
-      Thrust,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
-      CUB,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
-,,,,,,,,,,,,,,,,,,,
-     DRIVER & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
-      :doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x, 6.2.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
-      ,,,,,,,,,,,,,,,,,,,
-      ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
-      :doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
-      :doc:`MIGraphX <amdmigraphx:index>`,2.13.0,2.12.0,2.12.0,2.12.0,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
-      :doc:`MIOpen <miopen:index>`,3.5.0,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
-      :doc:`MIVisionX <mivisionx:index>`,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
-      :doc:`rocAL <rocal:index>`,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
-      :doc:`rocDecode <rocdecode:index>`,1.0.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
-      :doc:`rocJPEG <rocjpeg:index>`,1.1.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`rocPyDecode <rocpydecode:index>`,0.6.0,0.3.1,0.3.1,0.3.1,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`RPP <rpp:index>`,2.0.0,1.9.10,1.9.10,1.9.10,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
-      ,,,,,,,,,,,,,,,,,,,
-      COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
-      :doc:`RCCL <rccl:index>`,2.26.6,2.22.3,2.22.3,2.22.3,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
-      :doc:`rocSHMEM <rocshmem:index>`,3.0.0,2.0.1,2.0.1,2.0.0,2.0.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
-      ,,,,,,,,,,,,,,,,,,,
-      MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
-      `half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
-      :doc:`hipBLAS <hipblas:index>`,3.0.0,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
-      :doc:`hipBLASLt <hipblaslt:index>`,1.0.0,0.12.1,0.12.1,0.12.1,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
-      :doc:`hipFFT <hipfft:index>`,1.0.20,1.0.18,1.0.18,1.0.18,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
-      :doc:`hipfort <hipfort:index>`,0.7.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
-      :doc:`hipRAND <hiprand:index>`,3.0.0,2.12.0,2.12.0,2.12.0,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
-      :doc:`hipSOLVER <hipsolver:index>`,3.0.0,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
-      :doc:`hipSPARSE <hipsparse:index>`,4.0.1,3.2.0,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
-      :doc:`hipSPARSELt <hipsparselt:index>`,0.2.4,0.2.3,0.2.3,0.2.3,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
-      :doc:`rocALUTION <rocalution:index>`,4.0.0,3.2.3,3.2.3,3.2.3,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
-      :doc:`rocBLAS <rocblas:index>`,5.0.0,4.4.1,4.4.1,4.4.0,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
-      :doc:`rocFFT <rocfft:index>`,1.0.34,1.0.32,1.0.32,1.0.32,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
-      :doc:`rocRAND <rocrand:index>`,4.0.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
-      :doc:`rocSOLVER <rocsolver:index>`,3.30.0,3.28.2,3.28.2,3.28.0,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
-      :doc:`rocSPARSE <rocsparse:index>`,4.0.2,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
-      :doc:`rocWMMA <rocwmma:index>`,2.0.0,1.7.0,1.7.0,1.7.0,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
-      :doc:`Tensile <tensile:src/index>`,4.44.0,4.43.0,4.43.0,4.43.0,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
-      ,,,,,,,,,,,,,,,,,,,
-      PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
-      :doc:`hipCUB <hipcub:index>`,4.0.0,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
-      :doc:`hipTensor <hiptensor:index>`,2.0.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
-      :doc:`rocPRIM <rocprim:index>`,4.0.0,3.4.1,3.4.1,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
-      :doc:`rocThrust <rocthrust:index>`,4.0.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
-      ,,,,,,,,,,,,,,,,,,,
-      SUPPORT LIBS,,,,,,,,,,,,,,,,,,,
-      `hipother <https://github.com/ROCm/hipother>`_,7.0.51830,6.4.43483,6.4.43483,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
-      `rocm-core <https://github.com/ROCm/rocm-core>`_,7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
-      `ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
-      ,,,,,,,,,,,,,,,,,,,
-      SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
-      :doc:`AMD SMI <amdsmi:index>`,26.0.0,25.5.1,25.5.1,25.4.2,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
-      :doc:`ROCm Data Center Tool <rdc:index>`,1.1.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
-      :doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
-      :doc:`ROCm SMI <rocm_smi_lib:index>`,7.8.0,7.7.0,7.5.0,7.5.0,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
-      :doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.2.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
-      ,,,,,,,,,,,,,,,,,,,
-      PERFORMANCE TOOLS,,,,,,,,,,,,,,,,,,,
-      :doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,2.6.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
-      :doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.2.3,3.1.1,3.1.1,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.1.0,1.0.2,1.0.2,1.0.1,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`ROCProfiler <rocprofiler:index>`,2.0.70000,2.0.60403,2.0.60402,2.0.60401,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
-      :doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,1.0.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`ROCTracer <roctracer:index>`,4.1.70000,4.1.60403,4.1.60402,4.1.60401,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
-      ,,,,,,,,,,,,,,,,,,,
-      DEVELOPMENT TOOLS,,,,,,,,,,,,,,,,,,,
-      :doc:`HIPIFY <hipify:index>`,20.0.0,19.0.0,19.0.0,19.0.0,19.0.0,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
-      :doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
-      :doc:`ROCdbgapi <rocdbgapi:index>`,0.77.3,0.77.2,0.77.2,0.77.2,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
-      :doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,16.3.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
-      `rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
-      :doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.1.0,2.0.4,2.0.4,2.0.4,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
-      ,,,,,,,,,,,,,,,,,,,
-      COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
-      `clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
-      :doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
-      `Flang <https://github.com/ROCm/flang>`_,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
-      :doc:`llvm-project <llvm-project:index>`,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
-      `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
-,,,,,,,,,,,,,,,,,,,
-      RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
-      :doc:`AMD CLR <hip:understand/amd_clr>`,7.0.51830,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
-      :doc:`HIP <hip:index>`,7.0.51830,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
-      `OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
-      :doc:`ROCr Runtime <rocr-runtime:index>`,1.18.0,1.15.0,1.15.0,1.15.0,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
+ROCm Version,7.0.2,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
+      :ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.3,Ubuntu 24.04.3,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
+      ,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
+      ,,,,,,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
+      ,"RHEL 10.0 [#rhel-10-702-past-60]_, 9.6 [#rhel-10-702-past-60]_, 9.4 [#rhel-94-702-past-60]_","RHEL 9.6 [#rhel-10-702-past-60]_, 9.4 [#rhel-94-702-past-60]_","RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
+      ,RHEL 8.10 [#rhel-700-past-60]_,RHEL 8.10 [#rhel-700-past-60]_,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
+      ,SLES 15 SP7 [#sles-db-700-past-60]_,SLES 15 SP7 [#sles-db-700-past-60]_,"SLES 15 SP7, SP6","SLES 15 SP7, SP6",SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
+      ,,,,,,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
+      ,"Oracle Linux 10, 9, 8 [#ol-700-mi300x-past-60]_","Oracle Linux 9, 8 [#ol-700-mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
+      ,"Debian 13 [#db-mi300x-past-60]_, 12 [#sles-db-700-past-60]_",Debian 12 [#sles-db-700-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
+      ,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-630-past-60]_,Azure Linux 3.0 [#az-mi300x-630-past-60]_,,,,,,,,,,,,
+      ,Rocky Linux 9 [#rl-700-past-60]_,Rocky Linux 9 [#rl-700-past-60]_,,,,,,,,,,,,,,,,,,
+      ,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,
+      :doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA4,CDNA4,,,,,,,,,,,,,,,,,,
+      ,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
+      ,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
+      ,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
+      ,RDNA4,RDNA4,RDNA4,RDNA4,RDNA4,,,,,,,,,,,,,,,
+      ,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
+      ,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
+      ,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,
+      :doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx950 [#mi350x-os-past-60]_,gfx950 [#mi350x-os-past-60]_,,,,,,,,,,,,,,,,,,
+      ,gfx1201 [#RDNA-OS-700-past-60]_,gfx1201 [#RDNA-OS-700-past-60]_,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
+      ,gfx1200 [#RDNA-OS-700-past-60]_,gfx1200 [#RDNA-OS-700-past-60]_,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
+      ,gfx1101 [#RDNA-OS-700-past-60]_ [#rd-v710-past-60]_,gfx1101 [#RDNA-OS-700-past-60]_ [#rd-v710-past-60]_,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
+      ,gfx1100 [#RDNA-OS-700-past-60]_,gfx1100 [#RDNA-OS-700-past-60]_,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
+      ,gfx1030 [#RDNA-OS-700-past-60]_ [#rd-v620-past-60]_,gfx1030 [#RDNA-OS-700-past-60]_ [#rd-v620-past-60]_,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
+      ,gfx942 [#mi325x-os-past-60]_ [#mi300x-os-past-60]_ [#mi300A-os-past-60]_,gfx942 [#mi325x-os-past-60]_ [#mi300x-os-past-60]_ [#mi300A-os-past-60]_,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
+      ,gfx90a [#mi200x-os-past-60]_,gfx90a [#mi200x-os-past-60]_,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
+      ,gfx908 [#mi100-os-past-60]_,gfx908 [#mi100-os-past-60]_,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
+      ,,,,,,,,,,,,,,,,,,,,
+      FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,
+      :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.8, 2.7, 2.6","2.7, 2.6, 2.5","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
+      :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.19.1, 2.18.1, 2.17.1 [#tf-mi350-past-60]_","2.19.1, 2.18.1, 2.17.1 [#tf-mi350-past-60]_","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
+      :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.6.0,0.6.0,0.4.35,0.4.35,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
+      :doc:`verl <../compatibility/ml-compatibility/verl-compatibility>` [#verl_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.3.0.post0,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>` [#stanford-megatron-lm_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,85f95ae,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,2.4.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>` [#megablocks_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.7.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`Taichi <../compatibility/ml-compatibility/taichi-compatibility>` [#taichi_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,1.8.0b1,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`Ray <../compatibility/ml-compatibility/ray-compatibility>` [#ray_compat-past-60]_,N/A,N/A,N/A,N/A,2.48.0.post0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat-past-60]_,N/A,b6356,b6356,b6356,b6356,b5997,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`FlashInfer <../compatibility/ml-compatibility/flashinfer-compatibility>` [#flashinfer_compat-past-60]_,N/A,N/A,N/A,N/A,v0.2.5,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
+      `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.22.0,1.22.0,1.20.0,1.20.0,1.20.0,1.20.0,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
+      ,,,,,,,,,,,,,,,,,,,,
+      ,,,,,,,,,,,,,,,,,,,,
+      THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,
+      `UCC <https://github.com/ROCm/ucc>`_,>=1.4.0,>=1.4.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
+      `UCX <https://github.com/ROCm/ucx>`_,>=1.17.0,>=1.17.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
+      ,,,,,,,,,,,,,,,,,,,,
+      THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,
+      Thrust,2.6.0,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
+      CUB,2.6.0,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
+      ,,,,,,,,,,,,,,,,,,,,
+      DRIVER & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,
+      :doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x","30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x, 6.2.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
+      ,,,,,,,,,,,,,,,,,,,,
+      ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,
+      :doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
+      :doc:`MIGraphX <amdmigraphx:index>`,2.13.0,2.13.0,2.12.0,2.12.0,2.12.0,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
+      :doc:`MIOpen <miopen:index>`,3.5.0,3.5.0,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
+      :doc:`MIVisionX <mivisionx:index>`,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
+      :doc:`rocAL <rocal:index>`,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
+      :doc:`rocDecode <rocdecode:index>`,1.0.0,1.0.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
+      :doc:`rocJPEG <rocjpeg:index>`,1.1.0,1.1.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`rocPyDecode <rocpydecode:index>`,0.6.0,0.6.0,0.3.1,0.3.1,0.3.1,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`RPP <rpp:index>`,2.0.0,2.0.0,1.9.10,1.9.10,1.9.10,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
+      ,,,,,,,,,,,,,,,,,,,,
+      COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,
+      :doc:`RCCL <rccl:index>`,2.26.6,2.26.6,2.22.3,2.22.3,2.22.3,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
+      :doc:`rocSHMEM <rocshmem:index>`,3.0.0,3.0.0,2.0.1,2.0.1,2.0.0,2.0.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
+      ,,,,,,,,,,,,,,,,,,,,
+      MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,
+      `half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
+      :doc:`hipBLAS <hipblas:index>`,3.0.2,3.0.0,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
+      :doc:`hipBLASLt <hipblaslt:index>`,1.0.0,1.0.0,0.12.1,0.12.1,0.12.1,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
+      :doc:`hipFFT <hipfft:index>`,1.0.20,1.0.20,1.0.18,1.0.18,1.0.18,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
+      :doc:`hipfort <hipfort:index>`,0.7.0,0.7.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
+      :doc:`hipRAND <hiprand:index>`,3.0.0,3.0.0,2.12.0,2.12.0,2.12.0,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
+      :doc:`hipSOLVER <hipsolver:index>`,3.0.0,3.0.0,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
+      :doc:`hipSPARSE <hipsparse:index>`,4.0.1,4.0.1,3.2.0,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
+      :doc:`hipSPARSELt <hipsparselt:index>`,0.2.4,0.2.4,0.2.3,0.2.3,0.2.3,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
+      :doc:`rocALUTION <rocalution:index>`,4.0.0,4.0.0,3.2.3,3.2.3,3.2.3,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
+      :doc:`rocBLAS <rocblas:index>`,5.0.2,5.0.0,4.4.1,4.4.1,4.4.0,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
+      :doc:`rocFFT <rocfft:index>`,1.0.34,1.0.34,1.0.32,1.0.32,1.0.32,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
+      :doc:`rocRAND <rocrand:index>`,4.0.0,4.0.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
+      :doc:`rocSOLVER <rocsolver:index>`,3.30.1,3.30.0,3.28.2,3.28.2,3.28.0,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
+      :doc:`rocSPARSE <rocsparse:index>`,4.0.2,4.0.2,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
+      :doc:`rocWMMA <rocwmma:index>`,2.0.0,2.0.0,1.7.0,1.7.0,1.7.0,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
+      :doc:`Tensile <tensile:src/index>`,4.44.0,4.44.0,4.43.0,4.43.0,4.43.0,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
+      ,,,,,,,,,,,,,,,,,,,,
+      PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,
+      :doc:`hipCUB <hipcub:index>`,4.0.0,4.0.0,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
+      :doc:`hipTensor <hiptensor:index>`,2.0.0,2.0.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
+      :doc:`rocPRIM <rocprim:index>`,4.0.1,4.0.0,3.4.1,3.4.1,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
+      :doc:`rocThrust <rocthrust:index>`,4.0.0,4.0.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
+      ,,,,,,,,,,,,,,,,,,,,
+      SUPPORT LIBS,,,,,,,,,,,,,,,,,,,,
+      `hipother <https://github.com/ROCm/hipother>`_,7.0.51830,7.0.51830,6.4.43483,6.4.43483,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
+      `rocm-core <https://github.com/ROCm/rocm-core>`_,7.0.2,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
+      `ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
+      ,,,,,,,,,,,,,,,,,,,,
+      SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,
+      :doc:`AMD SMI <amdsmi:index>`,26.0.2,26.0.0,25.5.1,25.5.1,25.4.2,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
+      :doc:`ROCm Data Center Tool <rdc:index>`,1.1.0,1.1.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
+      :doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
+      :doc:`ROCm SMI <rocm_smi_lib:index>`,7.8.0,7.8.0,7.7.0,7.5.0,7.5.0,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
+      :doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.2.0,1.2.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
+      ,,,,,,,,,,,,,,,,,,,,
+      PERFORMANCE TOOLS,,,,,,,,,,,,,,,,,,,,
+      :doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,2.6.0,2.6.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
+      :doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.2.3,3.2.3,3.1.1,3.1.1,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.1.1,1.1.0,1.0.2,1.0.2,1.0.1,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`ROCProfiler <rocprofiler:index>`,2.0.70002,2.0.70000,2.0.60403,2.0.60402,2.0.60401,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
+      :doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,1.0.0,1.0.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`ROCTracer <roctracer:index>`,4.1.70002,4.1.70000,4.1.60403,4.1.60402,4.1.60401,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
+      ,,,,,,,,,,,,,,,,,,,,
+      DEVELOPMENT TOOLS,,,,,,,,,,,,,,,,,,,,
+      :doc:`HIPIFY <hipify:index>`,20.0.0,20.0.0,19.0.0,19.0.0,19.0.0,19.0.0,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
+      :doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
+      :doc:`ROCdbgapi <rocdbgapi:index>`,0.77.4,0.77.3,0.77.2,0.77.2,0.77.2,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
+      :doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,16.3.0,16.3.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
+      `rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
+      :doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.1.0,2.1.0,2.0.4,2.0.4,2.0.4,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
+      ,,,,,,,,,,,,,,,,,,,,
+      COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,
+      `clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
+      :doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
+      `Flang <https://github.com/ROCm/flang>`_,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
+      :doc:`llvm-project <llvm-project:index>`,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
+      `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
+      ,,,,,,,,,,,,,,,,,,,,
+      RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,
+      :doc:`AMD CLR <hip:understand/amd_clr>`,7.0.51831,7.0.51830,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
+      :doc:`HIP <hip:index>`,7.0.51831,7.0.51830,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
+      `OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
+      :doc:`ROCr Runtime <rocr-runtime:index>`,1.18.0,1.18.0,1.15.0,1.15.0,1.15.0,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
--- a/docs/compatibility/compatibility-matrix.rst
+++ b/docs/compatibility/compatibility-matrix.rst
@@ -11,9 +11,8 @@ Use this matrix to view the ROCm compatibility and system requirements across su
 You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-rocm-compatibility-matrix>`.

 Accelerators and GPUs listed in the following table support compute workloads (no display
-information or graphics). If you’re using ROCm with AMD Radeon or Radeon Pro GPUs for graphics
-workloads, see the `Use ROCm on Radeon GPU documentation
-<https://rocm.docs.amd.com/projects/radeon/en/latest/docs/compatibility.html>`_ to verify
+information or graphics). If you’re using ROCm with AMD Radeon GPUs or Ryzen APUs for graphics
+workloads, see the :doc:`Use ROCm on Radeon and Ryzen <radeon:index>` documentation to verify
 compatibility and system requirements.

 .. |br| raw:: html
@@ -23,20 +22,20 @@ compatibility and system requirements.
 .. container:: format-big-table

  .. csv-table::
-      :header: "ROCm Version", "7.0.1/7.0.0", "6.4.3", "6.3.0"
+      :header: "ROCm Version", "7.0.2", "7.0.1/7.0.0", "6.4.0"
      :stub-columns: 1

-      :ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.3,Ubuntu 24.04.2,Ubuntu 24.04.2
+      :ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.3,Ubuntu 24.04.3,Ubuntu 24.04.2
      ,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5
-      ,"RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.5, 9.4"
-      ,RHEL 8.10 [#rhel-700]_,RHEL 8.10,RHEL 8.10
-      ,SLES 15 SP7 [#sles-db-700]_,"SLES 15 SP7, SP6","SLES 15 SP6, SP5"
-      ,"Oracle Linux 9, 8 [#ol-700-mi300x]_","Oracle Linux 9, 8 [#ol-mi300x]_",Oracle Linux 8.10 [#ol-mi300x]_
-      ,Debian 12 [#sles-db-700]_,Debian 12 [#single-node]_,
-      ,Azure Linux 3.0 [#az-mi300x]_,Azure Linux 3.0 [#az-mi300x]_,
-      ,Rocky Linux 9 [#rl-700]_,,
+      ,"RHEL 10.0 [#rhel-10-702]_, 9.6 [#rhel-10-702]_, 9.4 [#rhel-94-702]_","RHEL 9.6 [#rhel-10-702]_, 9.4 [#rhel-94-702]_","RHEL 9.5, 9.4"
+      ,RHEL 8.10 [#rhel-700]_,RHEL 8.10 [#rhel-700]_,RHEL 8.10
+      ,SLES 15 SP7 [#sles-db-700]_,SLES 15 SP7 [#sles-db-700]_,SLES 15 SP6
+      ,"Oracle Linux 10, 9, 8 [#ol-700-mi300x]_","Oracle Linux 9, 8 [#ol-700-mi300x]_","Oracle Linux 9, 8 [#ol-mi300x]_"
+      ,"Debian 13 [#db-mi300x]_, 12 [#sles-db-700]_",Debian 12 [#sles-db-700]_,Debian 12 [#single-node]_
+      ,Azure Linux 3.0 [#az-mi300x]_,Azure Linux 3.0 [#az-mi300x]_,Azure Linux 3.0 [#az-mi300x]_
+      ,Rocky Linux 9 [#rl-700]_,Rocky Linux 9 [#rl-700]_,
      ,.. _architecture-support-compatibility-matrix:,,
-      :doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA4,,
+      :doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA4,CDNA4,
      ,CDNA3,CDNA3,CDNA3
      ,CDNA2,CDNA2,CDNA2
      ,CDNA,CDNA,CDNA
@@ -44,137 +43,140 @@ compatibility and system requirements.
      ,RDNA3,RDNA3,RDNA3
      ,RDNA2,RDNA2,RDNA2
      ,.. _gpu-support-compatibility-matrix:,,
-      :doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx950 [#mi350x-os]_,,
-      ,gfx1201 [#RDNA-OS-700]_,gfx1201 [#RDNA-OS]_,
-      ,gfx1200 [#RDNA-OS-700]_,gfx1200 [#RDNA-OS]_,
-      ,gfx1101 [#RDNA-OS-700]_ [#rd-v710]_,gfx1101 [#RDNA-OS]_ [#7700XT-OS]_,
-      ,gfx1100 [#RDNA-OS-700]_,gfx1100,gfx1100
-      ,gfx1030 [#RDNA-OS-700]_ [#rd-v620]_,gfx1030,gfx1030
-      ,gfx942 [#mi325x-os]_ [#mi300x-os]_ [#mi300A-os]_,gfx942,gfx942
-      ,gfx90a [#mi200x-os]_,gfx90a,gfx90a
-      ,gfx908 [#mi100-os]_,gfx908,gfx908
+      :doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx950 [#mi350x-os]_,gfx950 [#mi350x-os]_,
+      ,gfx1201 [#RDNA-OS-700]_,gfx1201 [#RDNA-OS-700]_,
+      ,gfx1200 [#RDNA-OS-700]_,gfx1200 [#RDNA-OS-700]_,
+      ,gfx1101 [#RDNA-OS-700]_ [#rd-v710]_,gfx1101 [#RDNA-OS-700]_ [#rd-v710]_,
+      ,gfx1100 [#RDNA-OS-700]_,gfx1100 [#RDNA-OS-700]_,gfx1100
+      ,gfx1030 [#RDNA-OS-700]_ [#rd-v620]_,gfx1030 [#RDNA-OS-700]_ [#rd-v620]_,gfx1030
+      ,gfx942 [#mi325x-os]_ [#mi300x-os]_ [#mi300A-os]_,gfx942 [#mi325x-os]_ [#mi300x-os]_ [#mi300A-os]_,gfx942
+      ,gfx90a [#mi200x-os]_,gfx90a [#mi200x-os]_,gfx90a
+      ,gfx908 [#mi100-os]_,gfx908 [#mi100-os]_,gfx908
      ,,,
      FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix:,,
-      :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.7, 2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 2.1, 2.0, 1.13"
-      :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.19.1, 2.18.1","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1"
-      :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.6.0,0.4.35,0.4.31
-      :doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>` [#stanford-megatron-lm_compat]_,N/A,N/A,85f95ae
-      :doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>` [#megablocks_compat]_,N/A,N/A,0.7.0
-      `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.22.0,1.20.0,1.17.3
+      :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.8, 2.7, 2.6","2.7, 2.6, 2.5","2.6, 2.5, 2.4, 2.3"
+      :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.19.1, 2.18.1, 2.17.1 [#tf-mi350]_","2.19.1, 2.18.1, 2.17.1 [#tf-mi350]_","2.18.1, 2.17.1, 2.16.2"
+      :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.6.0,0.6.0,0.4.35
+      :doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat]_,N/A,N/A,2.4.0
+      :doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat]_,N/A,b6356,b5997
+      `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.22.0,1.22.0,1.20.0
      ,,,
      THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix:,,
-      `UCC <https://github.com/ROCm/ucc>`_,>=1.4.0,>=1.3.0,>=1.3.0
-      `UCX <https://github.com/ROCm/ucx>`_,>=1.17.0,>=1.15.0,>=1.15.0
+      `UCC <https://github.com/ROCm/ucc>`_,>=1.4.0,>=1.4.0,>=1.3.0
+      `UCX <https://github.com/ROCm/ucx>`_,>=1.17.0,>=1.17.0,>=1.15.0
      ,,,
      THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix:,,
-      Thrust,2.6.0,2.5.0,2.3.2
-      CUB,2.6.0,2.5.0,2.3.2
+      Thrust,2.6.0,2.6.0,2.5.0
+      CUB,2.6.0,2.6.0,2.5.0
      ,,,
      DRIVER & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,,
-      :doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.10.1 [#driver_patch]_, 30.10, 6.4.x, 6.3.x, 6.2.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
+      :doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.10.2, 30.10.1 [#driver_patch]_, |br| 30.10, 6.4.x, 6.3.x","30.10.1 [#driver_patch]_, 30.10, |br| 6.4.x, 6.3.x, 6.2.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
      ,,,
      ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,,
      :doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0
-      :doc:`MIGraphX <amdmigraphx:index>`,2.13.0,2.12.0,2.11.0
-      :doc:`MIOpen <miopen:index>`,3.5.0,3.4.0,3.3.0
-      :doc:`MIVisionX <mivisionx:index>`,3.3.0,3.2.0,3.1.0
-      :doc:`rocAL <rocal:index>`,2.3.0,2.2.0,2.1.0
-      :doc:`rocDecode <rocdecode:index>`,1.0.0,0.10.0,0.8.0
-      :doc:`rocJPEG <rocjpeg:index>`,1.1.0,0.8.0,0.6.0
-      :doc:`rocPyDecode <rocpydecode:index>`,0.6.0,0.3.1,0.2.0
-      :doc:`RPP <rpp:index>`,2.0.0,1.9.10,1.9.1
+      :doc:`MIGraphX <amdmigraphx:index>`,2.13.0,2.13.0,2.12.0
+      :doc:`MIOpen <miopen:index>`,3.5.0,3.5.0,3.4.0
+      :doc:`MIVisionX <mivisionx:index>`,3.3.0,3.3.0,3.2.0
+      :doc:`rocAL <rocal:index>`,2.3.0,2.3.0,2.2.0
+      :doc:`rocDecode <rocdecode:index>`,1.0.0,1.0.0,0.10.0
+      :doc:`rocJPEG <rocjpeg:index>`,1.1.0,1.1.0,0.8.0
+      :doc:`rocPyDecode <rocpydecode:index>`,0.6.0,0.6.0,0.3.1
+      :doc:`RPP <rpp:index>`,2.0.0,2.0.0,1.9.10
      ,,,
      COMMUNICATION,.. _commlibs-support-compatibility-matrix:,,
-      :doc:`RCCL <rccl:index>`,2.26.6,2.22.3,2.21.5
-      :doc:`rocSHMEM <rocshmem:index>`,3.0.0,2.0.1,N/A
+      :doc:`RCCL <rccl:index>`,2.26.6,2.26.6,2.22.3
+      :doc:`rocSHMEM <rocshmem:index>`,3.0.0,3.0.0,2.0.0
      ,,,
      MATH LIBS,.. _mathlibs-support-compatibility-matrix:,,
      `half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0
-      :doc:`hipBLAS <hipblas:index>`,3.0.0,2.4.0,2.3.0
-      :doc:`hipBLASLt <hipblaslt:index>`,1.0.0,0.12.1,0.10.0
-      :doc:`hipFFT <hipfft:index>`,1.0.20,1.0.18,1.0.17
-      :doc:`hipfort <hipfort:index>`,0.7.0,0.6.0,0.5.0
-      :doc:`hipRAND <hiprand:index>`,3.0.0,2.12.0,2.11.0
-      :doc:`hipSOLVER <hipsolver:index>`,3.0.0,2.4.0,2.3.0
-      :doc:`hipSPARSE <hipsparse:index>`,4.0.1,3.2.0,3.1.2
-      :doc:`hipSPARSELt <hipsparselt:index>`,0.2.4,0.2.3,0.2.2
-      :doc:`rocALUTION <rocalution:index>`,4.0.0,3.2.3,3.2.1
-      :doc:`rocBLAS <rocblas:index>`,5.0.0,4.4.1,4.3.0
-      :doc:`rocFFT <rocfft:index>`,1.0.34,1.0.32,1.0.31
-      :doc:`rocRAND <rocrand:index>`,4.0.0,3.3.0,3.2.0
-      :doc:`rocSOLVER <rocsolver:index>`,3.30.0,3.28.2,3.27.0
-      :doc:`rocSPARSE <rocsparse:index>`,4.0.2,3.4.0,3.3.0
-      :doc:`rocWMMA <rocwmma:index>`,2.0.0,1.7.0,1.6.0
-      :doc:`Tensile <tensile:src/index>`,4.44.0,4.43.0,4.42.0
+      :doc:`hipBLAS <hipblas:index>`,3.0.2,3.0.0,2.4.0
+      :doc:`hipBLASLt <hipblaslt:index>`,1.0.0,1.0.0,0.12.0
+      :doc:`hipFFT <hipfft:index>`,1.0.20,1.0.20,1.0.18
+      :doc:`hipfort <hipfort:index>`,0.7.0,0.7.0,0.6.0
+      :doc:`hipRAND <hiprand:index>`,3.0.0,3.0.0,2.12.0
+      :doc:`hipSOLVER <hipsolver:index>`,3.0.0,3.0.0,2.4.0
+      :doc:`hipSPARSE <hipsparse:index>`,4.0.1,4.0.1,3.2.0
+      :doc:`hipSPARSELt <hipsparselt:index>`,0.2.4,0.2.4,0.2.3
+      :doc:`rocALUTION <rocalution:index>`,4.0.0,4.0.0,3.2.2
+      :doc:`rocBLAS <rocblas:index>`,5.0.2,5.0.0,4.4.0
+      :doc:`rocFFT <rocfft:index>`,1.0.34,1.0.34,1.0.32
+      :doc:`rocRAND <rocrand:index>`,4.0.0,4.0.0,3.3.0
+      :doc:`rocSOLVER <rocsolver:index>`,3.30.1,3.30.0,3.28.0
+      :doc:`rocSPARSE <rocsparse:index>`,4.0.2,4.0.2,3.4.0
+      :doc:`rocWMMA <rocwmma:index>`,2.0.0,2.0.0,1.7.0
+      :doc:`Tensile <tensile:src/index>`,4.44.0,4.44.0,4.43.0
      ,,,
      PRIMITIVES,.. _primitivelibs-support-compatibility-matrix:,,
-      :doc:`hipCUB <hipcub:index>`,4.0.0,3.4.0,3.3.0
-      :doc:`hipTensor <hiptensor:index>`,2.0.0,1.5.0,1.4.0
-      :doc:`rocPRIM <rocprim:index>`,4.0.0,3.4.1,3.3.0
-      :doc:`rocThrust <rocthrust:index>`,4.0.0,3.3.0,3.3.0
+      :doc:`hipCUB <hipcub:index>`,4.0.0,4.0.0,3.4.0
+      :doc:`hipTensor <hiptensor:index>`,2.0.0,2.0.0,1.5.0
+      :doc:`rocPRIM <rocprim:index>`,4.0.1,4.0.0,3.4.0
+      :doc:`rocThrust <rocthrust:index>`,4.0.0,4.0.0,3.3.0
      ,,,
      SUPPORT LIBS,,,
-      `hipother <https://github.com/ROCm/hipother>`_,7.0.51830,6.4.43483,6.3.42131
-      `rocm-core <https://github.com/ROCm/rocm-core>`_,7.0.0,6.4.3,6.3.0
+      `hipother <https://github.com/ROCm/hipother>`_,7.0.51830,7.0.51830,6.4.43482
+      `rocm-core <https://github.com/ROCm/rocm-core>`_,7.0.2,7.0.1/7.0.0,6.4.0
      `ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_
      ,,,
      SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix:,,
-      :doc:`AMD SMI <amdsmi:index>`,26.0.0,25.5.1,24.7.1
-      :doc:`ROCm Data Center Tool <rdc:index>`,1.1.0,0.3.0,0.3.0
+      :doc:`AMD SMI <amdsmi:index>`,26.0.2,26.0.0,25.3.0
+      :doc:`ROCm Data Center Tool <rdc:index>`,1.1.0,1.1.0,0.3.0
      :doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0
-      :doc:`ROCm SMI <rocm_smi_lib:index>`,7.8.0,7.7.0,7.4.0
-      :doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.2.0,1.1.0,1.1.0
+      :doc:`ROCm SMI <rocm_smi_lib:index>`,7.8.0,7.8.0,7.5.0
+      :doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.2.0,1.2.0,1.1.0
      ,,,
      PERFORMANCE TOOLS,,,
-      :doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,2.6.0,1.4.0,1.4.0
-      :doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.2.3,3.1.1,3.0.0
-      :doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.1.0,1.0.2,0.1.0
-      :doc:`ROCProfiler <rocprofiler:index>`,2.0.70000,2.0.60403,2.0.60300
-      :doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,1.0.0,0.6.0,0.5.0
-      :doc:`ROCTracer <roctracer:index>`,4.1.70000,4.1.60403,4.1.60300
+      :doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,2.6.0,2.6.0,1.4.0
+      :doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.2.3,3.2.3,3.1.0
+      :doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.1.1,1.1.0,1.0.0
+      :doc:`ROCProfiler <rocprofiler:index>`,2.0.70002,2.0.70000,2.0.60400
+      :doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,1.0.0,1.0.0,0.6.0
+      :doc:`ROCTracer <roctracer:index>`,4.1.70002,4.1.70000,4.1.60400
      ,,,
      DEVELOPMENT TOOLS,,,
-      :doc:`HIPIFY <hipify:index>`,20.0.0,19.0.0,18.0.0.24455
+      :doc:`HIPIFY <hipify:index>`,20.0.0,20.0.0,19.0.0
      :doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0
-      :doc:`ROCdbgapi <rocdbgapi:index>`,0.77.3,0.77.2,0.77.0
-      :doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,16.3.0,15.2.0,15.2.0
-      `rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.5.0,0.4.0,0.4.0
-      :doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.1.0,2.0.4,2.0.3
+      :doc:`ROCdbgapi <rocdbgapi:index>`,0.77.4,0.77.3,0.77.2
+      :doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,16.3.0,16.3.0,15.2.0
+      `rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.5.0,0.5.0,0.4.0
+      :doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.1.0,2.1.0,2.0.4
      ,,,
      COMPILERS,.. _compilers-support-compatibility-matrix:,,
+      `clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A
      :doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1
-      `Flang <https://github.com/ROCm/flang>`_,20.0.0.25314,19.0.0.25224,18.0.0.24455
-      :doc:`llvm-project <llvm-project:index>`,20.0.0.25314,19.0.0.25224,18.0.0.24491
-      `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,20.0.0.25314,19.0.0.25224,18.0.0.24491
+      `Flang <https://github.com/ROCm/flang>`_,20.0.0.25385,20.0.0.25314,19.0.0.25133
+      :doc:`llvm-project <llvm-project:index>`,20.0.0.25385,20.0.0.25314,19.0.0.25133
+      `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,20.0.0.25385,20.0.0.25314,19.0.0.25133
      ,,,
      RUNTIMES,.. _runtime-support-compatibility-matrix:,,
-      :doc:`AMD CLR <hip:understand/amd_clr>`,7.0.51830,6.4.43484,6.3.42131
-      :doc:`HIP <hip:index>`,7.0.51830,6.4.43484,6.3.42131
+      :doc:`AMD CLR <hip:understand/amd_clr>`,7.0.51831,7.0.51830,6.4.43482
+      :doc:`HIP <hip:index>`,7.0.51831,7.0.51830,6.4.43482
      `OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0
-      :doc:`ROCr Runtime <rocr-runtime:index>`,1.18.0,1.15.0,1.14.0
+      :doc:`ROCr Runtime <rocr-runtime:index>`,1.18.0,1.18.0,1.15.0

 .. rubric:: Footnotes

-.. [#rhel-700] RHEL 8.10 is only supported on AMD Instinct MI300X, MI300A, MI250X, MI250, MI210, and MI100 GPUs.
-.. [#ol-700-mi300x] **For ROCm 7.0.x** - Oracle Linux 9 is supported only on AMD Instinct MI355X, MI350X, and MI300X GPUs. Oracle Linux 8 is supported only on AMD Instinct MI300X GPUs.
+.. [#rhel-10-702] RHEL 10.0 and RHEL 9.6 are supported on all listed :ref:`supported_GPUs` except the AMD Radeon PRO V620 GPU.
+.. [#rhel-94-702] RHEL 9.4 is supported on all AMD Instinct GPUs listed under :ref:`supported_GPUs`.
+.. [#rhel-700] RHEL 8.10 is supported only on AMD Instinct MI300X, MI300A, MI250X, MI250, MI210, and MI100 GPUs.
+.. [#ol-700-mi300x] **For ROCm 7.0.x** - Oracle Linux 10 and 9 are supported only on AMD Instinct MI355X, MI350X, and MI300X GPUs. Oracle Linux 8 is supported only on AMD Instinct MI300X GPUs.
 .. [#ol-mi300x] **Prior ROCm 7.0.0** - Oracle Linux is supported only on AMD Instinct MI300X GPUs.
-.. [#sles-db-700] **For ROCm 7.0.x** - SLES 15 SP7 and Debian 12 are only supported on AMD Instinct MI300X, MI300A, MI250X, MI250, and MI210 GPUs.
-.. [#az-mi300x] Starting ROCm 6.4.0, Azure Linux 3.0 is supported only on AMD Instinct MI300X and AMD Radeon PRO V710.
-.. [#rl-700] Rocky Linux 9 is only supported on AMD Instinct MI300X and MI300A GPUs.
-.. [#single-node] **Prior to ROCm 7.0.0** - Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
-.. [#mi350x-os] AMD Instinct MI355X (gfx950) and MI350X(gfx950) GPUs are only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.4, and Oracle Linux 9.
-.. [#RDNA-OS-700] **For ROCm 7.0.x** - AMD Radeon PRO AI PRO R9700 (gfx1201), AMD Radeon RX 9070 XT (gfx1201), AMD Radeon RX 9070 GRE (gfx1201), AMD Radeon RX 9070 (gfx1201), AMD Radeon RX 9060 XT (gfx1200), AMD Radeon RX 7800 XT (gfx1101), AMD Radeon RX 7700 XT (gfx1101), AMD Radeon PRO W7700 (gfx1101), and AMD Radeon PRO W6800 (gfx1030) are only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, and RHEL 9.6.
-.. [#RDNA-OS] **Prior ROCm 7.0.0** - Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
-.. [#rd-v710] **For ROCm 7.0.x** - AMD Radeon PRO V710 (gfx1101) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and Azure Linux 3.0.
-.. [#rd-v620] **For ROCm 7.0.x** - AMD Radeon PRO V620 (gfx1030) is only supported on Ubuntu 24.04.3 and Ubuntu 22.04.5.
-.. [#mi325x-os] **For ROCm 7.0.x** - AMD Instinct MI325X GPU (gfx942) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
-.. [#mi300x-os] **For ROCm 7.0.x** - AMD Instinct MI300X GPU (gfx942) is supported on all listed :ref:`supported_distributions`.
-.. [#mi300A-os] **For ROCm 7.0.x** - AMD Instinct MI300A GPU (gfx942) is supported only on Ubuntu 24.04, Ubuntu 22.04, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, Debian 12, and Rocky Linux 9.
-.. [#mi200x-os] **For ROCm 7.0.x** - AMD Instinct MI200 Series GPUs (gfx90a) are supported only on Ubuntu 24.04, Ubuntu 22.04, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, and Debian 12.
-.. [#mi100-os] **For ROCm 7.0.x** - AMD Instinct MI100 GPU (gfx908) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.4, and RHEL 8.10.
-.. [#7700XT-OS] **Prior ROCm 7.0.0** - Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
-.. [#stanford-megatron-lm_compat] Stanford Megatron-LM is only supported on ROCm 6.3.0.
-.. [#megablocks_compat] Megablocks is only supported on ROCm 6.3.0.
+.. [#db-mi300x] **For ROCm 7.0.2** - Debian 13 is supported only on AMD Instinct MI300X GPUs.
+.. [#sles-db-700] **For ROCm 7.0.x** - SLES 15 SP7 and Debian 12 are supported only on AMD Instinct MI300X, MI300A, MI250X, MI250, and MI210 GPUs.
+.. [#az-mi300x] Starting ROCm 6.4.0, Azure Linux 3.0 is supported only on AMD Instinct MI300X and AMD Radeon PRO V710 GPUs.
+.. [#rl-700] Rocky Linux 9 is supported only on AMD Instinct MI300X and MI300A GPUs.
+.. [#single-node] **Prior to ROCm 7.0.0** - Debian 12 is supported only on AMD Instinct MI300X GPUs for single-node functionality.
+.. [#mi350x-os] AMD Instinct MI355X (gfx950) and MI350X (gfx950) GPUs are supported only on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, Oracle Linux 10, and Oracle Linux 9.
+.. [#RDNA-OS-700] **For ROCm 7.0.x** - AMD Radeon AI PRO R9700 (gfx1201), AMD Radeon RX 9070 XT (gfx1201), AMD Radeon RX 9070 GRE (gfx1201), AMD Radeon RX 9070 (gfx1201), AMD Radeon RX 9060 XT (gfx1200), AMD Radeon RX 9060 (gfx1200), AMD Radeon RX 7800 XT (gfx1101), AMD Radeon RX 7700 XT (gfx1101), AMD Radeon PRO W7700 (gfx1101), and AMD Radeon PRO W6800 (gfx1030) are supported only on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, and RHEL 9.6.
+.. [#rd-v710] **For ROCm 7.0.x** - AMD Radeon PRO V710 (gfx1101) GPUs are supported only on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, and Azure Linux 3.0.
+.. [#rd-v620] **For ROCm 7.0.x** - AMD Radeon PRO V620 (gfx1030) GPUs are supported only on Ubuntu 24.04.3 and Ubuntu 22.04.5.
+.. [#mi325x-os] **For ROCm 7.0.x** - AMD Instinct MI325X GPUs (gfx942) are supported only on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
+.. [#mi300x-os] **For ROCm 7.0.x** - AMD Instinct MI300X GPUs (gfx942) are supported on all listed :ref:`supported_distributions`.
+.. [#mi300A-os] **For ROCm 7.0.x** - AMD Instinct MI300A GPUs (gfx942) are supported only on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, Debian 12, and Rocky Linux 9.
+.. [#mi200x-os] **For ROCm 7.0.x** - AMD Instinct MI200 Series GPUs (gfx90a) are supported only on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, and Debian 12.
+.. [#mi100-os] **For ROCm 7.0.x** - AMD Instinct MI100 GPUs (gfx908) are supported only on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, and RHEL 8.10.
+.. [#tf-mi350] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead.
+.. [#dgl_compat] DGL is supported only on ROCm 6.4.0.
+.. [#llama-cpp_compat] llama.cpp is supported only on ROCm 7.0.0 and ROCm 6.4.x.
 .. [#driver_patch] AMD GPU Driver (amdgpu) 30.10.1 is a quality release that resolves an issue identified in the 30.10 release. There are no other significant changes or feature additions in ROCm 7.0.1 from ROCm 7.0.0. AMD GPU Driver (amdgpu) 30.10.1 is compatible with ROCm 7.0.1 and ROCm 7.0.0.
 .. [#kfd_support] As of ROCm 6.4.0, forward and backward compatibility between the AMD GPU Driver (amdgpu) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The supported user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and AMD GPU Driver support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
 .. [#ROCT-rocr] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
@@ -198,6 +200,8 @@ Use this lookup table to confirm which operating system and kernel versions are
   ,,
   `Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 22.04.5, "5.15 [GA], 6.8 [HWE]", 2.35
   ,,
+   `Red Hat Enterprise Linux (RHEL 10) <https://access.redhat.com/articles/3078#RHEL10>`_, 10.0, 6.12.0-55, 2.39
+   ,,
   `Red Hat Enterprise Linux (RHEL 9) <https://access.redhat.com/articles/3078#RHEL9>`_, 9.6, 5.14.0-570, 2.34
   ,9.5, 5.14+, 2.34
   ,9.4, 5.14.0-427, 2.34
@@ -210,10 +214,12 @@ Use this lookup table to confirm which operating system and kernel versions are
   ,,
   `Rocky Linux <https://wiki.rockylinux.org/rocky/version/>`_, 9, 5.14.0-570, 2.34
   ,,
-   `Oracle Linux <https://blogs.oracle.com/scoter/post/oracle-linux-and-unbreakable-enterprise-kernel-uek-releases>`_, 9, 6.12.0 (UEK), 2.34
+   `Oracle Linux <https://blogs.oracle.com/scoter/post/oracle-linux-and-unbreakable-enterprise-kernel-uek-releases>`_, 10, 6.12.0 (UEK), 2.39
+   ,9, 6.12.0 (UEK), 2.34
   ,8, 5.15.0 (UEK), 2.28
   ,,
-   `Debian <https://www.debian.org/download>`_,12, 6.1.0, 2.36
+   `Debian <https://www.debian.org/download>`_,13, 6.12, 2.41
+   ,12, 6.1.0, 2.36
   ,,
   `Azure Linux <https://techcommunity.microsoft.com/blog/linuxandopensourceblog/azure-linux-3-0-now-in-preview-on-azure-kubernetes-service-v1-31/4287229>`_,3.0, 6.6.92, 2.38
   ,,
@@ -248,41 +254,46 @@ Expand for full historical view of:

   .. rubric:: Footnotes

-   .. [#rhel-700-past-60] **For ROCm 7.0.x** - RHEL 8.10 is only supported on AMD Instinct MI300X, MI300A, MI250X, MI250, MI210, and MI100 GPUs.
-   .. [#ol-700-mi300x-past-60] **For ROCm 7.0.x** - Oracle Linux 9 is supported only on AMD Instinct MI300X, MI350X, and MI355X. Oracle Linux 8 is only supported on AMD Instinct MI300X.
-   .. [#mi300x-past-60] **Prior ROCm 7.0.0** - Oracle Linux is supported only on AMD Instinct MI300X.
-   .. [#sles-db-700-past-60] **For ROCm 7.0.x** - SLES 15 SP7 and Debian 12 are only supported on AMD Instinct MI300X, MI300A, MI250X, MI250, and MI210 GPUs.
-   .. [#single-node-past-60] **Prior to ROCm 7.0.0** - Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
-   .. [#az-mi300x-past-60] Starting from ROCm 6.4.0, Azure Linux 3.0 is supported only on AMD Instinct MI300X and AMD Radeon PRO V710.
-   .. [#az-mi300x-630-past-60] **Prior ROCm 6.4.0**- Azure Linux 3.0 is supported only on AMD Instinct MI300X.
-   .. [#rl-700-past-60] Rocky Linux 9 is only supported on AMD Instinct MI300X and MI300A GPUs.
-   .. [#mi350x-os-past-60] AMD Instinct MI355X (gfx950) and MI350X(gfx950) GPUs are only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.4, and Oracle Linux 9.
-   .. [#RDNA-OS-700-past-60] **For ROCm 7.0.x** AMD Radeon PRO AI PRO R9700 (gfx1201), AMD Radeon RX 9070 XT (gfx1201), AMD Radeon RX 9070 GRE (gfx1201), AMD Radeon RX 9070 (gfx1201), AMD Radeon RX 9060 XT (gfx1200), AMD Radeon RX 7800 XT (gfx1101), AMD Radeon RX 7700 XT (gfx1101), AMD Radeon PRO W7700 (gfx1101), and AMD Radeon PRO W6800 (gfx1030) are only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, and RHEL 9.6.
+   .. [#rhel-10-702-past-60] RHEL 10.0 and RHEL 9.6 are supported on all listed :ref:`supported_GPUs` except AMD Radeon PRO V620 GPUs.
+   .. [#rhel-94-702-past-60] RHEL 9.4 is supported on all AMD Instinct GPUs listed under :ref:`supported_GPUs`.
+   .. [#rhel-700-past-60] **For ROCm 7.0.x** - RHEL 8.10 is supported only on AMD Instinct MI300X, MI300A, MI250X, MI250, MI210, and MI100 GPUs.
+   .. [#ol-700-mi300x-past-60] **For ROCm 7.0.x** - Oracle Linux 10 and 9 are supported only on AMD Instinct MI355X, MI350X, and MI300X GPUs. Oracle Linux 8 is supported only on AMD Instinct MI300X GPUs.
+   .. [#mi300x-past-60] **Prior to ROCm 7.0.0** - Oracle Linux is supported only on AMD Instinct MI300X GPUs.
+   .. [#db-mi300x-past-60] **For ROCm 7.0.2** - Debian 13 is supported only on AMD Instinct MI300X GPUs.
+   .. [#sles-db-700-past-60] **For ROCm 7.0.x** - SLES 15 SP7 and Debian 12 are supported only on AMD Instinct MI300X, MI300A, MI250X, MI250, and MI210 GPUs.
+   .. [#single-node-past-60] **Prior to ROCm 7.0.0** - Debian 12 is supported only on AMD Instinct MI300X GPUs for single-node functionality.
+   .. [#az-mi300x-past-60] Starting from ROCm 6.4.0, Azure Linux 3.0 is supported only on AMD Instinct MI300X and AMD Radeon PRO V710 GPUs.
+   .. [#az-mi300x-630-past-60] **Prior to ROCm 6.4.0** - Azure Linux 3.0 is supported only on AMD Instinct MI300X GPUs.
+   .. [#rl-700-past-60] Rocky Linux 9 is supported only on AMD Instinct MI300X and MI300A GPUs.
+   .. [#mi350x-os-past-60] AMD Instinct MI355X (gfx950) and MI350X (gfx950) GPUs are supported only on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, Oracle Linux 10, and Oracle Linux 9.
+   .. [#RDNA-OS-700-past-60] **For ROCm 7.0.x** - AMD Radeon AI PRO R9700 (gfx1201), AMD Radeon RX 9070 XT (gfx1201), AMD Radeon RX 9070 GRE (gfx1201), AMD Radeon RX 9070 (gfx1201), AMD Radeon RX 9060 XT (gfx1200), AMD Radeon RX 9060 (gfx1200), AMD Radeon RX 7800 XT (gfx1101), AMD Radeon RX 7700 XT (gfx1101), AMD Radeon PRO W7700 (gfx1101), and AMD Radeon PRO W6800 (gfx1030) are supported only on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, and RHEL 9.6.
   .. [#RDNA-OS-past-60] **Prior ROCm 7.0.0** - Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
-   .. [#rd-v710-past-60] **For ROCm 7.0.x** - AMD Radeon PRO V710 (gfx1101) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and Azure Linux 3.0.
-   .. [#rd-v620-past-60] **For ROCm 7.0.x** - AMD Radeon PRO V620 (gfx1030) is only supported on Ubuntu 24.04.3 and Ubuntu 22.04.5.
-   .. [#mi325x-os-past-60] **For ROCm 7.0.x** - AMD Instinct MI325X GPU (gfx942) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
+   .. [#rd-v710-past-60] **For ROCm 7.0.x** - AMD Radeon PRO V710 (gfx1101) is supported only on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, and Azure Linux 3.0.
+   .. [#rd-v620-past-60] **For ROCm 7.0.x** - AMD Radeon PRO V620 (gfx1030) is supported only on Ubuntu 24.04.3 and Ubuntu 22.04.5.
+   .. [#mi325x-os-past-60] **For ROCm 7.0.x** - AMD Instinct MI325X GPU (gfx942) is supported only on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
   .. [#mi300x-os-past-60] **For ROCm 7.0.x** - AMD Instinct MI300X GPU (gfx942) is supported on all listed :ref:`supported_distributions`.
-   .. [#mi300A-os-past-60] **For ROCm 7.0.x** - AMD Instinct MI300A GPU (gfx942) is supported only on Ubuntu 24.04, Ubuntu 22.04, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, Debian 12, and Rocky Linux 9.
-   .. [#mi200x-os-past-60] **For ROCm 7.0.x** - AMD Instinct MI200 Series GPUs (gfx90a) are supported only on Ubuntu 24.04, Ubuntu 22.04, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, and Debian 12.
-   .. [#mi100-os-past-60] **For ROCm 7.0.x** - AMD Instinct MI100 GPU (gfx908) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.4, and RHEL 8.10.
-   .. [#7700XT-OS-past-60] Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
+   .. [#mi300A-os-past-60] **For ROCm 7.0.x** - AMD Instinct MI300A GPU (gfx942) is supported only on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, Debian 12, and Rocky Linux 9.
+   .. [#mi200x-os-past-60] **For ROCm 7.0.x** - AMD Instinct MI200 Series GPUs (gfx90a) are supported only on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, and Debian 12.
+   .. [#mi100-os-past-60] **For ROCm 7.0.x** - AMD Instinct MI100 GPU (gfx908) is supported only on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, and RHEL 8.10.
+   .. [#7700XT-OS-past-60] **Prior to ROCm 7.0.0** - Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
   .. [#mi300_624-past-60] **For ROCm 6.2.4** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
   .. [#mi300_622-past-60] **For ROCm 6.2.2** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
   .. [#mi300_621-past-60] **For ROCm 6.2.1** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
   .. [#mi300_620-past-60] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
-   .. [#mi300_612-past-60] **For ROCm 6.1.2** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
-   .. [#mi300_611-past-60] **For ROCm 6.1.1** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
-   .. [#mi300_610-past-60] **For ROCm 6.1.0** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
-   .. [#mi300_602-past-60] **For ROCm 6.0.2** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
-   .. [#mi300_600-past-60] **For ROCm 6.0.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
-   .. [#verl_compat-past-60] verl is only supported on ROCm 6.2.0.
-   .. [#stanford-megatron-lm_compat-past-60] Stanford Megatron-LM is only supported on ROCm 6.3.0.
-   .. [#dgl_compat-past-60] DGL is only supported on ROCm 6.4.0.
-   .. [#megablocks_compat-past-60] Megablocks is only supported on ROCm 6.3.0.
-   .. [#taichi_compat-past-60] Taichi is only supported on ROCm 6.3.2.
-   .. [#ray_compat-past-60] Ray is only supported on ROCm 6.4.1.
-   .. [#llama-cpp_compat-past-60] llama.cpp is only supported on ROCm 6.4.0.
+   .. [#mi300_612-past-60] **For ROCm 6.1.2** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is supported only on Ubuntu 22.04.4 and Oracle Linux.
+   .. [#mi300_611-past-60] **For ROCm 6.1.1** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is supported only on Ubuntu 22.04.4 and Oracle Linux.
+   .. [#mi300_610-past-60] **For ROCm 6.1.0** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is supported only on Ubuntu 22.04.4.
+   .. [#mi300_602-past-60] **For ROCm 6.0.2** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is supported only on Ubuntu 22.04.3.
+   .. [#mi300_600-past-60] **For ROCm 6.0.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is supported only on Ubuntu 22.04.3.
+   .. [#tf-mi350-past-60] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead.
+   .. [#verl_compat-past-60] verl is supported only on ROCm 6.2.0.
+   .. [#stanford-megatron-lm_compat-past-60] Stanford Megatron-LM is supported only on ROCm 6.3.0.
+   .. [#dgl_compat-past-60] DGL is supported only on ROCm 6.4.0.
+   .. [#megablocks_compat-past-60] Megablocks is supported only on ROCm 6.3.0.
+   .. [#taichi_compat-past-60] Taichi is supported only on ROCm 6.3.2.
+   .. [#ray_compat-past-60] Ray is supported only on ROCm 6.4.1.
+   .. [#llama-cpp_compat-past-60] llama.cpp is supported only on ROCm 7.0.0 and 6.4.x.
+   .. [#flashinfer_compat-past-60] FlashInfer is supported only on ROCm 6.4.1.
   .. [#driver_patch-past-60] AMD GPU Driver (amdgpu) 30.10.1 is a quality release that resolves an issue identified in the 30.10 release. There are no other significant changes or feature additions in ROCm 7.0.1 from ROCm 7.0.0. AMD GPU Driver (amdgpu) 30.10.1 is compatible with ROCm 7.0.1 and ROCm 7.0.0.
   .. [#kfd_support-past-60] As of ROCm 6.4.0, forward and backward compatibility between the AMD GPU Driver (amdgpu) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The supported user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and AMD GPU Driver support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
   .. [#ROCT-rocr-past-60] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
--- a/docs/compatibility/ml-compatibility/flashinfer-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/flashinfer-compatibility.rst
@@ -0,0 +1,107 @@
+:orphan:
+
+.. meta::
+    :description: FlashInfer deep learning framework compatibility
+    :keywords: GPU, LLM, FlashInfer, compatibility
+
+.. version-set:: rocm_version latest
+
+********************************************************************************
+FlashInfer compatibility
+********************************************************************************
+
+`FlashInfer <https://docs.flashinfer.ai/index.html>`__ is a library and kernel generator
+for Large Language Models (LLMs) that provides high-performance implementations of graphics
+processing unit (GPU) kernels. FlashInfer focuses on LLM serving and inference, as well
+as advanced performance across diverse scenarios.
+
+FlashInfer features highly efficient attention kernels, load-balanced scheduling, and memory-optimized
+techniques, while supporting customized attention variants. It's compatible with ``torch.compile`` and
+offers high-performance LLM-specific operators, with easy integration through PyTorch and C++ APIs.
+
+.. note::
+
+  The ROCm port of FlashInfer is under active development, and some features are not yet available. 
+  For the latest feature compatibility matrix, refer to the ``README`` of the 
+  `https://github.com/ROCm/flashinfer <https://github.com/ROCm/flashinfer>`__ repository.
+
+Support for the ROCm port of FlashInfer is available as follows:
+
+- ROCm support for FlashInfer is hosted in the `https://github.com/ROCm/flashinfer 
+  <https://github.com/ROCm/flashinfer>`__ repository. This location differs from the 
+  `https://github.com/flashinfer-ai/flashinfer <https://github.com/flashinfer-ai/flashinfer>`__
+  upstream repository.
+
+- To install FlashInfer, use the prebuilt :ref:`Docker image <flashinfer-docker-compat>`, 
+  which includes ROCm, FlashInfer, and all required dependencies.
+
+  - See the :doc:`ROCm FlashInfer installation guide <rocm-install-on-linux:install/3rd-party/flashinfer-install>` 
+    to install and get started.
+
+  - See the `Installation guide <https://docs.flashinfer.ai/installation.html>`__ 
+    in the upstream FlashInfer documentation.
+
+.. note::
+
+  FlashInfer is supported on ROCm 6.4.1.
+
+Supported devices
+================================================================================
+
+**Officially Supported**: AMD Instinct™ MI300X
+
+
+.. _flashinfer-recommendations:
+
+Use cases and recommendations
+================================================================================
+
+This release of FlashInfer on ROCm provides the decode functionality for LLM inferencing.
+In the decode phase, tokens are generated sequentially, with the model predicting each new 
+token based on the previously generated tokens and the input context.
+
+FlashInfer on ROCm brings over upstream features such as load balancing, sparse and dense 
+attention optimizations, and batching support, enabling efficient execution on AMD Instinct™ MI300X GPUs.
+
+Because large LLMs often require substantial KV caches or long context windows, FlashInfer on ROCm 
+also implements cascade attention from upstream to reduce memory usage. 
+
+For currently supported use cases and recommendations, refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__, 
+where you can search for examples and best practices to optimize your workloads on AMD GPUs.
+
+.. _flashinfer-docker-compat:
+
+Docker image compatibility
+================================================================================
+
+.. |docker-icon| raw:: html
+
+   <i class="fab fa-docker"></i>
+
+AMD validates and publishes `ROCm FlashInfer images <https://hub.docker.com/r/rocm/flashinfer/tags>`__
+with ROCm and PyTorch backends on Docker Hub. The following Docker image tags and associated
+inventories represent the FlashInfer version from the official Docker Hub.
+The Docker images have been validated for `ROCm 6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`__.
+Click |docker-icon| to view the image on Docker Hub.
+
+.. list-table:: 
+    :header-rows: 1
+    :class: docker-image-compatibility
+
+    * - Docker image
+      - ROCm
+      - FlashInfer
+      - PyTorch
+      - Ubuntu
+      - Python
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/flashinfer/flashinfer-0.2.5_rocm6.4_ubuntu24.04_py3.12_pytorch2.7/images/sha256-558914838821c88c557fb6d42cfbc1bdb67d79d19759f37c764a9ee801f93313"><i class="fab fa-docker fa-lg"></i> rocm/flashinfer</a>
+      - `6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`__
+      - `v0.2.5 <https://github.com/flashinfer-ai/flashinfer/releases/tag/v0.2.5>`__
+      - `2.7.1 <https://github.com/ROCm/pytorch/releases/tag/v2.7.1>`__
+      - 24.04
+      - `3.12 <https://www.python.org/downloads/release/python-3129/>`__
+
+
--- a/docs/compatibility/ml-compatibility/jax-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/jax-compatibility.rst
@@ -90,75 +90,15 @@ For more use cases and recommendations, see `ROCm JAX blog posts <https://rocm.b
 Docker image compatibility
 ================================================================================

-.. |docker-icon| raw:: html
+AMD provides preconfigured Docker images with JAX and the ROCm backend.
+These images are published on `Docker Hub <https://hub.docker.com/r/rocm/jax>`__ and are the
+recommended way to get started with deep learning with JAX on ROCm.
+For ``jax-community`` images, see `rocm/jax-community
+<https://hub.docker.com/r/rocm/jax-community/tags>`__ on Docker Hub.

-   <i class="fab fa-docker"></i>
-
-AMD validates and publishes ready-made `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax>`_
-with ROCm backends on Docker Hub. The following Docker image tags and
-associated inventories represent the latest JAX version from the official Docker Hub and are validated for
-`ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`_. Click the |docker-icon|
-icon to view the image on Docker Hub.
-
-.. list-table:: JAX Docker image components
-    :header-rows: 1
-
-    * - Docker image
-      - JAX
-      - Linux
-      - Python
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/jax/rocm6.4.2-jax0.4.35-py3.12/images/sha256-8918fa806a172c1a10eb2f57131eb31b5d7c8fa1656b8729fe7d3d736112de83"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
-
-      - `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
-      - Ubuntu 24.04
-      - `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/jax/rocm6.4.2-jax0.4.35-py3.10/images/sha256-a394be13c67b7fc602216abee51233afd4b6cb7adaa57ca97e688fba82f9ad79"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
-
-      - `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
-      - Ubuntu 22.04
-      - `3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
-
-AMD publishes `Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
-with ROCm backends on Docker Hub. The following Docker image tags and
-associated inventories are tested for `ROCm 6.3.2 <https://repo.radeon.com/rocm/apt/6.3.2/>`_.
-
-.. list-table:: JAX community Docker image components
-    :header-rows: 1
-
-    * - Docker image
-      - JAX
-      - Linux
-      - Python
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.12.8/images/sha256-25dfaa0183e274bd0a3554a309af3249c6f16a1793226cb5373f418e39d3146a"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
-
-      - `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
-      - Ubuntu 22.04
-      - `3.12.8 <https://www.python.org/downloads/release/python-3128/>`_
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.11.11/images/sha256-ff9baeca9067d13e6c279c911e5a9e5beed0817d24fafd424367cc3d5bd381d7"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
-
-      - `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
-      - Ubuntu 22.04
-      - `3.11.11 <https://www.python.org/downloads/release/python-31111/>`_
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.10.16/images/sha256-8bab484be1713655f74da51a191ed824bb9d03db1104fd63530a1ac3c37cf7b1"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
-
-      - `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
-      - Ubuntu 22.04
-      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
+To find the right image tag, see the :ref:`JAX on ROCm installation
+documentation <rocm-install-on-linux:jax-docker-support>` for a list of
+available ``rocm/jax`` images.

 .. _key_rocm_libraries:

--- a/docs/compatibility/ml-compatibility/llama-cpp-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/llama-cpp-compatibility.rst
@@ -16,7 +16,7 @@ for Large Language Model (LLM) inference that runs on both central processing un
 a simple, dependency-free setup. 

 The framework supports multiple quantization options, from 1.5-bit to 8-bit integers, 
-to speed up inference and reduce memory usage. Originally built as a CPU-first library, 
+to accelerate inference and reduce memory usage. Originally built as a CPU-first library, 
 llama.cpp is easy to integrate with other programming environments and is widely 
 adopted across diverse platforms, including consumer devices. 

@@ -40,12 +40,12 @@ with ROCm support:

 .. note::

-  llama.cpp is supported on ROCm 6.4.0.
+  llama.cpp is supported on ROCm 7.0.0 and ROCm 6.4.x.

 Supported devices
 ================================================================================

-**Officially Supported**: AMD Instinct™ MI300X, MI210
+**Officially Supported**: AMD Instinct™ MI300X, MI325X, MI210


 Use cases and recommendations
@@ -70,7 +70,7 @@ llama.cpp is also used in a range of real-world applications, including:
 For more use cases and recommendations, refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__, 
 where you can search for llama.cpp examples and best practices to optimize your workloads on AMD GPUs.

- The `Llama.cpp Meets Instinct: A New Era of Open-Source AI Acceleration <https://rocm.blogs.amd.com/ecosystems-and-partners/llama-cpp/README.html>`__, 
+- The `Llama.cpp Meets Instinct: A New Era of Open-Source AI Acceleration <https://rocm.blogs.amd.com/ecosystems-and-partners/llama-cpp/README.html>`__ 
  blog post outlines how the open-source llama.cpp framework enables efficient LLM inference—including interactive inference with ``llama-cli``, 
  server deployment with ``llama-server``, GGUF model preparation and quantization, performance benchmarking, and optimizations tailored for 
  AMD Instinct GPUs within the ROCm ecosystem. 
@@ -84,9 +84,9 @@ Docker image compatibility

   <i class="fab fa-docker"></i>

-AMD validates and publishes `ROCm llama.cpp Docker images <https://hub.docker.com/r/rocm/llama.cpp>`__
+AMD validates and publishes `ROCm llama.cpp Docker images <https://hub.docker.com/r/rocm/llama.cpp/tags>`__
 with ROCm backends on Docker Hub. The following Docker image tags and associated
-inventories were tested on `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
+inventories represent the available llama.cpp versions from the official Docker Hub.
 Click |docker-icon| to view the image on Docker Hub.

 .. important::
@@ -105,8 +105,115 @@ Click |docker-icon| to view the image on Docker Hub.
      - Server Docker
      - Light Docker
      - llama.cpp
+      - ROCm
      - Ubuntu

+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu24.04_full/images/sha256-a2ecd635eaa65bb289a9041330128677f3ae88bee6fee0597424b17e38d4903c"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu24.04_server/images/sha256-cb46b47df415addb5ceb6e6fdf0be70bf9d7f6863bbe6e10c2441ecb84246d52"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu24.04_light/images/sha256-8f8536eec4b05c0ff1c022f9fc6c527ad1c89e6c1ca0906e4d39e4de73edbde9"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
+      - `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
+      - 24.04
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu22.04_full/images/sha256-f36de2a3b03ae53e81c85422cb3780368c9891e1ac7884b04403a921fe2ea45d"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu22.04_server/images/sha256-df15e8ab11a6837cd3736644fec1e047465d49e37d610ab0b79df000371327df"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu22.04_light/images/sha256-4ea2d5bb7964f0ee3ea9b30ba7f343edd6ddfab1b1037669ca7eafad2e3c2bd7"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
+      - `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
+      - 22.04
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu24.04_full/images/sha256-5960fc850024a8a76451f9eaadd89b7e59981ae9f393b407310c1ddf18892577"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu24.04_server/images/sha256-1b79775d9f546065a6aaf9ca426e1dd4ed4de0b8f6ee83687758cc05af6538e6"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu24.04_light/images/sha256-8f863c4c2857ae42bebd64e4f1a0a1e7cc3ec4503f243e32b4a4dcad070ec361"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
+      - `6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__
+      - 24.04
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu22.04_full/images/sha256-888879b3ee208f9247076d7984524b8d1701ac72611689e89854a1588bec9867"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu22.04_server/images/sha256-90e4ff99a66743e33fd00728cd71a768588e5f5ef355aaa196669fe65ac70672"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu22.04_light/images/sha256-bd447a049939cb99054f8fbf3f2352870fe906a75e2dc3339c845c08b9c53f9b"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
+      - `6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__
+      - 22.04
+
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu24.04_full/images/sha256-5b3a1bc4889c1fcade434b937fbf9cc1c22ff7dc0317c130339b0c9238bc88c4"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu24.04_server/images/sha256-5228ff99d0f627a9032d668f4381b2e80dc1e301adc3e0821f26d8354b175271"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu24.04_light/images/sha256-b12723b332a826a89b7252dddf868cbe4d1a869562fc4aa4032f59e1a683b968"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
+      - `6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__
+      - 24.04
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu22.04_full/images/sha256-cd6e21a6a73f59b35dd5309b09dd77654a94d783bf13a55c14eb8dbf8e9c2615"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu22.04_server/images/sha256-c2b4689ab2c47e6626e8fea22d7a63eb03d47c0fde9f5ef8c9f158d15c423e58"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu22.04_light/images/sha256-1acc28f29ed87db9cbda629cb29e1989b8219884afe05f9105522be929e94da4"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
+      - `6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__
+      - 22.04
+
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu24.04_full/images/sha256-2f8ae8a44510d96d52dea6cb398b224f7edeb7802df7ec488c6f63d206b3cdc9"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu24.04_server/images/sha256-fece497ff9f4a28b12f645de52766941da8ead8471aa1ea84b61d4b4568e51f2"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu24.04_light/images/sha256-3e14352fa6f8c6128b23cf9342531c20dbfb522550b626e09d83b260a1947022"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
+      - `6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`__
+      - 24.04
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu22.04_full/images/sha256-80763062ef0bec15038c35fd01267f1fc99a5dd171d4b48583cc668b15efad69"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu22.04_server/images/sha256-db2a6c957555ed83b819bbc54aea884a93192da0fb512dae63d32e0dc4e8ab8f"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu22.04_light/images/sha256-c6dbb07cc655fb079d5216e4b77451cb64a9daa0585d23b6fb8b32cb22021197"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
+      - `6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`__
+      - 22.04
+
    * - .. raw:: html

           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b5997_rocm6.4.0_ubuntu24.04_full/images/sha256-f78f6c81ab2f8e957469415fe2370a1334fe969c381d1fe46050c85effaee9d5"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
@@ -117,40 +224,52 @@ Click |docker-icon| to view the image on Docker Hub.

           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b5997_rocm6.4.0_ubuntu24.04_light/images/sha256-cc324e6faeedf0e400011f07b49d2dc41a16bae257b2b7befa0f4e2e97231320"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
      - `b5997 <https://github.com/ROCm/llama.cpp/tree/release/b5997>`__
+      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
      - 24.04

+
 Key ROCm libraries for llama.cpp
 ================================================================================

 llama.cpp functionality on ROCm is determined by its underlying library
 dependencies. These ROCm components affect the capabilities, performance, and
-feature set available to developers.
+feature set available to developers. Ensure you have the required libraries for 
+your corresponding ROCm version.

 .. list-table::
    :header-rows: 1

    * - ROCm library
-      - Version
+      - ROCm 7.0.0 version
+      - ROCm 6.4.x version
      - Purpose
      - Usage
    * - `hipBLAS <https://github.com/ROCm/hipBLAS>`__
-      - :version-ref:`hipBLAS rocm_version`
+      - 3.0.0
+      - 2.4.0
      - Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
        matrix and vector operations.
      - Supports operations such as matrix multiplication, matrix-vector
        products, and tensor contractions. Utilized in both dense and batched
        linear algebra operations.
    * - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`__
-      - :version-ref:`hipBLASLt rocm_version`
+      - 1.0.0
+      - 0.12.0
      - hipBLASLt is an extension of the hipBLAS library, providing additional
        features like epilogues fused into the matrix multiplication kernel or
        use of integer tensor cores.
      - By setting the flag ``ROCBLAS_USE_HIPBLASLT``, you can dispatch hipblasLt
        kernels where possible.
    * - `rocWMMA <https://github.com/ROCm/rocWMMA>`__
-      - :version-ref:`rocWMMA rocm_version`
+      - 2.0.0
+      - 1.7.0
      - Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
        multiplication (GEMM) and accumulation operations with mixed precision
        support.
      - Can be used to enhance the flash attention performance on AMD compute, by enabling
-        the flag during compile time.
+        the ``GGML_HIP_ROCWMMA_FATTN`` flag during compile time.
+
+Previous versions
+================================================================================
+See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/llama-cpp-history` to find documentation for previous releases
+of the ``ROCm/llama.cpp`` Docker image.
--- a/docs/compatibility/ml-compatibility/megablocks-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/megablocks-compatibility.rst
@@ -28,7 +28,7 @@ Supported devices
 ================================================================================

 - **Officially Supported**: AMD Instinct MI300X
- **Partially Supported** (functionality or performance limitations): AMD Instinct MI250X, MI210X
+- **Partially Supported** (functionality or performance limitations): AMD Instinct MI250X, MI210

 Supported models and features
 ================================================================================
--- a/docs/compatibility/ml-compatibility/pytorch-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/pytorch-compatibility.rst
@@ -89,141 +89,13 @@ For more use cases and recommendations, see `ROCm PyTorch blog posts <https://ro
 Docker image compatibility
 ================================================================================

-.. |docker-icon| raw:: html
+AMD provides preconfigured Docker images with PyTorch and the ROCm backend.
+These images are published on `Docker Hub <https://hub.docker.com/r/rocm/pytorch>`__ and are the
+recommended way to get started with deep learning with PyTorch on ROCm.

-   <i class="fab fa-docker"></i>
-
-AMD validates and publishes `PyTorch images <https://hub.docker.com/r/rocm/pytorch>`__
-with ROCm backends on Docker Hub. The following Docker image tags and associated
-inventories were tested on `ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__.
-Click |docker-icon| to view the image on Docker Hub.
-
-.. list-table:: PyTorch Docker image components
-    :header-rows: 1
-    :class: docker-image-compatibility
-
-    * - Docker
-      - PyTorch
-      - Ubuntu
-      - Python
-      - Apex
-      - torchvision
-      - TensorBoard
-      - MAGMA
-      - UCX
-      - OMPI
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-6a287591500b4048a9556c1ecc92bc411fd3d552f6c8233bc399f18eb803e8d6"><i class="fab fa-docker fa-lg"></i></a>
-
-      - `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`__
-      - 24.04
-      - `3.12 <https://www.python.org/downloads/release/python-31210/>`__
-      - `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`__
-      - `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`__
-      - `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
-      - `master <https://bitbucket.org/icl/magma/src/master/>`__
-      - `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
-      - `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.6.0/images/sha256-06b967629ba6657709f04169832cd769a11e6b491e8b1394c361d42d7a0c8b43"><i class="fab fa-docker fa-lg"></i></a>
-
-      - `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`__
-      - 22.04
-      - `3.10 <https://www.python.org/downloads/release/python-31017/>`__
-      - `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`__
-      - `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`__
-      - `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
-      - `master <https://bitbucket.org/icl/magma/src/master/>`__
-      - `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
-      - `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.5.1/images/sha256-62022414217ef6de33ac5b1341e57db8a48e8573fa2ace12d48aa5edd4b99ef0"><i class="fab fa-docker fa-lg"></i></a>
-
-      - `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
-      - 24.04
-      - `3.12 <https://www.python.org/downloads/release/python-31210/>`__
-      - `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
-      - `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
-      - `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
-      - `master <https://bitbucket.org/icl/magma/src/master/>`__
-      - `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.10.0>`__
-      - `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.11_pytorch_release_2.5.1/images/sha256-469a7f74fc149aff31797e011ee41978f6a190adc69fa423b3c6a718a77bd985"><i class="fab fa-docker fa-lg"></i></a>
-
-      - `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
-      - 22.04
-      - `3.11 <https://www.python.org/downloads/release/python-31113/>`__
-      - `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
-      - `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
-      - `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
-      - `master <https://bitbucket.org/icl/magma/src/master/>`__
-      - `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
-      - `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.5.1/images/sha256-37f41a1cd94019688669a1b20d33ea74156e0c129ef6b8270076ef214a6a1a2c"><i class="fab fa-docker fa-lg"></i></a>
-
-      - `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
-      - 22.04
-      - `3.10 <https://www.python.org/downloads/release/python-31017/>`__
-      - `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
-      - `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
-      - `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
-      - `master <https://bitbucket.org/icl/magma/src/master/>`__
-      - `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
-      - `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-60824ba83dc1b9d94164925af1f81c0235c105dd555091ec04c57e05177ead1b"><i class="fab fa-docker fa-lg"></i></a>
-
-      - `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
-      - 24.04
-      - `3.12 <https://www.python.org/downloads/release/python-31210/>`__
-      - `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`__
-      - `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`__
-      - `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
-      - `master <https://bitbucket.org/icl/magma/src/master/>`__
-      - `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
-      - `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-fe944fe083312f901be6891ab4d3ffebf2eaf2cf4f5f0f435ef0b76ec714fabd"><i class="fab fa-docker fa-lg"></i></a>
-
-      - `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
-      - 22.04
-      - `3.10 <https://www.python.org/downloads/release/python-31017/>`__
-      - `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`__
-      - `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`__
-      - `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
-      - `master <https://bitbucket.org/icl/magma/src/master/>`__
-      - `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
-      - `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.3.0/images/sha256-1d59251c47170c5b8960d1172a4dbe52f5793d8966edd778f168eaf32d56661a"><i class="fab fa-docker fa-lg"></i></a>
-
-      - `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`__
-      - 24.04
-      - `3.12 <https://www.python.org/downloads/release/python-31210/>`__
-      - `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`__
-      - `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`__
-      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13>`__
-      - `master <https://bitbucket.org/icl/magma/src/master/>`__
-      - `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
-      - `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
+To find the right image tag, see the :ref:`PyTorch on ROCm installation
+documentation <rocm-install-on-linux:pytorch-docker-support>` for a list of
+available ``rocm/pytorch`` images.

 Key ROCm libraries for PyTorch
 ================================================================================
@@ -466,7 +338,7 @@ with ROCm.
    * - Library
      - Description

-    * - `torchaudio <https://docs.pytorch.org/audio/stable/index.html>`_ 
+    * - `torchaudio <https://docs.pytorch.org/audio/stable/index.html>`_
      - Audio and signal processing library for PyTorch. Provides utilities for
        audio I/O, signal and data processing functions, datasets, model
        implementations, and application components for audio and speech
@@ -493,11 +365,11 @@ with ROCm.
        and popular datasets for natural language processing, including
        tokenization, vocabulary management, and text embeddings.

-        **Note:** ``torchtext`` does not implement ROCm-specific kernels. 
+        **Note:** ``torchtext`` does not implement ROCm-specific kernels.
        ROCm acceleration is provided through the underlying PyTorch framework
        and ROCm library integration. Only official release exists.

-    * - `torchdata <https://docs.pytorch.org/data/beta/index.html>`_
+    * - `torchdata <https://meta-pytorch.org/data/beta/index.html#torchdata>`_
      - Beta library of common modular data loading primitives for easily
        constructing flexible and performant data pipelines, with features still
        in prototype stage.
@@ -599,7 +471,7 @@ Known issues and notes for PyTorch 2.7 with ROCm 7.0
 ================================================================================

 - The ``matmul.allow_fp16_reduced_precision_reduction`` and
-  ``matmul.allow_bf16_reduced_precision_reduction`` options under 
-  ``torch.backends.cuda`` are not supported. As a result, 
+  ``matmul.allow_bf16_reduced_precision_reduction`` options under
+  ``torch.backends.cuda`` are not supported. As a result,
  reduced-precision reductions using FP16 or BF16 accumulation types are not
  available.
--- a/docs/compatibility/ml-compatibility/stanford-megatron-lm-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/stanford-megatron-lm-compatibility.rst
@@ -27,7 +27,7 @@ Supported Devices
 ================================================================================

 - **Officially Supported**: AMD Instinct MI300X
- **Partially Supported** (functionality or performance limitations): AMD Instinct MI250X, MI210X
+- **Partially Supported** (functionality or performance limitations): AMD Instinct MI250X, MI210


 Supported models and features
--- a/docs/compatibility/ml-compatibility/tensorflow-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/tensorflow-compatibility.rst
@@ -47,80 +47,15 @@ fixes, updates, and support for the latest ROCM versions.
 .. _tensorflow-docker-compat:

 Docker image compatibility
-===============================================================================
+================================================================================

-.. |docker-icon| raw:: html
+AMD provides preconfigured Docker images with TensorFlow and the ROCm backend.
+These images are published on `Docker Hub <https://hub.docker.com/r/rocm/tensorflow>`__ and are the
+recommended way to get started with deep learning with TensorFlow on ROCm.

-   <i class="fab fa-docker"></i>
-
-AMD validates and publishes ready-made `TensorFlow images
-<https://hub.docker.com/r/rocm/tensorflow>`__ with ROCm backends on
-Docker Hub. The following Docker image tags and associated inventories are
-validated for `ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__. Click
-the |docker-icon| icon to view the image on Docker Hub.
-
-.. list-table:: TensorFlow Docker image components
-    :header-rows: 1
-
-    * - Docker image
-      - TensorFlow
-      - Ubuntu
-      - Python
-      - TensorBoard
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.18-dev/images/sha256-96754ce2d30f729e19b497279915b5212ba33d5e408e7e5dd3f2304d87e3441e"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
-      - `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
-      - 24.04
-      - `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
-      - `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.18-dev/images/sha256-fa741508d383858e86985a9efac85174529127408102558ae2e3a4ac894eea1e"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
-      - `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
-      - 22.04
-      - `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
-      - `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.17-dev/images/sha256-3a0aef09f2a8833c2b64b85874dd9449ffc2ad257351857338ff5b706c03a418"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
-      - `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
-      - 24.04
-      - `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
-      - `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`__
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.17-dev/images/sha256-bc7341a41ebe7ab261aa100732874507c452421ef733e408ac4f05ed453b0bc5"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
-      - `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
-      - 22.04
-      - `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
-      - `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`__
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.16-dev/images/sha256-4841a8df7c340dab79bf9362dad687797649a00d594e0832eb83ea6880a40d3b"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
-      - `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
-      - 24.04
-      - `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
-      - `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`__
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.16-dev/images/sha256-883fa95aba960c58a3e46fceaa18f03ede2c7df89b8e9fd603ab2d47e0852897"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
-      - `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
-      - 22.04
-      - `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
-      - `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`__
+To find the right image tag, see the :ref:`TensorFlow on ROCm installation
+documentation <rocm-install-on-linux:tensorflow-docker-support>` for a list of
+available ``rocm/tensorflow`` images.


 Critical ROCm libraries for TensorFlow
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -89,15 +89,15 @@ project = "ROCm Documentation"
 project_path = os.path.abspath(".").replace("\\", "/")
 author = "Advanced Micro Devices, Inc."
 copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved."
-version = "7.0.1"
-release = "7.0.1"
+version = "7.0.2"
+release = "7.0.2"
 setting_all_article_info = True
 all_article_info_os = ["linux", "windows"]
 all_article_info_author = ""

 # pages with specific settings
 article_pages = [
-    {"file": "about/release-notes", "os": ["linux"], "date": "2025-09-17"},
+    {"file": "about/release-notes", "os": ["linux"], "date": "2025-10-10"},
    {"file": "release/changelog", "os": ["linux"],},
    {"file": "compatibility/compatibility-matrix", "os": ["linux"]},
    {"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]},
@@ -110,11 +110,15 @@ article_pages = [
    {"file": "compatibility/ml-compatibility/taichi-compatibility", "os": ["linux"]},
    {"file": "compatibility/ml-compatibility/ray-compatibility", "os": ["linux"]},
    {"file": "compatibility/ml-compatibility/llama-cpp-compatibility", "os": ["linux"]},
+    {"file": "compatibility/ml-compatibility/flashinfer-compatibility", "os": ["linux"]},
    {"file": "how-to/deep-learning-rocm", "os": ["linux"]},

    {"file": "how-to/rocm-for-ai/index", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/install", "os": ["linux"]},
-    {"file": "how-to/rocm-for-ai/system-health-check", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/system-setup/index", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/system-setup/multi-node-setup", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/system-setup/prerequisite-system-validation", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/system-setup/system-health-check", "os": ["linux"]},

    {"file": "how-to/rocm-for-ai/training/index", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/training/train-a-model", "os": ["linux"]},
@@ -127,7 +131,9 @@ article_pages = [
    {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.4", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.5", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.6", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.7", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-primus-migration-guide", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-megatron-v25.7", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/training/benchmark-docker/primus-megatron", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/training/benchmark-docker/pytorch-training", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-history", "os": ["linux"]},
--- a/docs/data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20250909-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20250909-benchmark-models.yaml
@@ -0,0 +1,188 @@
+dockers:
+  - pull_tag: rocm/vllm:rocm6.4.1_vllm_0.10.1_20250909
+    docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.10.1_20250909/images/sha256-1113268572e26d59b205792047bea0e61e018e79aeadceba118b7bf23cb3715c
+    components:
+      ROCm: 6.4.1
+      vLLM: 0.10.1 (0.10.1rc2.dev409+g0b6bf6691.rocm641)
+      PyTorch: 2.7.0+gitf717b2a
+      hipBLASLt: 0.15
+model_groups:
+  - group: Meta Llama
+    tag: llama
+    models:
+    - model: Llama 3.1 8B
+      mad_tag: pyt_vllm_llama-3.1-8b
+      model_repo: meta-llama/Llama-3.1-8B-Instruct
+      url: https://huggingface.co/meta-llama/Llama-3.1-8B
+      precision: float16
+      config:
+        tp: 1
+        dtype: auto
+        kv_cache_dtype: auto
+        max_seq_len_to_capture: 131072
+        max_num_batched_tokens: 131072
+        max_model_len: 8192
+    - model: Llama 3.1 70B
+      mad_tag: pyt_vllm_llama-3.1-70b
+      model_repo: meta-llama/Llama-3.1-70B-Instruct
+      url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
+      precision: float16
+      config:
+        tp: 8
+        dtype: auto
+        kv_cache_dtype: auto
+        max_seq_len_to_capture: 131072
+        max_num_batched_tokens: 131072
+        max_model_len: 8192
+    - model: Llama 3.1 405B
+      mad_tag: pyt_vllm_llama-3.1-405b
+      model_repo: meta-llama/Llama-3.1-405B-Instruct
+      url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
+      precision: float16
+      config:
+        tp: 8
+        dtype: auto
+        kv_cache_dtype: auto
+        max_seq_len_to_capture: 131072
+        max_num_batched_tokens: 131072
+        max_model_len: 8192
+    - model: Llama 2 70B
+      mad_tag: pyt_vllm_llama-2-70b
+      model_repo: meta-llama/Llama-2-70b-chat-hf
+      url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
+      precision: float16
+      config:
+        tp: 8
+        dtype: auto
+        kv_cache_dtype: auto
+        max_seq_len_to_capture: 4096
+        max_num_batched_tokens: 4096
+        max_model_len: 4096
+    - model: Llama 3.1 8B FP8
+      mad_tag: pyt_vllm_llama-3.1-8b_fp8
+      model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
+      url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
+      precision: float8
+      config:
+        tp: 1
+        dtype: auto
+        kv_cache_dtype: fp8
+        max_seq_len_to_capture: 131072
+        max_num_batched_tokens: 131072
+        max_model_len: 8192
+    - model: Llama 3.1 70B FP8
+      mad_tag: pyt_vllm_llama-3.1-70b_fp8
+      model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
+      url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
+      precision: float8
+      config:
+        tp: 8
+        dtype: auto
+        kv_cache_dtype: fp8
+        max_seq_len_to_capture: 131072
+        max_num_batched_tokens: 131072
+        max_model_len: 8192
+    - model: Llama 3.1 405B FP8
+      mad_tag: pyt_vllm_llama-3.1-405b_fp8
+      model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
+      url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
+      precision: float8
+      config:
+        tp: 8
+        dtype: auto
+        kv_cache_dtype: fp8
+        max_seq_len_to_capture: 131072
+        max_num_batched_tokens: 131072
+        max_model_len: 8192
+  - group: Mistral AI
+    tag: mistral
+    models:
+    - model: Mixtral MoE 8x7B
+      mad_tag: pyt_vllm_mixtral-8x7b
+      model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
+      url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
+      precision: float16
+      config:
+        tp: 8
+        dtype: auto
+        kv_cache_dtype: auto
+        max_seq_len_to_capture: 32768
+        max_num_batched_tokens: 32768
+        max_model_len: 8192
+    - model: Mixtral MoE 8x22B
+      mad_tag: pyt_vllm_mixtral-8x22b
+      model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
+      url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
+      precision: float16
+      config:
+        tp: 8
+        dtype: auto
+        kv_cache_dtype: auto
+        max_seq_len_to_capture: 65536
+        max_num_batched_tokens: 65536
+        max_model_len: 8192
+    - model: Mixtral MoE 8x7B FP8
+      mad_tag: pyt_vllm_mixtral-8x7b_fp8
+      model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
+      url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
+      precision: float8
+      config:
+        tp: 8
+        dtype: auto
+        kv_cache_dtype: fp8
+        max_seq_len_to_capture: 32768
+        max_num_batched_tokens: 32768
+        max_model_len: 8192
+    - model: Mixtral MoE 8x22B FP8
+      mad_tag: pyt_vllm_mixtral-8x22b_fp8
+      model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
+      url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
+      precision: float8
+      config:
+        tp: 8
+        dtype: auto
+        kv_cache_dtype: fp8
+        max_seq_len_to_capture: 65536
+        max_num_batched_tokens: 65536
+        max_model_len: 8192
+  - group: Qwen
+    tag: qwen
+    models:
+    - model: QwQ-32B
+      mad_tag: pyt_vllm_qwq-32b
+      model_repo: Qwen/QwQ-32B
+      url: https://huggingface.co/Qwen/QwQ-32B
+      precision: float16
+      config:
+        tp: 1
+        dtype: auto
+        kv_cache_dtype: auto
+        max_seq_len_to_capture: 131072
+        max_num_batched_tokens: 131072
+        max_model_len: 8192
+    - model: Qwen3 30B A3B
+      mad_tag: pyt_vllm_qwen3-30b-a3b
+      model_repo: Qwen/Qwen3-30B-A3B
+      url: https://huggingface.co/Qwen/Qwen3-30B-A3B
+      precision: float16
+      config:
+        tp: 1
+        dtype: auto
+        kv_cache_dtype: auto
+        max_seq_len_to_capture: 32768
+        max_num_batched_tokens: 32768
+        max_model_len: 8192
+  - group: Microsoft Phi
+    tag: phi
+    models:
+    - model: Phi-4
+      mad_tag: pyt_vllm_phi-4
+      model_repo: microsoft/phi-4
+      url: https://huggingface.co/microsoft/phi-4
+      config:
+        tp: 1
+        dtype: auto
+        kv_cache_dtype: auto
+        max_seq_len_to_capture: 16384
+        max_num_batched_tokens: 16384
+        max_model_len: 8192
--- a/docs/data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml
@@ -1,188 +1,316 @@
 dockers:
-  - pull_tag: rocm/vllm:rocm6.4.1_vllm_0.10.1_20250909
-    docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.10.1_20250909/images/sha256-1113268572e26d59b205792047bea0e61e018e79aeadceba118b7bf23cb3715c
+  - pull_tag: rocm/vllm:rocm7.0.0_vllm_0.10.2_20251006
+    docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.10.2_20251006/images/sha256-94fd001964e1cf55c3224a445b1fb5be31a7dac302315255db8422d813edd7f5
    components:
-      ROCm: 6.4.1
-      vLLM: 0.10.1 (0.10.1rc2.dev409+g0b6bf6691.rocm641)
-      PyTorch: 2.7.0+gitf717b2a
-      hipBLASLt: 0.15
+      ROCm: 7.0.0
+      vLLM: 0.10.2 (0.11.0rc2.dev160+g790d22168.rocm700)
+      PyTorch: 2.9.0a0+git1c57644
+      hipBLASLt: 1.0.0
+    dockerfile:
+      commit: 790d22168820507f3105fef29596549378cfe399
 model_groups:
  - group: Meta Llama
    tag: llama
    models:
-    - model: Llama 3.1 8B
-      mad_tag: pyt_vllm_llama-3.1-8b
-      model_repo: meta-llama/Llama-3.1-8B-Instruct
-      url: https://huggingface.co/meta-llama/Llama-3.1-8B
-      precision: float16
-      config:
-        tp: 1
-        dtype: auto
-        kv_cache_dtype: auto
-        max_seq_len_to_capture: 131072
-        max_num_batched_tokens: 131072
-        max_model_len: 8192
-    - model: Llama 3.1 70B
-      mad_tag: pyt_vllm_llama-3.1-70b
-      model_repo: meta-llama/Llama-3.1-70B-Instruct
-      url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
-      precision: float16
-      config:
-        tp: 8
-        dtype: auto
-        kv_cache_dtype: auto
-        max_seq_len_to_capture: 131072
-        max_num_batched_tokens: 131072
-        max_model_len: 8192
-    - model: Llama 3.1 405B
-      mad_tag: pyt_vllm_llama-3.1-405b
-      model_repo: meta-llama/Llama-3.1-405B-Instruct
-      url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
-      precision: float16
-      config:
-        tp: 8
-        dtype: auto
-        kv_cache_dtype: auto
-        max_seq_len_to_capture: 131072
-        max_num_batched_tokens: 131072
-        max_model_len: 8192
-    - model: Llama 2 70B
-      mad_tag: pyt_vllm_llama-2-70b
-      model_repo: meta-llama/Llama-2-70b-chat-hf
-      url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
-      precision: float16
-      config:
-        tp: 8
-        dtype: auto
-        kv_cache_dtype: auto
-        max_seq_len_to_capture: 4096
-        max_num_batched_tokens: 4096
-        max_model_len: 4096
-    - model: Llama 3.1 8B FP8
-      mad_tag: pyt_vllm_llama-3.1-8b_fp8
-      model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
-      url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
-      precision: float8
-      config:
-        tp: 1
-        dtype: auto
-        kv_cache_dtype: fp8
-        max_seq_len_to_capture: 131072
-        max_num_batched_tokens: 131072
-        max_model_len: 8192
-    - model: Llama 3.1 70B FP8
-      mad_tag: pyt_vllm_llama-3.1-70b_fp8
-      model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
-      url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
-      precision: float8
-      config:
-        tp: 8
-        dtype: auto
-        kv_cache_dtype: fp8
-        max_seq_len_to_capture: 131072
-        max_num_batched_tokens: 131072
-        max_model_len: 8192
-    - model: Llama 3.1 405B FP8
-      mad_tag: pyt_vllm_llama-3.1-405b_fp8
-      model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
-      url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
-      precision: float8
-      config:
-        tp: 8
-        dtype: auto
-        kv_cache_dtype: fp8
-        max_seq_len_to_capture: 131072
-        max_num_batched_tokens: 131072
-        max_model_len: 8192
+      - model: Llama 2 70B
+        mad_tag: pyt_vllm_llama-2-70b
+        model_repo: meta-llama/Llama-2-70b-chat-hf
+        url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 4096
+          max_model_len: 4096
+      - model: Llama 3.1 8B
+        mad_tag: pyt_vllm_llama-3.1-8b
+        model_repo: meta-llama/Llama-3.1-8B-Instruct
+        url: https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct
+        precision: float16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.1 8B FP8
+        mad_tag: pyt_vllm_llama-3.1-8b_fp8
+        model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
+        url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
+        precision: float8
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.1 405B
+        mad_tag: pyt_vllm_llama-3.1-405b
+        model_repo: meta-llama/Llama-3.1-405B-Instruct
+        url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.1 405B FP8
+        mad_tag: pyt_vllm_llama-3.1-405b_fp8
+        model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
+        url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.1 405B MXFP4
+        mad_tag: pyt_vllm_llama-3.1-405b_fp4
+        model_repo: amd/Llama-3.1-405B-Instruct-MXFP4-Preview
+        url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-MXFP4-Preview
+        precision: float4
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.3 70B
+        mad_tag: pyt_vllm_llama-3.3-70b
+        model_repo: meta-llama/Llama-3.3-70B-Instruct
+        url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.3 70B FP8
+        mad_tag: pyt_vllm_llama-3.3-70b_fp8
+        model_repo: amd/Llama-3.3-70B-Instruct-FP8-KV
+        url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-FP8-KV
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.3 70B MXFP4
+        mad_tag: pyt_vllm_llama-3.3-70b_fp4
+        model_repo: amd/Llama-3.3-70B-Instruct-MXFP4-Preview
+        url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-MXFP4-Preview
+        precision: float4
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 4 Scout 17Bx16E
+        mad_tag: pyt_vllm_llama-4-scout-17b-16e
+        model_repo: meta-llama/Llama-4-Scout-17B-16E-Instruct
+        url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 32768
+          max_model_len: 8192
+      - model: Llama 4 Maverick 17Bx128E
+        mad_tag: pyt_vllm_llama-4-maverick-17b-128e
+        model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+        url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 32768
+          max_model_len: 8192
+      - model: Llama 4 Maverick 17Bx128E FP8
+        mad_tag: pyt_vllm_llama-4-maverick-17b-128e_fp8
+        model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+        url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+  - group: DeepSeek
+    tag: deepseek
+    models:
+      - model: DeepSeek R1 0528 FP8
+        mad_tag: pyt_vllm_deepseek-r1
+        model_repo: deepseek-ai/DeepSeek-R1-0528
+        url: https://huggingface.co/deepseek-ai/DeepSeek-R1-0528
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_seqs: 1024
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+  - group: OpenAI GPT OSS
+    tag: gpt-oss
+    models:
+      - model: GPT OSS 20B
+        mad_tag: pyt_vllm_gpt-oss-20b
+        model_repo: openai/gpt-oss-20b
+        url: https://huggingface.co/openai/gpt-oss-20b
+        precision: bfloat16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 8192
+          max_model_len: 8192
+      - model: GPT OSS 120B
+        mad_tag: pyt_vllm_gpt-oss-120b
+        model_repo: openai/gpt-oss-120b
+        url: https://huggingface.co/openai/gpt-oss-120b
+        precision: bfloat16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 8192
+          max_model_len: 8192
  - group: Mistral AI
    tag: mistral
    models:
-    - model: Mixtral MoE 8x7B
-      mad_tag: pyt_vllm_mixtral-8x7b
-      model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
-      url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
-      precision: float16
-      config:
-        tp: 8
-        dtype: auto
-        kv_cache_dtype: auto
-        max_seq_len_to_capture: 32768
-        max_num_batched_tokens: 32768
-        max_model_len: 8192
-    - model: Mixtral MoE 8x22B
-      mad_tag: pyt_vllm_mixtral-8x22b
-      model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
-      url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
-      precision: float16
-      config:
-        tp: 8
-        dtype: auto
-        kv_cache_dtype: auto
-        max_seq_len_to_capture: 65536
-        max_num_batched_tokens: 65536
-        max_model_len: 8192
-    - model: Mixtral MoE 8x7B FP8
-      mad_tag: pyt_vllm_mixtral-8x7b_fp8
-      model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
-      url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
-      precision: float8
-      config:
-        tp: 8
-        dtype: auto
-        kv_cache_dtype: fp8
-        max_seq_len_to_capture: 32768
-        max_num_batched_tokens: 32768
-        max_model_len: 8192
-    - model: Mixtral MoE 8x22B FP8
-      mad_tag: pyt_vllm_mixtral-8x22b_fp8
-      model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
-      url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
-      precision: float8
-      config:
-        tp: 8
-        dtype: auto
-        kv_cache_dtype: fp8
-        max_seq_len_to_capture: 65536
-        max_num_batched_tokens: 65536
-        max_model_len: 8192
+      - model: Mixtral MoE 8x7B
+        mad_tag: pyt_vllm_mixtral-8x7b
+        model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
+        url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 32768
+          max_model_len: 8192
+      - model: Mixtral MoE 8x7B FP8
+        mad_tag: pyt_vllm_mixtral-8x7b_fp8
+        model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
+        url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 32768
+          max_model_len: 8192
+      - model: Mixtral MoE 8x22B
+        mad_tag: pyt_vllm_mixtral-8x22b
+        model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
+        url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 65536
+          max_model_len: 8192
+      - model: Mixtral MoE 8x22B FP8
+        mad_tag: pyt_vllm_mixtral-8x22b_fp8
+        model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
+        url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 65536
+          max_model_len: 8192
  - group: Qwen
    tag: qwen
    models:
-    - model: QwQ-32B
-      mad_tag: pyt_vllm_qwq-32b
-      model_repo: Qwen/QwQ-32B
-      url: https://huggingface.co/Qwen/QwQ-32B
-      precision: float16
-      config:
-        tp: 1
-        dtype: auto
-        kv_cache_dtype: auto
-        max_seq_len_to_capture: 131072
-        max_num_batched_tokens: 131072
-        max_model_len: 8192
-    - model: Qwen3 30B A3B
-      mad_tag: pyt_vllm_qwen3-30b-a3b
-      model_repo: Qwen/Qwen3-30B-A3B
-      url: https://huggingface.co/Qwen/Qwen3-30B-A3B
-      precision: float16
-      config:
-        tp: 1
-        dtype: auto
-        kv_cache_dtype: auto
-        max_seq_len_to_capture: 32768
-        max_num_batched_tokens: 32768
-        max_model_len: 8192
+      - model: Qwen3 8B
+        mad_tag: pyt_vllm_qwen3-8b
+        model_repo: Qwen/Qwen3-8B
+        url: https://huggingface.co/Qwen/Qwen3-8B
+        precision: float16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+      - model: Qwen3 32B
+        mad_tag: pyt_vllm_qwen3-32b
+        model_repo: Qwen/Qwen3-32B
+        url: https://huggingface.co/Qwen/Qwen3-32B
+        precision: float16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+      - model: Qwen3 30B A3B
+        mad_tag: pyt_vllm_qwen3-30b-a3b
+        model_repo: Qwen/Qwen3-30B-A3B
+        url: https://huggingface.co/Qwen/Qwen3-30B-A3B
+        precision: float16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+      - model: Qwen3 30B A3B FP8
+        mad_tag: pyt_vllm_qwen3-30b-a3b_fp8
+        model_repo: Qwen/Qwen3-30B-A3B-FP8
+        url: https://huggingface.co/Qwen/Qwen3-30B-A3B-FP8
+        precision: float8
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+      - model: Qwen3 235B A22B
+        mad_tag: pyt_vllm_qwen3-235b-a22b
+        model_repo: Qwen/Qwen3-235B-A22B
+        url: https://huggingface.co/Qwen/Qwen3-235B-A22B
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+      - model: Qwen3 235B A22B FP8
+        mad_tag: pyt_vllm_qwen3-235b-a22b_fp8
+        model_repo: Qwen/Qwen3-235B-A22B-FP8
+        url: https://huggingface.co/Qwen/Qwen3-235B-A22B-FP8
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
  - group: Microsoft Phi
    tag: phi
    models:
-    - model: Phi-4
-      mad_tag: pyt_vllm_phi-4
-      model_repo: microsoft/phi-4
-      url: https://huggingface.co/microsoft/phi-4
-      config:
-        tp: 1
-        dtype: auto
-        kv_cache_dtype: auto
-        max_seq_len_to_capture: 16384
-        max_num_batched_tokens: 16384
-        max_model_len: 8192
+      - model: Phi-4
+        mad_tag: pyt_vllm_phi-4
+        model_repo: microsoft/phi-4
+        url: https://huggingface.co/microsoft/phi-4
+        precision: float16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 16384
+          max_model_len: 8192
--- a/docs/data/how-to/rocm-for-ai/training/jax-maxtext-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/training/jax-maxtext-benchmark-models.yaml
@@ -1,12 +1,4 @@
 dockers:
-  - pull_tag: rocm/jax-training:maxtext-v25.7
-    docker_hub_url: https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.7/images/sha256-45f4c727d4019a63fc47313d3a5f5a5105569539294ddfd2d742218212ae9025
-    components:
-      ROCm: 6.4.1
-      JAX: 0.5.0
-      Python: 3.10.12
-      Transformer Engine: 2.1.0+90d703dd
-      hipBLASLt: 1.x.x
  - pull_tag: rocm/jax-training:maxtext-v25.7-jax060
    docker_hub_url: https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.7/images/sha256-45f4c727d4019a63fc47313d3a5f5a5105569539294ddfd2d742218212ae9025
    components:
@@ -15,6 +7,14 @@ dockers:
      Python: 3.10.12
      Transformer Engine: 2.1.0+90d703dd
      hipBLASLt: 1.1.0-499ece1c21
+  - pull_tag: rocm/jax-training:maxtext-v25.7
+    docker_hub_url: https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.7/images/sha256-45f4c727d4019a63fc47313d3a5f5a5105569539294ddfd2d742218212ae9025
+    components:
+      ROCm: 6.4.1
+      JAX: 0.5.0
+      Python: 3.10.12
+      Transformer Engine: 2.1.0+90d703dd
+      hipBLASLt: 1.x.x
 model_groups:
  - group: Meta Llama
    tag: llama
--- a/docs/data/how-to/rocm-for-ai/training/megatron-lm-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/training/megatron-lm-benchmark-models.yaml
@@ -1,13 +1,12 @@
 dockers:
-  - pull_tag: rocm/megatron-lm:v25.7_py310
-    docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.7_py310/images/sha256-6189df849feeeee3ae31bb1e97aef5006d69d2b90c134e97708c19632e20ab5a
+  - pull_tag: rocm/megatron-lm:v25.8_py310
+    docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.8_py310/images/sha256-50fc824361054e445e86d5d88d5f58817f61f8ec83ad4a7e43ea38bbc4a142c0
    components:
-      ROCm: 6.4.2
-      Primus: v0.1.0-rc1
+      ROCm: 6.4.3
      PyTorch: 2.8.0a0+gitd06a406
      Python: "3.10"
-      Transformer Engine: 2.1.0.dev0+ba586519
-      hipBLASLt: 37ba1d36
+      Transformer Engine: 2.2.0.dev0+54dd2bdc
+      hipBLASLt: d1b517fc7a
      Triton: 3.3.0
      RCCL: 2.22.3
 model_groups:
--- a/docs/data/how-to/rocm-for-ai/training/previous-versions/megatron-lm-v25.7-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/training/previous-versions/megatron-lm-v25.7-benchmark-models.yaml
@@ -0,0 +1,49 @@
+dockers:
+  - pull_tag: rocm/megatron-lm:v25.7_py310
+    docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.7_py310/images/sha256-6189df849feeeee3ae31bb1e97aef5006d69d2b90c134e97708c19632e20ab5a
+    components:
+      ROCm: 6.4.2
+      Primus: v0.1.0-rc1
+      PyTorch: 2.8.0a0+gitd06a406
+      Python: "3.10"
+      Transformer Engine: 2.1.0.dev0+ba586519
+      hipBLASLt: 37ba1d36
+      Triton: 3.3.0
+      RCCL: 2.22.3
+model_groups:
+  - group: Meta Llama
+    tag: llama
+    models:
+      - model: Llama 3.3 70B
+        mad_tag: pyt_megatron_lm_train_llama-3.3-70b
+      - model: Llama 3.1 8B
+        mad_tag: pyt_megatron_lm_train_llama-3.1-8b
+      - model: Llama 3.1 70B
+        mad_tag: pyt_megatron_lm_train_llama-3.1-70b
+      - model: Llama 3.1 70B (proxy)
+        mad_tag: pyt_megatron_lm_train_llama-3.1-70b-proxy
+      - model: Llama 2 7B
+        mad_tag: pyt_megatron_lm_train_llama-2-7b
+      - model: Llama 2 70B
+        mad_tag: pyt_megatron_lm_train_llama-2-70b
+  - group: DeepSeek
+    tag: deepseek
+    models:
+      - model: DeepSeek-V3 (proxy)
+        mad_tag: pyt_megatron_lm_train_deepseek-v3-proxy
+      - model: DeepSeek-V2-Lite
+        mad_tag: pyt_megatron_lm_train_deepseek-v2-lite-16b
+  - group: Mistral AI
+    tag: mistral
+    models:
+      - model: Mixtral 8x7B
+        mad_tag: pyt_megatron_lm_train_mixtral-8x7b
+      - model: Mixtral 8x22B (proxy)
+        mad_tag: pyt_megatron_lm_train_mixtral-8x22b-proxy
+  - group: Qwen
+    tag: qwen
+    models:
+      - model: Qwen 2.5 7B
+        mad_tag: pyt_megatron_lm_train_qwen2.5-7b
+      - model: Qwen 2.5 72B
+        mad_tag: pyt_megatron_lm_train_qwen2.5-72b
--- a/docs/data/how-to/rocm-for-ai/training/previous-versions/primus-megatron-v25.7-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/training/previous-versions/primus-megatron-v25.7-benchmark-models.yaml
@@ -0,0 +1,58 @@
+dockers:
+  - pull_tag: rocm/megatron-lm:v25.7_py310
+    docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.7_py310/images/sha256-6189df849feeeee3ae31bb1e97aef5006d69d2b90c134e97708c19632e20ab5a
+    components:
+      ROCm: 6.4.2
+      Primus: v0.1.0-rc1
+      PyTorch: 2.8.0a0+gitd06a406
+      Python: "3.10"
+      Transformer Engine: 2.1.0.dev0+ba586519
+      hipBLASLt: 37ba1d36
+      Triton: 3.3.0
+      RCCL: 2.22.3
+model_groups:
+  - group: Meta Llama
+    tag: llama
+    models:
+      - model: Llama 3.3 70B
+        mad_tag: primus_pyt_megatron_lm_train_llama-3.3-70b
+        config_name: llama3.3_70B-pretrain.yaml
+      - model: Llama 3.1 70B
+        mad_tag: primus_pyt_megatron_lm_train_llama-3.1-70b
+        config_name: llama3.1_70B-pretrain.yaml
+      - model: Llama 3.1 8B
+        mad_tag: primus_pyt_megatron_lm_train_llama-3.1-8b
+        config_name: llama3.1_8B-pretrain.yaml
+      - model: Llama 2 7B
+        mad_tag: primus_pyt_megatron_lm_train_llama-2-7b
+        config_name: llama2_7B-pretrain.yaml
+      - model: Llama 2 70B
+        mad_tag: primus_pyt_megatron_lm_train_llama-2-70b
+        config_name: llama2_70B-pretrain.yaml
+  - group: DeepSeek
+    tag: deepseek
+    models:
+      - model: DeepSeek-V3 (proxy)
+        mad_tag: primus_pyt_megatron_lm_train_deepseek-v3-proxy
+        config_name: deepseek_v3-pretrain.yaml
+      - model: DeepSeek-V2-Lite
+        mad_tag: primus_pyt_megatron_lm_train_deepseek-v2-lite-16b
+        config_name: deepseek_v2_lite-pretrain.yaml
+  - group: Mistral AI
+    tag: mistral
+    models:
+      - model: Mixtral 8x7B
+        mad_tag: primus_pyt_megatron_lm_train_mixtral-8x7b
+        config_name: mixtral_8x7B_v0.1-pretrain.yaml
+      - model: Mixtral 8x22B (proxy)
+        mad_tag: primus_pyt_megatron_lm_train_mixtral-8x22b-proxy
+        config_name: mixtral_8x22B_v0.1-pretrain.yaml
+  - group: Qwen
+    tag: qwen
+    models:
+      - model: Qwen 2.5 7B
+        mad_tag: primus_pyt_megatron_lm_train_qwen2.5-7b
+        config_name: primus_qwen2.5_7B-pretrain.yaml
+      - model: Qwen 2.5 72B
+        mad_tag: primus_pyt_megatron_lm_train_qwen2.5-72b
+        config_name: qwen2.5_72B-pretrain.yaml
--- a/docs/data/how-to/rocm-for-ai/training/primus-megatron-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/training/primus-megatron-benchmark-models.yaml
@@ -1,13 +1,13 @@
 dockers:
-  - pull_tag: rocm/megatron-lm:v25.7_py310
-    docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.7_py310/images/sha256-6189df849feeeee3ae31bb1e97aef5006d69d2b90c134e97708c19632e20ab5a
+  - pull_tag: rocm/megatron-lm:v25.8_py310
+    docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.8_py310/images/sha256-50fc824361054e445e86d5d88d5f58817f61f8ec83ad4a7e43ea38bbc4a142c0
    components:
-      ROCm: 6.4.2
-      Primus: v0.1.0-rc1
+      ROCm: 6.4.3
+      Primus: 927a717
      PyTorch: 2.8.0a0+gitd06a406
      Python: "3.10"
-      Transformer Engine: 2.1.0.dev0+ba586519
-      hipBLASLt: 37ba1d36
+      Transformer Engine: 2.2.0.dev0+54dd2bdc
+      hipBLASLt: d1b517fc7a
      Triton: 3.3.0
      RCCL: 2.22.3
 model_groups:
--- a/docs/how-to/deep-learning-rocm.rst
+++ b/docs/how-to/deep-learning-rocm.rst
@@ -128,10 +128,22 @@ The table below summarizes information about ROCm-enabled deep learning framewor
          <a href="https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/llama-cpp-install.html"><i class="fas fa-link fa-lg"></i></a>
      - 
        - `Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/llama-cpp-install.html#use-a-prebuilt-docker-image-with-llama-cpp-pre-installed>`__
+        - `ROCm Base Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/llama-cpp-install.html#build-your-own-docker-image>`__
      - .. raw:: html

          <a href="https://github.com/ROCm/llama.cpp"><i class="fab fa-github fa-lg"></i></a>

+    * - `FlashInfer <https://rocm.docs.amd.com/en/latest/compatibility/ml-compatibility/flashinfer-compatibility.html>`__
+      - .. raw:: html
+
+          <a href="https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/flashinfer-install.html"><i class="fas fa-link fa-lg"></i></a>
+      - 
+        - `Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/flashinfer-install.html#use-a-prebuilt-docker-image-with-flashinfer-pre-installed>`__
+        - `ROCm Base Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/flashinfer-install.html#build-your-own-docker-image>`__
+      - .. raw:: html
+
+          <a href="https://github.com/ROCm/flashinfer"><i class="fab fa-github fa-lg"></i></a>
+
 Learn how to use your ROCm deep learning environment for training, fine-tuning, inference, and performance optimization
 through the following guides.

--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.1-20250909.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.1-20250909.rst
@@ -0,0 +1,448 @@
+:orphan:
+
+.. meta::
+   :description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the ROCm vLLM Docker image.
+   :keywords: model, MAD, automation, dashboarding, validate
+
+**********************************
+vLLM inference performance testing
+**********************************
+
+.. caution::
+
+   This documentation does not reflect the latest version of ROCm vLLM
+   inference performance documentation. See :doc:`../vllm` for the latest version.
+
+.. _vllm-benchmark-unified-docker-909:
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20250909-benchmark-models.yaml
+
+   {% set docker = data.dockers[0] %}
+
+   The `ROCm vLLM Docker <{{ docker.docker_hub_url }}>`_ image offers
+   a prebuilt, optimized environment for validating large language model (LLM)
+   inference performance on AMD Instinct™ MI300X series accelerators. This ROCm vLLM
+   Docker image integrates vLLM and PyTorch tailored specifically for MI300X series
+   accelerators and includes the following components:
+
+   .. list-table::
+      :header-rows: 1
+
+      * - Software component
+        - Version
+
+      {% for component_name, component_version in docker.components.items() %}
+      * - {{ component_name }}
+        - {{ component_version }}
+      {% endfor %}
+
+With this Docker image, you can quickly test the :ref:`expected
+inference performance numbers <vllm-benchmark-performance-measurements-909>` for
+MI300X series accelerators.
+
+What's new
+==========
+
+The following is a summary of notable changes since the :doc:`previous ROCm/vLLM Docker release <vllm-history>`.
+
+* Upgraded to vLLM v0.10.1.
+
+* Set ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1`` by default for better performance.
+
+* Set ``VLLM_ROCM_USE_AITER_RMSNORM=0`` by default to avoid various issues with torch compile.
+
+.. _vllm-benchmark-supported-models-909:
+
+Supported models
+================
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20250909-benchmark-models.yaml
+
+   {% set docker = data.dockers[0] %}
+   {% set model_groups = data.model_groups %}
+
+   .. _vllm-benchmark-available-models-909:
+
+   The following models are supported for inference performance benchmarking
+   with vLLM and ROCm. Some instructions, commands, and recommendations in this
+   documentation might vary by model -- select one to get started.
+
+   .. raw:: html
+
+      <div id="vllm-benchmark-ud-params-picker" class="container-fluid">
+         <div class="row gx-0">
+            <div class="col-2 me-1 px-2 model-param-head">Model</div>
+            <div class="row col-10 pe-0">
+      {% for model_group in model_groups %}
+               <div class="col-3 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
+      {% endfor %}
+            </div>
+         </div>
+
+         <div class="row gx-0 pt-1">
+            <div class="col-2 me-1 px-2 model-param-head">Variant</div>
+            <div class="row col-10 pe-0">
+      {% for model_group in model_groups %}
+         {% set models = model_group.models %}
+         {% for model in models %}
+            {% if models|length % 3 == 0 %}
+               <div class="col-4 px-2 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
+            {% else %}
+               <div class="col-6 px-2 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
+            {% endif %}
+         {% endfor %}
+      {% endfor %}
+            </div>
+         </div>
+      </div>
+
+   .. _vllm-benchmark-vllm-909:
+
+   {% for model_group in model_groups %}
+      {% for model in model_group.models %}
+
+   .. container:: model-doc {{ model.mad_tag }}
+
+      .. note::
+
+         See the `{{ model.model }} model card on Hugging Face <{{ model.url }}>`_ to learn more about your selected model.
+         Some models require access authorization prior to use via an external license agreement through a third party.
+      {% if model.precision == "float8" and model.model_repo.startswith("amd") %}
+         This model uses FP8 quantization via `AMD Quark <https://quark.docs.amd.com/latest/>`__ for efficient inference on AMD accelerators.
+      {% endif %}
+
+      {% endfor %}
+   {% endfor %}
+
+.. _vllm-benchmark-performance-measurements-909:
+
+Performance measurements
+========================
+
+To evaluate performance, the
+`Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
+page provides reference throughput and serving measurements for inferencing popular AI models.
+
+.. important::
+
+   The performance data presented in
+   `Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
+   only reflects the latest version of this inference benchmarking environment.
+   The listed measurements should not be interpreted as the peak performance achievable by AMD Instinct MI325X and MI300X accelerators or ROCm software.
+
+System validation
+=================
+
+Before running AI workloads, it's important to validate that your AMD hardware is configured
+correctly and performing optimally.
+
+If you have already validated your system settings, including aspects like NUMA auto-balancing, you
+can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
+optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
+before running inference workloads.
+
+To test for optimal performance, consult the recommended :ref:`System health benchmarks
+<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
+system's configuration.
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20250909-benchmark-models.yaml
+
+   {% set docker = data.dockers[0] %}
+   {% set model_groups = data.model_groups %}
+
+   Pull the Docker image
+   =====================
+
+   Download the `ROCm vLLM Docker image <{{ docker.docker_hub_url }}>`_.
+   Use the following command to pull the Docker image from Docker Hub.
+
+   .. code-block:: shell
+
+      docker pull {{ docker.pull_tag }}
+
+   Benchmarking
+   ============
+
+   Once the setup is complete, choose between two options to reproduce the
+   benchmark results:
+
+   .. _vllm-benchmark-mad-909:
+
+   {% for model_group in model_groups %}
+      {% for model in model_group.models %}
+
+   .. container:: model-doc {{model.mad_tag}}
+
+      .. tab-set::
+
+         .. tab-item:: MAD-integrated benchmarking
+
+            The following run command is tailored to {{ model.model }}.
+            See :ref:`vllm-benchmark-supported-models-909` to switch to another available model.
+
+            1. Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
+               directory and install the required packages on the host machine.
+
+               .. code-block:: shell
+
+                  git clone https://github.com/ROCm/MAD
+                  cd MAD
+                  pip install -r requirements.txt
+
+            2. Use this command to run the performance benchmark test on the `{{model.model}} <{{ model.url }}>`_ model
+               using one GPU with the :literal:`{{model.precision}}` data type on the host machine.
+
+               .. code-block:: shell
+
+                  export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
+                  madengine run \
+                      --tags {{model.mad_tag}} \
+                      --keep-model-dir \
+                      --live-output \
+                      --timeout 28800
+
+            MAD launches a Docker container with the name
+            ``container_ci-{{model.mad_tag}}``. The throughput and serving reports of the
+            model are collected in the following paths: ``{{ model.mad_tag }}_throughput.csv``
+            and ``{{ model.mad_tag }}_serving.csv``.
+
+            Although the :ref:`available models
+            <vllm-benchmark-available-models-909>` are preconfigured to collect
+            offline throughput and online serving performance data, you can
+            also change the benchmarking parameters. See the standalone
+            benchmarking tab for more information.
+
+            {% if model.tunableop %}
+
+            .. note::
+
+               For improved performance, consider enabling :ref:`PyTorch TunableOp <mi300x-tunableop>`.
+               TunableOp automatically explores different implementations and configurations of certain PyTorch
+               operators to find the fastest one for your hardware.
+
+               By default, ``{{model.mad_tag}}`` runs with TunableOp disabled (see
+               `<https://github.com/ROCm/MAD/blob/develop/models.json>`__). To enable it, include
+               the ``--tunableop on`` argument in your run.
+
+               Enabling TunableOp triggers a two-pass run -- a warm-up followed by the
+               performance-collection run.
+
+            {% endif %}
+
+         .. tab-item:: Standalone benchmarking
+
+            The following commands are optimized for {{ model.model }}.
+            See :ref:`vllm-benchmark-supported-models-909` to switch to another available model.
+
+            .. seealso::
+
+               For more information on configuration, see the `config files
+               <https://github.com/ROCm/MAD/tree/develop/scripts/vllm/configs>`__
+               in the MAD repository. Refer to the `vLLM engine arguments <https://docs.vllm.ai/en/latest/configuration/engine_args.html#engineargs>`__
+               for descriptions of available configuration options
+               and `Benchmarking vLLM <https://github.com/vllm-project/vllm/blob/main/benchmarks/README.md>`__ for
+               additional benchmarking information.
+
+            .. rubric:: Launch the container
+
+            You can run the vLLM benchmark tool independently by starting the
+            `Docker container <{{ docker.docker_hub_url }}>`_ as shown
+            in the following snippet.
+
+            .. code-block:: shell
+
+               docker pull {{ docker.pull_tag }}
+               docker run -it \
+                   --device=/dev/kfd \
+                   --device=/dev/dri \
+                   --group-add video \
+                   --shm-size 16G \
+                   --security-opt seccomp=unconfined \
+                   --security-opt apparmor=unconfined \
+                   --cap-add=SYS_PTRACE \
+                   -v $(pwd):/workspace \
+                   --env HUGGINGFACE_HUB_CACHE=/workspace \
+                   --name test \
+                   {{ docker.pull_tag }}
+
+            .. rubric:: Throughput command
+
+            Use the following command to start the throughput benchmark.
+
+            .. code-block:: shell
+
+               model={{ model.model_repo }}
+               tp={{ model.config.tp }}
+               num_prompts=1024
+               in=128
+               out=128
+               dtype={{ model.config.dtype }}
+               kv_cache_dtype={{ model.config.kv_cache_dtype }}
+               max_num_seqs=1024
+               max_seq_len_to_capture={{ model.config.max_seq_len_to_capture }}
+               max_num_batched_tokens={{ model.config.max_num_batched_tokens }}
+               max_model_len={{ model.config.max_model_len }}
+
+               vllm bench throughput --model $model \
+                   -tp $tp \
+                   --num-prompts $num_prompts \
+                   --input-len $in \
+                   --output-len $out \
+                   --dtype $dtype \
+                   --kv-cache-dtype $kv_cache_dtype \
+                   --max-num-seqs $max_num_seqs \
+                   --max-seq-len-to-capture $max_seq_len_to_capture \
+                   --max-num-batched-tokens $max_num_batched_tokens \
+                   --max-model-len $max_model_len \
+                   --trust-remote-code \
+                   --output-json ${model}_throughput.json \
+                   --gpu-memory-utilization 0.9
+
+            .. rubric:: Serving command
+
+            1. Start the server using the following command:
+
+               .. code-block:: shell
+
+                  model={{ model.model_repo }}
+                  tp={{ model.config.tp }}
+                  dtype={{ model.config.dtype }}
+                  kv_cache_dtype={{ model.config.kv_cache_dtype }}
+                  max_num_seqs=256
+                  max_seq_len_to_capture={{ model.config.max_seq_len_to_capture }}
+                  max_num_batched_tokens={{ model.config.max_num_batched_tokens }}
+                  max_model_len={{ model.config.max_model_len }}
+
+                  vllm serve $model \
+                      -tp $tp \
+                      --dtype $dtype \
+                      --kv-cache-dtype $kv_cache_dtype \
+                      --max-num-seqs $max_num_seqs \
+                      --max-seq-len-to-capture $max_seq_len_to_capture \
+                      --max-num-batched-tokens $max_num_batched_tokens \
+                      --max-model-len $max_model_len \
+                      --no-enable-prefix-caching \
+                      --swap-space 16 \
+                      --disable-log-requests \
+                      --trust-remote-code \
+                      --gpu-memory-utilization 0.9
+
+               Wait until the model has loaded and the server is ready to accept requests.
+
+            2. In another terminal on the same machine, run the benchmark:
+
+               .. code-block:: shell
+
+                  # Connect to the container
+                  docker exec -it test bash
+
+                  # Wait for the server to start
+                  until curl -s http://localhost:8000/v1/models; do sleep 30; done
+
+                  # Run the benchmark
+                  model={{ model.model_repo }}
+                  max_concurrency=1
+                  num_prompts=10
+                  in=128
+                  out=128
+                  vllm bench serve --model $model \
+                      --percentile-metrics "ttft,tpot,itl,e2el" \
+                      --dataset-name random \
+                      --ignore-eos \
+                      --max-concurrency $max_concurrency \
+                      --num-prompts $num_prompts \
+                      --random-input-len $in \
+                      --random-output-len $out \
+                      --trust-remote-code \
+                      --save-result \
+                      --result-filename ${model}_serving.json
+
+            .. note::
+
+               For improved performance with certain Mixture of Experts models, such as Mixtral 8x22B,
+               try adding ``export VLLM_ROCM_USE_AITER=1`` to your commands.
+
+               If you encounter the following error, pass your access-authorized Hugging
+               Face token to the gated models.
+
+               .. code-block::
+
+                  OSError: You are trying to access a gated repo.
+
+                  # pass your HF_TOKEN
+                  export HF_TOKEN=$your_personal_hf_token
+
+            .. raw:: html
+
+               <style>
+               mjx-container[jax="CHTML"][display="true"] {
+                  text-align: left;
+                  margin: 0;
+               }
+               </style>
+
+            .. note::
+
+               Throughput is calculated as:
+
+               - .. math:: throughput\_tot = requests \times (\mathsf{\text{input lengths}} + \mathsf{\text{output lengths}}) / elapsed\_time
+
+               - .. math:: throughput\_gen = requests \times \mathsf{\text{output lengths}} / elapsed\_time
+      {% endfor %}
+   {% endfor %}
+
+Advanced usage
+==============
+
+For information on experimental features and known issues related to ROCm optimization efforts on vLLM,
+see the developer's guide at `<https://github.com/ROCm/vllm/blob/documentation/docs/dev-docker/README.md>`__.
+
+Reproducing the Docker image
+----------------------------
+
+To reproduce this ROCm/vLLM Docker image release, follow these steps:
+
+1. Clone the `vLLM repository <https://github.com/ROCm/vllm>`__.
+
+   .. code-block:: shell
+
+      git clone https://github.com/ROCm/vllm.git
+
+2. Check out the specific release commit.
+
+   .. code-block:: shell
+
+      cd vllm
+      git checkout 6663000a391911eba96d7864a26ac42b07f6ef29
+
+3. Build the Docker image. Replace ``vllm-rocm`` with your desired image tag.
+
+   .. code-block:: shell
+
+      docker build -f docker/Dockerfile.rocm -t vllm-rocm .
+
+Further reading
+===============
+
+- To learn more about the options for latency and throughput benchmark scripts,
+  see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.
+
+- To learn more about MAD and the ``madengine`` CLI, see the `MAD usage guide <https://github.com/ROCm/MAD?tab=readme-ov-file#usage-guide>`__.
+
+- To learn more about system settings and management practices to configure your system for
+  AMD Instinct MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
+
+- See :ref:`fine-tuning-llms-vllm` and :ref:`mi300x-vllm-optimization` for
+  a brief introduction to vLLM and optimization strategies.
+
+- For application performance optimization strategies for HPC and AI workloads,
+  including inference with vLLM, see :doc:`/how-to/rocm-for-ai/inference-optimization/workload`.
+
+- For a list of other ready-made Docker images for AI with ROCm, see
+  `AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.
+
+Previous versions
+=================
+
+See :doc:`vllm-history` to find documentation for previous releases
+of the ``ROCm/vllm`` Docker image.
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.5-20250521.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.5-20250521.rst
@@ -120,7 +120,7 @@ vLLM inference performance testing
   ==================================

   For information on experimental features and known issues related to ROCm optimization efforts on vLLM,
-   see the developer's guide at `<https://github.com/ROCm/vllm/blob/main/docs/dev-docker/README.md>`__.
+   see the developer's guide at `<https://github.com/ROCm/vllm/blob/7bb0618b1fe725b7d4fad9e525aa44da12c94a8b/docs/dev-docker/README.md>`__.

   System validation
   =================
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-history.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-history.rst
@@ -7,7 +7,7 @@ vLLM inference performance testing version history
 This table lists previous versions of the ROCm vLLM inference Docker image for
 inference performance testing. For detailed information about available models
 for benchmarking, see the version-specific documentation. You can find tagged
-previous releases of the ``ROCm/vllm`` Docker image on `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.10.1_20250909/images/sha256-1113268572e26d59b205792047bea0e61e018e79aeadceba118b7bf23cb3715c>`__.
+previous releases of the ``ROCm/vllm`` Docker image on `Docker Hub <https://hub.docker.com/r/rocm/vllm/tags>`__.

 .. list-table::
   :header-rows: 1
@@ -16,14 +16,23 @@ previous releases of the ``ROCm/vllm`` Docker image on `Docker Hub <https://hub.
     - Components
     - Resources

-   * - ``rocm/vllm:rocm6.4.1_vllm_0.10.1_20250909``
+   * - ``rocm/vllm:rocm7.0.0_vllm_0.10.2_20251006``
       (latest)
+     -
+       * ROCm 7.0.0
+       * vLLM 0.10.2
+       * PyTorch 2.9.0
+     -
+       * :doc:`Documentation <../vllm>`
+       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.10.2_20251006/images/sha256-94fd001964e1cf55c3224a445b1fb5be31a7dac302315255db8422d813edd7f5>`__
+
+   * - ``rocm/vllm:rocm6.4.1_vllm_0.10.1_20250909``
     -
       * ROCm 6.4.1
       * vLLM 0.10.1
       * PyTorch 2.7.0
     -
-       * :doc:`Documentation <../vllm>`
+       * :doc:`Documentation <vllm-0.10.1-20250909>`
       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.10.1_20250909/images/sha256-1113268572e26d59b205792047bea0e61e018e79aeadceba118b7bf23cb3715c>`__

   * - ``rocm/vllm:rocm6.4.1_vllm_0.10.0_20250812``
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference.rst
@@ -16,7 +16,7 @@ PyTorch inference performance testing

   The `ROCm PyTorch Docker <https://hub.docker.com/r/rocm/pytorch/tags>`_ image offers a prebuilt,
   optimized environment for testing model inference performance on AMD Instinct™ MI300X series
-   accelerators. This guide demonstrates how to use the AMD Model Automation and Dashboarding (MAD)
+   GPUs. This guide demonstrates how to use the AMD Model Automation and Dashboarding (MAD)
   tool with the ROCm PyTorch container to test inference performance on various models efficiently.

   .. _pytorch-inference-benchmark-available-models:
@@ -175,7 +175,7 @@ Further reading
 - To learn more about MAD and the ``madengine`` CLI, see the `MAD usage guide <https://github.com/ROCm/MAD?tab=readme-ov-file#usage-guide>`__.

 - To learn more about system settings and management practices to configure your system for
-  AMD Instinct MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
+  AMD Instinct MI300X series GPUs, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.

 - For application performance optimization strategies for HPC and AI workloads,
  including inference with vLLM, see :doc:`../../inference-optimization/workload`.
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/sglang-distributed.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/sglang-distributed.rst
@@ -23,7 +23,7 @@ improved efficiency and throughput.
   serving engine for large language models (LLMs) and vision models. The
   ROCm-enabled `SGLang base Docker image <{{ docker.docker_hub_url }}>`__
   bundles SGLang with PyTorch, which is optimized for AMD Instinct MI300X series
-   accelerators. It includes the following software components:
+   GPUs. It includes the following software components:

   .. list-table::
      :header-rows: 1
@@ -37,7 +37,7 @@ improved efficiency and throughput.
      {% endfor %}

 The following guides on setting up and running SGLang and Mooncake for disaggregated
-distributed inference on a Slurm cluster using AMD Instinct MI300X series accelerators backed by
+distributed inference on a Slurm cluster using AMD Instinct MI300X series GPUs backed by
 Mellanox CX-7 NICs.

 Prerequisites
@@ -111,7 +111,7 @@ Build the Docker image
 ----------------------

 Get the Dockerfile located in
-`<https://github.com/ROCm/MAD/blob/develop/docker/sglang_dissag_inference.ubuntu.amd.Dockerfile>`__.
+`<https://github.com/ROCm/MAD/blob/develop/docker/sglang_disagg_inference.ubuntu.amd.Dockerfile>`__.
 It uses `lmsysorg/sglang:v0.5.2rc1-rocm700-mi30x
 <https://hub.docker.com/layers/lmsysorg/sglang/v0.4.9.post1-rocm630/images/sha256-2f6b1748e4bcc70717875a7da76c87795fd8aa46a9646e08d38aa7232fc78538>`__
 as the base Docker image and installs the necessary components for Mooncake, etcd, and Mellanox network
@@ -128,7 +128,7 @@ drivers.
 Benchmarking
 ============

-The `<https://github.com/ROCm/MAD/tree/develop/scripts/sglang_dissag>`__
+The `<https://github.com/ROCm/MAD/tree/develop/scripts/sglang_disagg>`__
 repository contains scripts to launch SGLang inference with prefill/decode
 disaggregation via Mooncake for supported models.

@@ -236,7 +236,7 @@ Further reading
 - See the base upstream Docker image on `Docker Hub <https://hub.docker.com/layers/lmsysorg/sglang/v0.5.2rc1-rocm700-mi30x/images/sha256-10c4ee502ddba44dd8c13325e6e03868bfe7f43d23d0a44780a8ee8b393f4729>`__.

 - To learn more about system settings and management practices to configure your system for
-  MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`__.
+  MI300X series GPUs, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`__.

 - For application performance optimization strategies for HPC and AI workloads,
  including inference with vLLM, see :doc:`/how-to/rocm-for-ai/inference-optimization/workload`.
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst
@@ -6,45 +6,63 @@
 vLLM inference performance testing
 **********************************

-.. _vllm-benchmark-unified-docker-909:
+.. _vllm-benchmark-unified-docker-930:

 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml

   {% set docker = data.dockers[0] %}

-   The `ROCm vLLM Docker <{{ docker.docker_hub_url }}>`_ image offers
-   a prebuilt, optimized environment for validating large language model (LLM)
-   inference performance on AMD Instinct™ MI300X series accelerators. This ROCm vLLM
-   Docker image integrates vLLM and PyTorch tailored specifically for MI300X series
-   accelerators and includes the following components:
+   The `ROCm vLLM Docker <{{ docker.docker_hub_url }}>`_ image offers a
+   prebuilt, optimized environment for validating large language model (LLM)
+   inference performance on AMD Instinct™ MI355X, MI350X, MI325X and MI300X
+   GPUs. This ROCm vLLM Docker image integrates vLLM and PyTorch tailored
+   specifically for AMD data center GPUs and includes the following components:

-   .. list-table::
-      :header-rows: 1
+   .. tab-set::

-      * - Software component
-        - Version
+      .. tab-item:: {{ docker.pull_tag }}

-      {% for component_name, component_version in docker.components.items() %}
-      * - {{ component_name }}
-        - {{ component_version }}
-      {% endfor %}
+         .. list-table::
+            :header-rows: 1
+
+            * - Software component
+              - Version
+
+            {% for component_name, component_version in docker.components.items() %}
+            * - {{ component_name }}
+              - {{ component_version }}
+            {% endfor %}

 With this Docker image, you can quickly test the :ref:`expected
-inference performance numbers <vllm-benchmark-performance-measurements-909>` for
-MI300X series accelerators.
+inference performance numbers <vllm-benchmark-performance-measurements-930>` for
+AMD Instinct GPUs.

 What's new
 ==========

 The following is summary of notable changes since the :doc:`previous ROCm/vLLM Docker release <previous-versions/vllm-history>`.

-* Upgraded to vLLM v0.10.1.
+* Added support for AMD Instinct MI355X and MI350X GPUs.

-* Set ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1`` by default for better performance.
+* Added support and benchmarking instructions for the following models. See :ref:`vllm-benchmark-supported-models-930`.

-* Set ``VLLM_ROCM_USE_AITER_RMSNORM=0`` by default to avoid various issues with torch compile.
+  * Llama 4 Scout and Maverick

-.. _vllm-benchmark-supported-models-909:
+  * DeepSeek R1 0528 FP8
+
+  * MXFP4 models (MI355X and MI350X only): Llama 3.3 70B MXFP4 and Llama 3.1 405B MXFP4
+
+  * GPT OSS 20B and 120B
+
+  * Qwen 3 32B, 30B-A3B, and 235B-A22B
+
+* Removed the deprecated ``--max-seq-len-to-capture`` flag.
+
+* ``--gpu-memory-utilization`` is now configurable via the `configuration files
+  <https://github.com/ROCm/MAD/tree/develop/scripts/vllm/configs>`__ in the MAD
+  repository.
+
+.. _vllm-benchmark-supported-models-930:

 Supported models
 ================
@@ -54,11 +72,12 @@ Supported models
   {% set docker = data.dockers[0] %}
   {% set model_groups = data.model_groups %}

-   .. _vllm-benchmark-available-models-909:
+   .. _vllm-benchmark-available-models-930:

   The following models are supported for inference performance benchmarking
   with vLLM and ROCm. Some instructions, commands, and recommendations in this
-   documentation might vary by model -- select one to get started.
+   documentation might vary by model -- select one to get started. MXFP4 models
+   are only supported on MI355X and MI350X GPUs.

   .. raw:: html

@@ -67,7 +86,7 @@ Supported models
            <div class="col-2 me-1 px-2 model-param-head">Model</div>
            <div class="row col-10 pe-0">
      {% for model_group in model_groups %}
-               <div class="col-3 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
+               <div class="col-4 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
      {% endfor %}
            </div>
         </div>
@@ -89,25 +108,35 @@ Supported models
         </div>
      </div>

-   .. _vllm-benchmark-vllm-909:
+   .. _vllm-benchmark-vllm-930:

   {% for model_group in model_groups %}
      {% for model in model_group.models %}

   .. container:: model-doc {{ model.mad_tag }}

+
+      {% if model.precision == "float4" %}
+      .. important::
+
+         MXFP4 is supported only on MI355X and MI350X GPUs.
+      {% endif %}
+
      .. note::

         See the `{{ model.model }} model card on Hugging Face <{{ model.url }}>`_ to learn more about your selected model.
         Some models require access authorization prior to use via an external license agreement through a third party.
      {% if model.precision == "float8" and model.model_repo.startswith("amd") %}
-         This model uses FP8 quantization via `AMD Quark <https://quark.docs.amd.com/latest/>`__ for efficient inference on AMD accelerators.
+         This model uses FP8 quantization via `AMD Quark <https://quark.docs.amd.com/latest/>`__ for efficient inference on AMD GPUs.
+      {% endif %}
+      {% if model.precision == "float4" and model.model_repo.startswith("amd") %}
+         This model uses FP4 quantization via `AMD Quark <https://quark.docs.amd.com/latest/>`__ for efficient inference on AMD GPUs.
      {% endif %}

      {% endfor %}
   {% endfor %}

-.. _vllm-benchmark-performance-measurements-909:
+.. _vllm-benchmark-performance-measurements-930:

 Performance measurements
 ========================
@@ -121,7 +150,7 @@ page provides reference throughput and serving measurements for inferencing popu
   The performance data presented in
   `Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
   only reflects the latest version of this inference benchmarking environment.
-   The listed measurements should not be interpreted as the peak performance achievable by AMD Instinct MI325X and MI300X accelerators or ROCm software.
+   The listed measurements should not be interpreted as the peak performance achievable by AMD Instinct GPUs or ROCm software.

 System validation
 =================
@@ -138,13 +167,12 @@ To test for optimal performance, consult the recommended :ref:`System health ben
 <rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
 system's configuration.

+Pull the Docker image
+=====================
+
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml

   {% set docker = data.dockers[0] %}
-   {% set model_groups = data.model_groups %}
-
-   Pull the Docker image
-   =====================

   Download the `ROCm vLLM Docker image <{{ docker.docker_hub_url }}>`_.
   Use the following command to pull the Docker image from Docker Hub.
@@ -153,13 +181,18 @@ system's configuration.

      docker pull {{ docker.pull_tag }}

-   Benchmarking
-   ============
+Benchmarking
+============
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml
+
+   {% set docker = data.dockers[0] %}
+   {% set model_groups = data.model_groups %}

   Once the setup is complete, choose between two options to reproduce the
   benchmark results:

-   .. _vllm-benchmark-mad-909:
+   .. _vllm-benchmark-mad-930:

   {% for model_group in model_groups %}
      {% for model in model_group.models %}
@@ -171,7 +204,7 @@ system's configuration.
         .. tab-item:: MAD-integrated benchmarking

            The following run command is tailored to {{ model.model }}.
-            See :ref:`vllm-benchmark-supported-models-909` to switch to another available model.
+            See :ref:`vllm-benchmark-supported-models-930` to switch to another available model.

            1. Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
               directory and install the required packages on the host machine.
@@ -182,8 +215,9 @@ system's configuration.
                  cd MAD
                  pip install -r requirements.txt

-            2. Use this command to run the performance benchmark test on the `{{model.model}} <{{ model.url }}>`_ model
-               using one GPU with the :literal:`{{model.precision}}` data type on the host machine.
+            2. On the host machine, use this command to run the performance benchmark test on
+               the `{{model.model}} <{{ model.url }}>`_ model using one node with the
+               :literal:`{{model.precision}}` data type.

               .. code-block:: shell

@@ -191,8 +225,7 @@ system's configuration.
                  madengine run \
                      --tags {{model.mad_tag}} \
                      --keep-model-dir \
-                      --live-output \
-                      --timeout 28800
+                      --live-output

            MAD launches a Docker container with the name
            ``container_ci-{{model.mad_tag}}``. The throughput and serving reports of the
@@ -200,7 +233,7 @@ system's configuration.
            and ``{{ model.mad_tag }}_serving.csv``.

            Although the :ref:`available models
-            <vllm-benchmark-available-models-909>` are preconfigured to collect
+            <vllm-benchmark-available-models-930>` are preconfigured to collect
            offline throughput and online serving performance data, you can
            also change the benchmarking parameters. See the standalone
            benchmarking tab for more information.
@@ -225,12 +258,12 @@ system's configuration.
         .. tab-item:: Standalone benchmarking

            The following commands are optimized for {{ model.model }}.
-            See :ref:`vllm-benchmark-supported-models-909` to switch to another available model.
+            See :ref:`vllm-benchmark-supported-models-930` to switch to another available model.

            .. seealso::

               For more information on configuration, see the `config files
-               <https://github.com/ROCm/MAD-private/tree/develop/scripts/vllm/configs>`__
+               <https://github.com/ROCm/MAD/tree/develop/scripts/vllm/configs>`__
               in the MAD repository. Refer to the `vLLM engine <https://docs.vllm.ai/en/latest/configuration/engine_args.html#engineargs>`__
               for descriptions of available configuration options
               and `Benchmarking vLLM <https://github.com/vllm-project/vllm/blob/main/benchmarks/README.md>`__ for
@@ -266,13 +299,12 @@ system's configuration.

               model={{ model.model_repo }}
               tp={{ model.config.tp }}
-               num_prompts=1024
-               in=128
-               out=128
-               dtype={{ model.config.dtype }}
+               num_prompts={{ model.config.num_prompts | default(1024) }}
+               in={{ model.config.in | default(128) }}
+               out={{ model.config.out | default(128) }}
+               dtype={{ model.config.dtype | default("auto") }}
               kv_cache_dtype={{ model.config.kv_cache_dtype }}
-               max_num_seqs=1024
-               max_seq_len_to_capture={{ model.config.max_seq_len_to_capture }}
+               max_num_seqs={{ model.config.max_num_seqs | default(1024) }}
               max_num_batched_tokens={{ model.config.max_num_batched_tokens }}
               max_model_len={{ model.config.max_model_len }}

@@ -284,12 +316,11 @@ system's configuration.
                   --dtype $dtype \
                   --kv-cache-dtype $kv_cache_dtype \
                   --max-num-seqs $max_num_seqs \
-                   --max-seq-len-to-capture $max_seq_len_to_capture \
                   --max-num-batched-tokens $max_num_batched_tokens \
                   --max-model-len $max_model_len \
                   --trust-remote-code \
                   --output-json ${model}_throughput.json \
-                   --gpu-memory-utilization 0.9
+                   --gpu-memory-utilization {{ model.config.gpu_memory_utilization | default(0.9) }}

            .. rubric:: Serving command

@@ -302,7 +333,6 @@ system's configuration.
                  dtype={{ model.config.dtype }}
                  kv_cache_dtype={{ model.config.kv_cache_dtype }}
                  max_num_seqs=256
-                  max_seq_len_to_capture={{ model.config.max_seq_len_to_capture }}
                  max_num_batched_tokens={{ model.config.max_num_batched_tokens }}
                  max_model_len={{ model.config.max_model_len }}

@@ -311,7 +341,6 @@ system's configuration.
                      --dtype $dtype \
                      --kv-cache-dtype $kv_cache_dtype \
                      --max-num-seqs $max_num_seqs \
-                      --max-seq-len-to-capture $max_seq_len_to_capture \
                      --max-num-batched-tokens $max_num_batched_tokens \
                      --max-model-len $max_model_len \
                      --no-enable-prefix-caching \
@@ -352,6 +381,9 @@ system's configuration.

            .. note::

+               For improved performance with certain Mixture of Experts models, such as Mixtral 8x22B,
+               try adding ``export VLLM_ROCM_USE_AITER=1`` to your commands.
+
               If you encounter the following error, pass your access-authorized Hugging
               Face token to the gated models.

@@ -390,26 +422,31 @@ see the developer's guide at `<https://github.com/ROCm/vllm/blob/documentation/d
 Reproducing the Docker image
 ----------------------------

-To reproduce this ROCm/vLLM Docker image release, follow these steps:
+To reproduce this ROCm-enabled vLLM Docker image release, follow these steps:

-1. Clone the `vLLM repository <https://github.com/ROCm/vllm>`__.
-
-   .. code-block:: shell
-
-      git clone https://github.com/ROCm/vllm.git
-
-2. Checkout the specific release commit.
+1. Clone the `vLLM repository <https://github.com/vllm-project/vllm>`__.

   .. code-block:: shell

+      git clone https://github.com/vllm-project/vllm.git
      cd vllm
-      git checkout 6663000a391911eba96d7864a26ac42b07f6ef29

-3. Build the Docker image. Replace ``vllm-rocm`` with your desired image tag.
+2. Use the following command to build the image directly from the specified commit.

-   .. code-block:: shell
+   .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml

-      docker build -f docker/Dockerfile.rocm -t vllm-rocm .
+      {% set docker = data.dockers[0] %}
+      .. code-block:: shell
+
+         docker build -f docker/Dockerfile.rocm \
+             --build-arg REMOTE_VLLM=1 \
+             --build-arg VLLM_REPO=https://github.com/ROCm/vllm \
+             --build-arg VLLM_BRANCH="{{ docker.dockerfile.commit }}" \
+             -t vllm-rocm .
+
+   .. tip::
+
+      Replace ``vllm-rocm`` with your desired image tag.

 Further reading
 ===============
@@ -420,7 +457,7 @@ Further reading
 - To learn more about MAD and the ``madengine`` CLI, see the `MAD usage guide <https://github.com/ROCm/MAD?tab=readme-ov-file#usage-guide>`__.

 - To learn more about system settings and management practices to configure your system for
-  AMD Instinct MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
+  AMD Instinct MI300X series GPUs, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.

 - See :ref:`fine-tuning-llms-vllm` and :ref:`mi300x-vllm-optimization` for
  a brief introduction to vLLM and optimization strategies.
--- a/docs/how-to/rocm-for-ai/install.rst
+++ b/docs/how-to/rocm-for-ai/install.rst
@@ -47,7 +47,7 @@ Deep learning frameworks
 ========================

 ROCm supports deep learning frameworks and libraries including `PyTorch
-<https://pytorch.org/blog/pytorch-for-amd-rocm-platform-now-available-as-python-package>`_, `TensorFlow
+<https://pytorch.org>`_, `TensorFlow
 <https://tensorflow.org>`_, `JAX <https://jax.readthedocs.io/en/latest>`_, and more.

 Review the :doc:`framework installation documentation <../deep-learning-rocm>`. For ease-of-use, it's recommended to use official ROCm prebuilt Docker
@@ -57,4 +57,4 @@ Next steps
 ==========

 After installing ROCm and your desired ML libraries -- and before running AI workloads -- conduct system health benchmarks
-to test the optimal performance of your AMD hardware. See :doc:`system-health-check` to get started.
+to test the optimal performance of your AMD hardware. See :doc:`system-setup/index` to get started.
--- a/docs/how-to/rocm-for-ai/system-setup/index.rst
+++ b/docs/how-to/rocm-for-ai/system-setup/index.rst
@@ -0,0 +1,40 @@
+.. meta::
+   :description: System setup and validation steps for AI training and inference on ROCm
+   :keywords: AMD Instinct, ROCm, GPU, AI, training, inference, benchmarking, performance, validation
+
+*************************************
+System setup for AI workloads on ROCm
+*************************************
+
+Before you begin training or inference on AMD Instinct™ GPUs, complete
+the following system setup and validation steps to ensure optimal performance.
+
+Prerequisite system validation
+==============================
+
+First, confirm that your system meets all software and hardware prerequisites.
+See :doc:`prerequisite-system-validation`.
+
+Docker images for AMD Instinct GPUs
+===================================
+
+AMD provides prebuilt Docker images for AMD Instinct™ MI300X and MI325X
+GPUs. These images include ROCm-enabled deep learning frameworks and
+essential software components. They support single-node and multi-node configurations
+and are ready for training and inference workloads out of the box.
+
+Multi-node training
+-------------------
+
+For instructions on enabling multi-node training, see :doc:`multi-node-setup`.
+
+System optimization and validation
+==================================
+
+Before running workloads, verify that the system is configured correctly and
+operating at peak efficiency. Recommended steps include:
+
+- Disabling NUMA auto-balancing
+- Running system benchmarks to validate hardware performance
+
+For details on running system health checks, see :doc:`system-health-check`.
--- a/docs/how-to/rocm-for-ai/system-setup/multi-node-setup.rst
+++ b/docs/how-to/rocm-for-ai/system-setup/multi-node-setup.rst
@@ -0,0 +1,320 @@
+.. meta::
+   :description: Multi-node setup for AI training
+   :keywords: gpu, accelerator, system, health, validation, bench, perf, performance, rvs, rccl, babel, mi300x, mi325x, flops, bandwidth, rbt, training
+
+.. _rocm-for-ai-multi-node-setup:
+
+*********************************
+Multi-node setup for AI workloads
+*********************************
+
+AMD provides ready-to-use Docker images for AMD Instinct™ MI300X and MI325X
+GPUs containing ROCm-capable deep learning frameworks and essential
+software components. These Docker images can run and leverage multiple nodes if
+they are available. This page describes how to enable the multi-node training
+of AI workloads on AMD Instinct GPUs.
+
+Prerequisites
+=============
+
+Before starting, ensure your environment meets the following requirements:
+
+* Multi-node networking: your cluster should have a configured multi-node network. For setup
+  instructions, see the `Multi-node network configuration for AMD Instinct
+  accelerators
+  <https://instinct.docs.amd.com/projects/gpu-cluster-networking/en/latest/how-to/multi-node-config.html>`__
+  guide in the Instinct documentation.
+
+* ROCm Docker container to simplify environment setup for AI workloads. See the following resources to get started:
+
+  * :doc:`Training a model with Megatron-LM and ROCm <../training/benchmark-docker/megatron-lm>`
+
+  * :doc:`Training a model with PyTorch and ROCm <../training/benchmark-docker/pytorch-training>`
+
+  * :doc:`Training a model with JAX MaxText and ROCm <../training/benchmark-docker/jax-maxtext>`
+
+* Slurm workload manager to run the :ref:`provided examples <multi-node-setup-training-examples>`.
+
+Install required packages
+=========================
+
+To run multi-node workloads, ensure you have all the required packages installed based on your
+network device. For example, on Ubuntu systems:
+
+.. code-block:: shell
+
+   apt install -y iproute2
+
+   apt install -y linux-headers-"$(uname -r)" libelf-dev
+
+   apt install -y gcc make libtool autoconf librdmacm-dev rdmacm-utils infiniband-diags ibverbs-utils perftest ethtool libibverbs-dev rdma-core strace libibmad5 libibnetdisc5 ibverbs-providers libibumad-dev libibumad3 libibverbs1 libnl-3-dev libnl-route-3-dev
+
+Compile and install the RoCE library
+------------------------------------
+
+If you're using Broadcom NICs, you need to compile and install the RoCE (RDMA
+over Converged Ethernet) library. See `RoCE cluster network configuration guide
+for AMD Instinct accelerators
+<https://instinct.docs.amd.com/projects/gpu-cluster-networking/en/latest/how-to/roce-network-config.html#roce-cluster-network-configuration-guide-for-amd-instinct-accelerators>`__
+for more information.
+
+See the `Ethernet networking guide for AMD
+Instinct MI300X GPU clusters: Compiling Broadcom NIC software from source
+<https://docs.broadcom.com/doc/957608-AN2XX#page=81>`_ for more details.
+
+.. important::
+
+   It is crucial to install the exact same version of the RoCE library that
+   is installed on your host system. Also, ensure that the path to these
+   libraries on the host is correctly mounted into your Docker container.
+   Failure to do so can lead to compatibility issues and communication
+   failures.
+
+1. Set ``BUILD_DIR`` to the path on the host system where the Broadcom drivers and ``bnxt_rocelib`` source are located.
+   Then, navigate to the ``bnxt_rocelib`` directory.
+
+   .. code-block:: shell
+
+      export BUILD_DIR=/path/to/your/broadcom_drivers_on_host
+      cd $BUILD_DIR/drivers_linux/bnxt_rocelib/
+
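+2. The ``bnxt_rocelib`` directory contains a version of ``libbnxt_re`` in a zipped ``.tar.gz`` file. Extract the archive and navigate to the extracted directory, replacing ``a.b.c.d`` with the version of your archive.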
+
+   .. code-block:: shell
+
+      tar -xf libbnxt_re-a.b.c.d.tar.gz
+      cd libbnxt_re-a.b.c.d
+
+3. Compile and install the RoCE library.
+
+   .. code-block:: shell
+
+      sh autogen.sh
+      ./configure
+      make
+      find /usr/lib64/ /usr/lib -name "libbnxt_re-rdmav*.so" -exec mv {} {}.inbox \;
+      make install all
+      sh -c "echo /usr/local/lib >> /etc/ld.so.conf"
+      ldconfig
+      cp -f bnxt_re.driver /etc/libibverbs.d/
+      find . -name "*.so" -exec md5sum {} \;
+      BUILT_MD5SUM=$(find . -name "libbnxt_re-rdmav*.so" -exec md5sum {} \; | cut -d " " -f 1)
+
+Environment setup
+=================
+
+Before running multi-node workloads, set these essential environment variables:
+
+Master address
+--------------
+
+By default, ``localhost`` is used for single-node configurations. Change
+``localhost`` to the master node's resolvable hostname or IP address:
+
+.. code-block:: bash
+
+   export MASTER_ADDR="${MASTER_ADDR:-localhost}"
+
+Number of nodes
+---------------
+
+Set the number of nodes you want to train on (for example, ``2``, ``4``, or ``8``):
+
+.. code-block:: bash
+
+   export NNODES="${NNODES:-<num_nodes>}"
+
+Node ranks
+----------
+
+Set the rank of each node (``0`` for master, ``1`` for the first worker node, and so on).
+Node ranks should be unique across all nodes in the cluster.
+
+.. code-block:: bash
+
+   export NODE_RANK="${NODE_RANK:-<node_rank>}"
+
+Network interface
+-----------------
+
+Update the network interface in the script to match your system's network interface. To
+find your network interface, run the following (outside of any Docker container):
+
+.. code-block:: bash
+
+   ip a
+
+Look for an active interface (status "UP") with an IP address in the same subnet as
+your other nodes. Then, update the following variable in the script, for
+example:
+
+.. code-block:: bash
+
+   export NCCL_SOCKET_IFNAME=ens50f0np0
+
+This variable specifies which network interface to use for inter-node communication.
+Setting this variable to the incorrect interface can result in communication failures
+or significantly reduced performance.
+
+.. tip::
+
+  This command sets ``NCCL_SOCKET_IFNAME``'s value to the last RDMA interface.
+
+  .. code-block:: bash
+
+     export NCCL_SOCKET_IFNAME=$(rdma link show | awk '{print $NF}' | sort | tail -n1)
+
+RDMA/IB interface
+-----------------
+
+Set the RDMA interfaces to be used for communication. NICs can come from different vendors and the names of the RDMA interface can be different. To get the list of all the RDMA/IB devices, run:
+
+.. code-block:: bash
+
+   ibv_devices
+
+Set ``NCCL_IB_HCA`` to a comma-separated list of the RDMA/IB devices you want
+to use. For example, if
+``rdma0,rdma1,rdma2,rdma3,rdma4,rdma5,rdma6,rdma7`` are your RDMA
+interfaces, then set:
+
+.. code-block:: bash
+
+   # If using Broadcom NIC
+   export NCCL_IB_HCA=rdma0,rdma1,rdma2,rdma3,rdma4,rdma5,rdma6,rdma7
+   # If using Mellanox NIC
+   # export NCCL_IB_HCA=mlx5_0,mlx5_1,mlx5_2,mlx5_3,mlx5_4,mlx5_5,mlx5_8,mlx5_9
+
+.. tip::
+
+  Alternatively, if you want to choose the RDMA interface automatically, you
+  can use the following. This command will sort the RDMA interfaces and then
+  select the first eight RDMA interfaces.
+
+  .. code-block:: bash
+
+     export NCCL_IB_HCA=$(ibv_devices | awk 'NR>2 {print $1}' | sort | head -n 8 | paste -sd,)
+
+Global ID index
+---------------
+
+Update the global ID index if you're using RoCE.
+
+.. code-block:: bash
+
+   export NCCL_IB_GID_INDEX=3
+
+.. _multi-node-setup-training-examples:
+
+Multi-node training examples
+============================
+
+The following examples use the Slurm workload manager to launch jobs on
+multiple nodes. To run these scripts as-is, you must have a Slurm environment
+configured. The scripts are designed to work with both Broadcom Thor 2 and
+Mellanox NICs by automatically installing the required libraries and setting
+the necessary environment variables. For systems with Broadcom NICs, the
+scripts assume the host's RoCE library is located in the ``/opt`` directory.
+
+The following benchmarking examples demonstrate the training of a Llama 3 8B model
+across multiple 8-GPU nodes, using FSDP for intra-node parallelism and DP for
+inter-node parallelism.
+
+.. _rocm-for-ai-multi-node-setup-jax-train-example:
+
+JAX MaxText
+-----------
+
+1. Download the desired multi-node benchmarking script from `<https://github.com/ROCm/MAD/tree/develop/scripts/jax-maxtext/gpu-rocm>`__.
+
+   .. code-block:: shell
+
+      wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/jax-maxtext/gpu-rocm/llama3_8b_multinode.sh
+
+   Or clone the `<https://github.com/ROCm/MAD>`__ repository.
+
+   .. code-block:: shell
+
+      git clone https://github.com/ROCm/MAD
+      cd MAD/scripts/jax-maxtext/gpu-rocm
+
+2. Run the benchmark for multi-node training.
+
+   .. code-block:: shell
+
+      sbatch -N <num_nodes> llama3_8b_multinode.sh
+
+.. _rocm-for-ai-multi-node-setup-pyt-train-example:
+
+PyTorch training
+----------------
+
+.. note::
+
+   The ROCm PyTorch Training Docker image now focuses on :doc:`Training a model
+   with Primus and PyTorch <../training/benchmark-docker/primus-pytorch>`. The
+   following example refers to the legacy workflow :ref:`Training a
+   model with PyTorch <amd-pytorch-training-multinode-examples>`.
+
+1. Download the ``run_multinode_train.sh`` benchmarking script from `<https://github.com/ROCm/MAD/tree/develop/scripts/pytorch_train>`__.
+
+   .. code-block:: shell
+
+      wget https://raw.githubusercontent.com/ROCm/MAD/refs/heads/develop/scripts/pytorch_train/run_multinode_train.sh
+
+   Or clone the `<https://github.com/ROCm/MAD>`__ repository.
+
+   .. code-block:: shell
+
+      git clone https://github.com/ROCm/MAD
+      cd MAD/scripts/pytorch_train
+
+2. Run the benchmark for multi-node training.
+
+   .. code-block:: shell
+
+      sbatch -N <num_nodes> run_multinode_train.sh
+
+.. seealso::
+
+   See :ref:`Training a model with PyTorch <amd-pytorch-multinode-examples>` for more examples and information.
+
+Megatron-LM
+-----------
+
+.. note::
+
+   The Megatron-LM Docker image now focuses on :ref:`Training a model with
+   Primus and Megatron <amd-primus-megatron-multi-node-examples>`. The
+   following example refers to the legacy Megatron-LM workflow :ref:`Training a model
+   with Megatron-LM <amd-megatron-lm-multi-node-examples>` and might have
+   limited support.
+
+1. Download the ``train_llama_slurm.sh`` benchmarking script from
+   `<https://github.com/ROCm/Megatron-LM/blob/rocm_dev/examples/llama/train_llama_slurm.sh>`__.
+
+2. Set the network interface parameters as per the above guidelines and run the script.
+
+   .. code-block:: shell
+
+      cd </path/to/your/Megatron-LM>
+      export NETWORK_INTERFACE=$NCCL_SOCKET_IFNAME
+      export NCCL_IB_HCA=$NCCL_IB_HCA
+      export IMAGE=docker.io/rocm/megatron-lm:latest  # or your preferred image
+      export DATA_CACHE_PATH=/nfs/mounted/repo
+
+      sbatch -N <num_nodes> examples/llama/train_llama_slurm.sh <MODEL_SIZE> <MBS> <GBS> <SEQ_LENGTH> <FSDP> <RECOMPUTE>
+
+3. For example, to run a Llama 3 8B workload in BF16 precision, use the following command.
+
+   .. code-block:: shell
+
+      MODEL_NAME=llama3 sbatch -N 8 examples/llama/train_llama_slurm.sh 8 2 128 8192 0 0
+      # Other parameters, such as TP, FP8 datatype, can be adjusted in the script.
+
+Further reading
+===============
+
+* `Multi-node network configuration for AMD Instinct accelerators <https://instinct.docs.amd.com/projects/gpu-cluster-networking/en/latest/how-to/multi-node-config.html>`__
+
+* `Ethernet networking guide for AMD Instinct MI300X GPU clusters: Compiling Broadcom NIC software from source <https://docs.broadcom.com/doc/957608-AN2XX#page=81>`__
--- a/docs/how-to/rocm-for-ai/system-setup/prerequisite-system-validation.rst
+++ b/docs/how-to/rocm-for-ai/system-setup/prerequisite-system-validation.rst
@@ -1,5 +1,3 @@
-:orphan:
-
 .. meta::
   :description: Prerequisite system validation before using ROCm for AI.
   :keywords: ROCm, AI, LLM, train, megatron, Llama, tutorial, docker, torch, pytorch, jax
--- a/docs/how-to/rocm-for-ai/system-setup/system-health-check.rst
+++ b/docs/how-to/rocm-for-ai/system-setup/system-health-check.rst
@@ -1,12 +1,14 @@
+:orphan:
+
 .. meta::
   :description: System health checks with RVS, RCCL tests, BabelStream, and TransferBench to validate AMD hardware performance running AI workloads.
   :keywords: gpu, accelerator, system, health, validation, bench, perf, performance, rvs, rccl, babel, mi300x, mi325x, flops, bandwidth, rbt, training, inference

 .. _rocm-for-ai-system-health-bench:

-************************
-System health benchmarks
-************************
+*****************************************
+System health benchmarks for AI workloads
+*****************************************

 Before running AI workloads, it is important to validate that your AMD hardware is configured correctly and is performing optimally. This topic outlines several system health benchmarks you can use to test key aspects like GPU compute capabilities (FLOPS), memory bandwidth, and interconnect performance. Many of these tests are part of the ROCm Validation Suite (RVS).

@@ -31,7 +33,7 @@ installed, run the following command:
   sudo apt install rocm-validation-suite

 See the `ROCm Validation Suite installation instructions <https://rocm.docs.amd.com/projects/ROCmValidationSuite/en/latest/install/installation.html>`_,
-and `System validation tests <https://instinct.docs.amd.com/projects/system-acceptance/en/latest/mi300x/system-validation.html#system-validation-tests>`_
+and `System validation tests <https://instinct.docs.amd.com/projects/system-acceptance/en/latest/common/system-validation.html>`_
 in the Instinct documentation for more detailed instructions.

 Benchmark, stress, and qualification tests
@@ -41,7 +43,7 @@ The GPU stress test runs various GEMM computations as workloads to stress the GP
 meets the configured target GFLOPS.

 Run the benchmark, stress, and qualification tests included with RVS. See the `Benchmark, stress, qualification
-<https://instinct.docs.amd.com/projects/system-acceptance/en/latest/mi300x/system-validation.html#benchmark-stress-qualification>`_
+<https://instinct.docs.amd.com/projects/system-acceptance/en/latest/common/system-validation.html#benchmark-stress-qualification>`_
 section of the Instinct documentation for usage instructions.

 BabelStream test
@@ -53,7 +55,7 @@ BabelStream tests are included with the RVS package as part of the `BABEL module
 <https://rocm.docs.amd.com/projects/ROCmValidationSuite/en/latest/conceptual/rvs-modules.html#babel-benchmark-test-babel-module>`_.

 For more information, see `Performance benchmarking
-<https://instinct.docs.amd.com/projects/system-acceptance/en/latest/mi300x/performance-bench.html#babelstream-benchmarking-results>`_
+<https://instinct.docs.amd.com/projects/system-acceptance/en/latest/common/system-validation.html#babelstream>`_
 in the Instinct documentation.

 RCCL tests
@@ -62,7 +64,7 @@ RCCL tests
 The ROCm Communication Collectives Library (RCCL) enables efficient multi-GPU
 communication. The `<https://github.com/ROCm/rccl-tests>`__ suite benchmarks
 the performance and verifies the correctness of these collective operations.
-This helps ensure optimal scaling for multi-accelerator tasks.
+This helps ensure optimal scaling for multi-GPU tasks.

 1. To get started, build RCCL-tests using the official instructions in the README at
   `<https://github.com/ROCm/rccl-tests?tab=readme-ov-file#build>`__ or use the
@@ -75,8 +77,8 @@ This helps ensure optimal scaling for multi-accelerator tasks.
      make

 2. Run the suggested RCCL tests -- see `RCCL benchmarking
-   <https://instinct.docs.amd.com/projects/system-acceptance/en/latest/mi300x/performance-bench.html#rccl-benchmarking-results>`_
-   in the Instinct performance benchmarking documentation for instructions.
+   <https://instinct.docs.amd.com/projects/system-acceptance/en/latest/network/rdma-benchmarking.html#rccl-benchmarking-results>`_
+   in the AMD Instinct customer acceptance guide.

 TransferBench test
 ==================
--- a/docs/how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.rst
+++ b/docs/how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.rst
@@ -10,10 +10,10 @@ MaxText is a high-performance, open-source framework built on the Google JAX
 machine learning library to train LLMs at scale. The MaxText framework for
 ROCm is an optimized fork of the upstream
 `<https://github.com/AI-Hypercomputer/maxtext>`__ enabling efficient AI workloads
-on AMD MI300X series accelerators.
+on AMD MI300X series GPUs.

 The MaxText for ROCm training Docker image
-provides a prebuilt environment for training on AMD Instinct MI300X and MI325X accelerators,
+provides a prebuilt environment for training on AMD Instinct MI300X and MI325X GPUs,
 including essential components like JAX, XLA, ROCm libraries, and MaxText utilities.
 It includes the following software components:

@@ -25,7 +25,7 @@ It includes the following software components:
      {% for docker in dockers %}
      {% set jax_version = docker.components["JAX"] %}

-      .. tab-item:: JAX {{ jax_version }}
+      .. tab-item:: ``{{ docker.pull_tag }}``
         :sync: {{ docker.pull_tag }}

         .. list-table::
@@ -47,10 +47,6 @@ It includes the following software components:
            ``shardy=False`` during the training run. You can also follow the `migration
            guide <https://docs.jax.dev/en/latest/shardy_jax_migration.html>`__ to enable
            it.
-
-            The provided multi-node training scripts in this documentation are
-            not currently supported with JAX 0.6.0. For multi-node training, use the JAX 0.5.0
-            Docker image.
         {% endif %}

      {% endfor %}
@@ -73,7 +69,7 @@ Supported models
 ================

 The following models are pre-optimized for performance on AMD Instinct MI300
-series accelerators. Some instructions, commands, and available training
+series GPUs. Some instructions, commands, and available training
 configurations in this documentation might vary by model -- select one to get
 started.

@@ -136,87 +132,35 @@ This Docker image is optimized for specific model configurations outlined
 as follows. Performance can vary for other training workloads, as AMD
 doesn’t validate configurations and run conditions outside those described.

+Pull the Docker image
+---------------------
+
+Use the following command to pull the Docker image from Docker Hub.
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/jax-maxtext-benchmark-models.yaml
+
+   {% set dockers = data.dockers %}
+   .. tab-set::
+
+      {% for docker in dockers %}
+      {% set jax_version = docker.components["JAX"] %}
+
+      .. tab-item:: JAX {{ jax_version }}
+         :sync: {{ docker.pull_tag }}
+
+         .. code-block:: shell
+
+            docker pull {{ docker.pull_tag }}
+
+      {% endfor %}
+
 .. _amd-maxtext-multi-node-setup-v257:

-Multi-node setup
----------------
+Multi-node configuration
+------------------------

-For multi-node environments, ensure you have all the necessary packages for
-your network device, such as, RDMA. If you're not using a multi-node setup
-with RDMA, skip ahead to :ref:`amd-maxtext-get-started-v257`.
-
-1. Install the following packages to build and install the RDMA driver.
-
-   .. code-block:: shell
-
-      sudo apt install iproute2 -y
-      sudo apt install -y linux-headers-"$(uname-r)" libelf-dev
-      sudo apt install -y gcc make libtool autoconf librdmacm-dev rdmacm-utils infiniband-diags ibverbs-utils perftest ethtool libibverbs-dev rdma-core strace libibmad5 libibnetdisc5 ibverbs-providers libibumad-dev libibumad3 libibverbs1 libnl-3-dev libnl-route-3-dev
-
-   Refer to your NIC manufacturer's documentation for further steps on
-   compiling and installing the RoCE driver. For example, for Broadcom,
-   see `Compiling Broadcom NIC software from source <https://docs.broadcom.com/doc/957608-AN2XX#G3.484341>`_
-   in `Ethernet networking guide for AMD Instinct MI300X GPU clusters <https://docs.broadcom.com/doc/957608-AN2XX>`_.
-
-2. Set the following environment variables.
-
-   a. Master address
-
-      Change ``localhost`` to the master node's resolvable hostname or IP address:
-
-      .. code-block:: bash
-
-         export MASTER_ADDR="${MASTER_ADDR:-localhost}"
-
-   b. Number of nodes
-
-      Set the number of nodes you want to train on (for example, ``2``, ``4``, or ``8``):
-
-      .. code-block:: bash
-
-         export NNODES="${NNODES:-1}"
-
-   c. Node ranks
-
-      Set the rank of each node (``0`` for master, ``1`` for the first worker node, and so on)
-      Node ranks should be unique across all nodes in the cluster.
-
-      .. code-block:: bash
-
-         export NODE_RANK="${NODE_RANK:-0}"
-
-   d. Network interface
-
-      Update the network interface in the script to match your system's network interface. To
-      find your network interface, run the following (outside of any Docker container):
-
-      .. code-block:: bash
-
-         ip a
-
-      Look for an active interface with an IP address in the same subnet as
-      your other nodes. Then, update the following variable in the script, for
-      example:
-
-      .. code-block:: bash
-
-         export NCCL_SOCKET_IFNAME=ens50f0np0
-
-      This variable specifies which network interface to use for inter-node communication.
-      Setting this variable to the incorrect interface can result in communication failures
-      or significantly reduced performance.
-
-   e. RDMA interface
-
-      Ensure the :ref:`required packages <amd-maxtext-multi-node-setup-v257>` are installed on all nodes.
-      Then, set the RDMA interfaces to use for communication.
-
-      .. code-block:: bash
-
-         # If using Broadcom NIC
-         export NCCL_IB_HCA=rdma0,rdma1,rdma2,rdma3,rdma4,rdma5,rdma6,rdma7
-         # If using Mellanox NIC
-         export NCCL_IB_HCA=mlx5_0,mlx5_1,mlx5_2,mlx5_3,mlx5_4,mlx5_5,mlx5_8,mlx5_9
+See :doc:`/how-to/rocm-for-ai/system-setup/multi-node-setup` to configure your
+environment for multi-node training.

 .. _amd-maxtext-get-started-v257:

@@ -361,12 +305,6 @@ benchmark results:

                  ./jax-maxtext_benchmark_report.sh -m {{ model.model_repo }} -q nanoo_fp8

-               .. important::
-
-                  Quantized training is not supported with the JAX 0.6.0 Docker image; support
-                  will be added in a future release. For quantized training, use the JAX 0.5.0
-                  Docker image: ``rocm/jax-training:maxtext-v25.7``.
-
            {% endif %}
            {% if model.multinode_training_script and "multi-node" in model.doc_options %}
            .. rubric:: Multi-node training
@@ -379,11 +317,11 @@ benchmark results:
               benchmark. Run them outside of any Docker container.

            1. Make sure ``$HF_HOME`` is set before running the test. See
-               `ROCm benchmarking <https://github.com/ROCm/maxtext/blob/main/benchmarks/gpu-rocm/readme.md>`__
+               `ROCm benchmarking <https://github.com/ROCm/MAD/blob/develop/scripts/jax-maxtext/gpu-rocm/readme.md>`__
               for more details on downloading the Llama models before running the
               benchmark.

-            2. To run multi-node training for {{ model.model }}, 
+            2. To run multi-node training for {{ model.model }},
               use the
               `multi-node training script <https://github.com/ROCm/MAD/blob/develop/scripts/jax-maxtext/gpu-rocm/{{ model.multinode_training_script }}>`__
               under the ``scripts/jax-maxtext/gpu-rocm/`` directory.
@@ -409,7 +347,7 @@ Further reading
 - To learn more about MAD and the ``madengine`` CLI, see the `MAD usage guide <https://github.com/ROCm/MAD?tab=readme-ov-file#usage-guide>`__.

 - To learn more about system settings and management practices to configure your system for
-  AMD Instinct MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
+  AMD Instinct MI300X series GPUs, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.

 - For a list of other ready-made Docker images for AI with ROCm, see
  `AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.
--- a/docs/how-to/rocm-for-ai/training/benchmark-docker/megatron-lm.rst
+++ b/docs/how-to/rocm-for-ai/training/benchmark-docker/megatron-lm.rst
@@ -10,20 +10,20 @@ Training a model with Megatron-LM on ROCm

 .. caution::

-   Primus with Megatron supersedes this ROCm Megatron-LM training workflow.
+   Primus with Megatron is designed to replace this ROCm Megatron-LM training workflow.
   To learn how to migrate workloads from Megatron-LM to Primus with Megatron,
   see :doc:`previous-versions/megatron-lm-primus-migration-guide`.

 The `Megatron-LM framework for ROCm <https://github.com/ROCm/Megatron-LM>`_ is
 a specialized fork of the robust Megatron-LM, designed to enable efficient
 training of large-scale language models on AMD GPUs. By leveraging AMD
-Instinct™ MI300X series accelerators, Megatron-LM delivers enhanced
+Instinct™ MI300X series GPUs, Megatron-LM delivers enhanced
 scalability, performance, and resource utilization for AI workloads. It is
 purpose-built to support models like Llama, DeepSeek, and Mixtral,
 enabling developers to train next-generation AI models more
 efficiently.

-AMD provides ready-to-use Docker images for MI300X series accelerators containing
+AMD provides ready-to-use Docker images for MI300X series GPUs containing
 essential components, including PyTorch, ROCm libraries, and Megatron-LM
 utilities. It contains the following software components to accelerate training
 workloads:
@@ -61,7 +61,7 @@ workloads:
   ================

   The following models are supported for training performance benchmarking with Megatron-LM and ROCm
-   on AMD Instinct MI300X series accelerators.
+   on AMD Instinct MI300X series GPUs.
   Some instructions, commands, and training recommendations in this documentation might
   vary by model -- select one to get started.

@@ -115,7 +115,7 @@ popular AI models.
   The performance data presented in
   `Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`__
   only reflects the latest version of this training benchmarking environment.
-   The listed measurements should not be interpreted as the peak performance achievable by AMD Instinct MI325X and MI300X accelerators or ROCm software.
+   The listed measurements should not be interpreted as the peak performance achievable by AMD Instinct MI325X and MI300X GPUs or ROCm software.

 System validation
 =================
@@ -138,11 +138,11 @@ Environment setup
 =================

 Use the following instructions to set up the environment, configure the script to train models, and
-reproduce the benchmark results on MI300X series accelerators with the AMD Megatron-LM Docker
+reproduce the benchmark results on MI300X series GPUs with the AMD Megatron-LM Docker
 image.

 .. _amd-megatron-lm-requirements:
- 
+
 Download the Docker image
 -------------------------

@@ -152,7 +152,7 @@ Download the Docker image
   1. Use the following command to pull the Docker image from Docker Hub.

      {% if dockers|length > 1 %}
-      .. tab-set:: 
+      .. tab-set::

         {% for docker in data.dockers %}
         .. tab-item:: {{ docker.doc_name }}
@@ -281,25 +281,11 @@ Configuration

   See :ref:`Key options <amd-megatron-lm-benchmark-test-vars>` for more information on configuration options.

-Network interface
-----------------
+Multi-node configuration
+------------------------

-Update the network interface in the script to match your system's network interface. To
-find your network interface, run the following (outside of any Docker container):
-
-.. code-block:: bash
-
-   ip a
-
-Look for an active interface that has an IP address in the same subnet as
-your other nodes. Then, update the following variables in the script, for
-example:
-
-.. code-block:: bash
-
-   export NCCL_SOCKET_IFNAME=ens50f0np0
-
-   export GLOO_SOCKET_IFNAME=ens50f0np0
+Refer to :doc:`/how-to/rocm-for-ai/system-setup/multi-node-setup` to configure your environment for multi-node
+training. See :ref:`amd-megatron-lm-multi-node-examples` for example run commands.

 .. _amd-megatron-lm-tokenizer:

@@ -540,46 +526,6 @@ Download the dataset

   Ensure that the files are accessible inside the Docker container.

-Multi-node configuration
------------------------
-
-If you're running multi-node training, update the following environment variables. They can
-also be passed as command line arguments. Refer to the following example configurations.
-
-* Change ``localhost`` to the master node's hostname:
-
-  .. code-block:: shell
-
-     MASTER_ADDR="${MASTER_ADDR:-localhost}"
-
-* Set the number of nodes you want to train on (for instance, ``2``, ``4``, ``8``):
-
-  .. code-block:: shell
-
-     NNODES="${NNODES:-1}"
-
-* Set the rank of each node (0 for master, 1 for the first worker node, and so on):
-
-  .. code-block:: shell
-
-     NODE_RANK="${NODE_RANK:-0}"
-
-* Set ``DATA_CACHE_PATH`` to a common directory accessible by all the nodes (for example, an
-  NFS directory) for multi-node runs:
-
-  .. code-block:: shell
-
-     DATA_CACHE_PATH=/root/cache # Set to a common directory for multi-node runs
-
-* For multi-node runs, make sure the correct network drivers are installed on the nodes. If
-  inside a Docker container, either install the drivers inside the Docker container or pass the network
-  drivers from the host while creating the Docker container.
-
-  .. code-block:: shell
-
-     # Specify which RDMA interfaces to use for communication
-     export NCCL_IB_HCA=rdma0,rdma1,rdma2,rdma3,rdma4,rdma5,rdma6,rdma7
-
 .. _amd-megatron-lm-run-training:

 Run training
@@ -587,7 +533,7 @@ Run training

 Use the following example commands to set up the environment, configure
 :ref:`key options <amd-megatron-lm-benchmark-test-vars>`, and run training on
-MI300X series accelerators with the AMD Megatron-LM environment.
+MI300X series GPUs with the AMD Megatron-LM environment.

 Single node training
 --------------------
@@ -612,7 +558,7 @@ Single node training
      FSDP=1 \
      MODEL_SIZE=70 \
      TOTAL_ITERS=50 \
-      bash examples/llama/train_llama3.sh 
+      bash examples/llama/train_llama3.sh

   .. note::

@@ -770,7 +716,7 @@ Single node training

 .. container:: model-doc pyt_megatron_lm_train_deepseek-v3-proxy

-   To run training on a single node for DeepSeek-V3 (MoE with expert parallel) with 3-layer proxy, 
+   To run training on a single node for DeepSeek-V3 (MoE with expert parallel) with 3-layer proxy,
   navigate to the Megatron-LM folder and use the following command.

   .. code-block:: shell
@@ -925,6 +871,8 @@ Single node training
          RECOMPUTE_ACTIVATIONS=full \
          CKPT_FORMAT=torch_dist

+.. _amd-megatron-lm-multi-node-examples:
+
 Multi-node training examples
 ----------------------------

--- a/docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-v25.4.rst
+++ b/docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-v25.4.rst
@@ -202,16 +202,14 @@ Getting started

 The following examples demonstrate how to get started with single node
 and multi-node training using the benchmarking scripts provided at
-`<https://github.com/ROCm/maxtext/blob/main/benchmarks/gpu-rocm/>`__.
+`<https://github.com/ROCm/maxtext/>`__.

 .. important::

   The provided scripts launch a Docker container and execute a benchmark. Ensure you run these commands outside of any existing Docker container.

 Before running any benchmarks, ensure the ``$HF_HOME`` environment variable is
-set correctly and points to your Hugging Face cache directory. Refer to the
-README at `<https://github.com/ROCm/maxtext/blob/main/benchmarks/gpu-rocm/>`__
-for more detailed instructions.
+set correctly and points to your Hugging Face cache directory.

 Single node training benchmarking examples
 ------------------------------------------
--- a/docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-v25.5.rst
+++ b/docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-v25.5.rst
@@ -213,16 +213,14 @@ Getting started

 The following examples demonstrate how to get started with single node
 and multi-node training using the benchmarking scripts provided at
-`<https://github.com/ROCm/maxtext/blob/main/benchmarks/gpu-rocm/>`__.
+`<https://github.com/ROCm/maxtext/>`__.

 .. important::

   The provided scripts launch a Docker container and execute a benchmark. Ensure you run these commands outside of any existing Docker container.

 Before running any benchmarks, ensure the ``$HF_HOME`` environment variable is
-set correctly and points to your Hugging Face cache directory. Refer to the
-README at `<https://github.com/ROCm/maxtext/blob/main/benchmarks/gpu-rocm/>`__
-for more detailed instructions.
+set correctly and points to your Hugging Face cache directory.

 Single node training benchmarking examples
 ------------------------------------------
--- a/docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-history.rst
+++ b/docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-history.rst
@@ -16,12 +16,22 @@ previous releases of the ``ROCm/megatron-lm`` Docker image on `Docker Hub <https
     - Components
     - Resources

-   * - v25.7 (latest)
+   * - v25.8 (latest)
     - 
-       * ROCm 
-       * PyTorch 
+       * ROCm 6.4.3
+       * PyTorch 2.8.0a0+gitd06a406
     - 
-       * :doc:`Documentation <../megatron-lm>`
+       * :doc:`Primus Megatron documentation <../primus-megatron>`
+       * :doc:`Megatron-LM (legacy) documentation <../megatron-lm>`
+       * `Docker Hub (py310) <https://hub.docker.com/r/rocm/megatron-lm/tags>`__
+
+   * - v25.7
+     - 
+       * ROCm 6.4.2
+       * PyTorch 2.8.0a0+gitd06a406
+     - 
+       * :doc:`Primus Megatron documentation <primus-megatron-v25.7>`
+       * :doc:`Megatron-LM (legacy) documentation <megatron-lm-v25.7>`
       * `Docker Hub (py310) <https://hub.docker.com/layers/rocm/megatron-lm/v25.7_py310/images/sha256-6189df849feeeee3ae31bb1e97aef5006d69d2b90c134e97708c19632e20ab5a>`__

   * - v25.6
--- a/docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-primus-migration-guide.rst
+++ b/docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-primus-migration-guide.rst
@@ -1,12 +1,12 @@
 :orphan:

-**********************************************************************
-Migrating workloads to Primus (Megatron-Core backend) from Megatron-LM
-**********************************************************************
+*****************************************************************
+Migrating workloads to Primus (Megatron backend) from Megatron-LM
+*****************************************************************

 Primus supports Megatron-Core as backend optimization library,
 replacing ROCm Megatron-LM. This document outlines the steps to migrate
-workload from ROCm Megatron-LM to Primus with the Megatron-Core backend.
+workloads from ROCm Megatron-LM to Primus with the Megatron backend.

 Model architecture
 ==================
--- a/docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.7.rst
+++ b/docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.7.rst
--- a/docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-megatron-v25.7.rst
+++ b/docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-megatron-v25.7.rst
@@ -0,0 +1,604 @@
+:orphan:
+
+.. meta::
+   :description: How to train a model using Megatron-LM for ROCm.
+   :keywords: ROCm, AI, LLM, train, Megatron-LM, megatron, Llama, tutorial, docker, torch
+
+********************************************
+Training a model with Primus and Megatron-LM
+********************************************
+
+.. caution::
+
+   This documentation does not reflect the latest version of ROCm Megatron-LM
+   training performance documentation. See :doc:`../primus-megatron` for the latest version.
+
+`Primus <https://github.com/AMD-AGI/Primus>`__ is a unified and flexible
+LLM training framework. It streamlines LLM training on AMD Instinct
+accelerators using a modular, reproducible configuration paradigm.
+Primus is backend-agnostic and supports multiple training engines -- including Megatron.
+
+.. note::
+
+   Primus with the Megatron backend is intended to replace ROCm
+   Megatron-LM in this Dockerized training environment. To learn how to migrate
+   workloads from Megatron-LM to Primus with Megatron, see
+   :doc:`megatron-lm-primus-migration-guide`.
+
+For ease of use, AMD provides a ready-to-use Docker image for MI300 series accelerators
+containing essential components for Primus and Megatron-LM.
+
+.. note::
+
+   This Docker environment is based on Python 3.10 and Ubuntu 22.04. For an alternative environment with
+   Python 3.12 and Ubuntu 24.04, see the :doc:`previous ROCm Megatron-LM v25.6 Docker release <megatron-lm-v25.6>`.
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/previous-versions/primus-megatron-v25.7-benchmark-models.yaml
+
+   {% set dockers = data.dockers %}
+   {% set docker = dockers[0] %}
+   .. list-table::
+      :header-rows: 1
+
+      * - Software component
+        - Version
+
+      {% for component_name, component_version in docker.components.items() %}
+      * - {{ component_name }}
+        - {{ component_version }}
+      {% endfor %}
+
+.. _amd-primus-megatron-lm-model-support-v257:
+
+Supported models
+================
+
+The following models are pre-optimized for performance on AMD Instinct MI300X series accelerators.
+Some instructions, commands, and training examples in this documentation might
+vary by model -- select one to get started.
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/previous-versions/primus-megatron-v25.7-benchmark-models.yaml
+
+   {% set model_groups = data.model_groups %}
+   .. raw:: html
+
+      <div id="vllm-benchmark-ud-params-picker" class="container-fluid">
+         <div class="row gx-0">
+            <div class="col-2 me-1 px-2 model-param-head">Model</div>
+            <div class="row col-10 pe-0">
+      {% for model_group in model_groups %}
+               <div class="col-3 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
+      {% endfor %}
+            </div>
+         </div>
+
+         <div class="row gx-0 pt-1">
+            <div class="col-2 me-1 px-2 model-param-head">Variant</div>
+            <div class="row col-10 pe-0">
+      {% for model_group in model_groups %}
+         {% set models = model_group.models %}
+         {% for model in models %}
+            {% if models|length % 3 == 0 %}
+               <div class="col-4 px-2 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
+            {% else %}
+               <div class="col-6 px-2 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
+            {% endif %}
+         {% endfor %}
+      {% endfor %}
+            </div>
+         </div>
+      </div>
+
+.. note::
+
+   Some models, such as Llama, require an external license agreement through
+   a third party (for example, Meta).
+
+System validation
+=================
+
+Before running AI workloads, it's important to validate that your AMD hardware is configured
+correctly and performing optimally.
+
+If you have already validated your system settings, including aspects like NUMA auto-balancing, you
+can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
+optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
+before starting training.
+
+To test for optimal performance, consult the recommended :ref:`System health benchmarks
+<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
+system's configuration.
+
+.. _mi300x-amd-primus-megatron-lm-training-v257:
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/previous-versions/primus-megatron-v25.7-benchmark-models.yaml
+
+   {% set dockers = data.dockers %}
+      {% set docker = dockers[0] %}
+
+   Environment setup
+   =================
+
+   Use the following instructions to set up the environment, configure the script to train models, and
+   reproduce the benchmark results on MI300X series accelerators with the ``{{ docker.pull_tag }}`` image.
+
+   .. _amd-primus-megatron-lm-requirements-v257:
+
+   Download the Docker image
+   -------------------------
+
+   1. Use the following command to pull the Docker image from Docker Hub.
+
+      .. code-block:: shell
+
+         docker pull {{ docker.pull_tag }}
+
+   2. Launch the Docker container.
+
+      .. code-block:: shell
+
+         docker run -it \
+             --device /dev/dri \
+             --device /dev/kfd \
+             --device /dev/infiniband \
+             --network host --ipc host \
+             --group-add video \
+             --cap-add SYS_PTRACE \
+             --security-opt seccomp=unconfined \
+             --privileged \
+             -v $HOME:$HOME \
+             --shm-size 128G \
+             --name primus_training_env \
+             {{ docker.pull_tag }}
+
+3. Use these commands if you exit the ``primus_training_env`` container and need to return to it.
+
+   .. code-block:: shell
+
+      docker start primus_training_env
+      docker exec -it primus_training_env bash
+
+The Docker container hosts verified release tag ``v0.1.0-rc1`` of the `Primus
+<https://github.com/AMD-AIG-AIMA/Primus/tree/v0.1.0-rc1>`__ repository.
+
+.. _amd-primus-megatron-lm-environment-setup-v257:
+
+Configuration
+=============
+
+Primus defines a training configuration in YAML for each model in
+`examples/megatron/configs <https://github.com/AMD-AIG-AIMA/Primus/tree/v0.1.0-rc1/examples/megatron/configs>`__.
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/previous-versions/primus-megatron-v25.7-benchmark-models.yaml
+
+   {% set model_groups = data.model_groups %}
+   {% for model_group in model_groups %}
+      {% for model in model_group.models %}
+   .. container:: model-doc {{ model.mad_tag }}
+
+      To update training parameters for {{ model.model }}, you can update ``examples/megatron/configs/{{ model.config_name }}``.
+      Note that training configuration YAML files for other models follow this naming convention.
+
+      {% endfor %}
+   {% endfor %}
+
+.. note::
+
+   See :ref:`Key options <amd-primus-megatron-lm-benchmark-test-vars>` for more information on configuration options.
+
+Dataset options
+---------------
+
+You can use either mock data or real data for training.
+
+* Mock data can be useful for testing and validation. Use the ``mock_data`` field to toggle between mock and real data. The default
+  value is ``true`` for enabled.
+
+  .. code-block:: yaml
+
+     mock_data: true
+
+* If you're using a real dataset, update the ``train_data_path`` field to point to the location of your dataset.
+
+  .. code-block:: yaml
+
+     mock_data: false
+     train_data_path: /path/to/your/dataset
+
+  Ensure that the files are accessible inside the Docker container.
+
+.. _amd-primus-megatron-lm-tokenizer-v257:
+
+Tokenizer
+---------
+
+In Primus, each model uses a tokenizer from Hugging Face. For example, the Llama
+3.1 8B model uses ``tokenizer_model: meta-llama/Llama-3.1-8B`` and
+``tokenizer_type: Llama3Tokenizer`` defined in the `llama3.1-8B model
+<https://github.com/AMD-AIG-AIMA/Primus/tree/v0.1.0-rc1/primus/configs/models/megatron/llama3.1_8B.yaml>`__
+definition. As such, you need to set the ``HF_TOKEN`` environment variable with
+the right permissions to access the tokenizer for each model.
+
+.. code-block:: bash
+
+   # Export your HF_TOKEN in the workspace
+   export HF_TOKEN=<your_hftoken>
+
+.. _amd-primus-megatron-lm-run-training-v257:
+
+Run training
+============
+
+Use the following example commands to set up the environment, configure
+:ref:`key options <amd-primus-megatron-lm-benchmark-test-vars>`, and run training on
+MI300X series accelerators with the AMD Megatron-LM environment.
+
+Single node training
+--------------------
+
+To run training on a single node, navigate to ``/workspace/Primus`` and use the following setup command:
+
+.. code-block:: shell
+
+   pip install -r requirements.txt
+   export HSA_NO_SCRATCH_RECLAIM=1
+   export NVTE_CK_USES_BWD_V3=1
+
+Once setup is complete, run the appropriate training command.
+
+.. container:: model-doc primus_pyt_megatron_lm_train_llama-3.3-70b
+
+   To run pre-training for Llama 3.3 70B BF16, run:
+
+   .. code-block:: shell
+
+      EXP=examples/megatron/configs/llama3.3_70B-pretrain.yaml \
+      bash ./examples/run_pretrain.sh \
+          --micro_batch_size 2 \
+          --global_batch_size 16 \
+          --train_iters 50
+
+.. container:: model-doc primus_pyt_megatron_lm_train_llama-3.1-8b
+
+   To run pre-training for Llama 3.1 8B FP8, run:
+
+   .. code-block:: shell
+
+      EXP=examples/megatron/configs/llama3.1_8B-pretrain.yaml \
+      bash ./examples/run_pretrain.sh \
+          --train_iters 50 \
+          --fp8 hybrid
+
+   For Llama 3.1 8B BF16, use the following command:
+
+   .. code-block:: shell
+
+      EXP=examples/megatron/configs/llama3.1_8B-pretrain.yaml \
+      bash ./examples/run_pretrain.sh --train_iters 50
+
+.. container:: model-doc primus_pyt_megatron_lm_train_llama-3.1-70b
+
+   To run pre-training for Llama 3.1 70B BF16, run:
+
+   .. code-block:: shell
+
+      EXP=examples/megatron/configs/llama3.1_70B-pretrain.yaml \
+      bash ./examples/run_pretrain.sh \
+           --train_iters 50
+
+   To run the training on a single node for Llama 3.1 70B FP8 with proxy, use the following command:
+
+   .. code-block:: shell
+
+      EXP=examples/megatron/configs/llama3.1_70B-pretrain.yaml \
+      bash ./examples/run_pretrain.sh \
+          --train_iters 50 \
+          --num_layers 40 \
+          --fp8 hybrid \
+          --no_fp8_weight_transpose_cache true
+
+   .. note::
+
+      Use two or more nodes to run the *full* Llama 70B model with FP8 precision.
+
+.. container:: model-doc primus_pyt_megatron_lm_train_llama-2-7b
+
+   To run pre-training for Llama 2 7B FP8, run:
+
+   .. code-block:: shell
+
+      EXP=examples/megatron/configs/llama2_7B-pretrain.yaml \
+      bash ./examples/run_pretrain.sh \
+          --train_iters 50 \
+          --fp8 hybrid
+
+   To run pre-training for Llama 2 7B BF16, run:
+
+   .. code-block:: shell
+
+      EXP=examples/megatron/configs/llama2_7B-pretrain.yaml \
+      bash ./examples/run_pretrain.sh --train_iters 50
+
+.. container:: model-doc primus_pyt_megatron_lm_train_llama-2-70b
+
+   To run pre-training for Llama 2 70B BF16, run:
+
+   .. code-block:: shell
+
+      EXP=examples/megatron/configs/llama2_70B-pretrain.yaml \
+      bash ./examples/run_pretrain.sh --train_iters 50 
+
+.. container:: model-doc primus_pyt_megatron_lm_train_deepseek-v3-proxy
+
+   To run training on a single node for DeepSeek-V3 (MoE with expert parallel) with 3-layer proxy, 
+   use the following command:
+
+   .. code-block:: shell
+
+      EXP=examples/megatron/configs/deepseek_v3-pretrain.yaml \
+      bash examples/run_pretrain.sh \
+          --num_layers 3 \
+          --moe_layer_freq 1 \
+          --train_iters 50
+
+.. container:: model-doc primus_pyt_megatron_lm_train_deepseek-v2-lite-16b
+
+   To run training on a single node for DeepSeek-V2-Lite (MoE with expert parallel),
+   use the following command:
+
+   .. code-block:: shell
+
+      EXP=examples/megatron/configs/deepseek_v2_lite-pretrain.yaml \
+      bash examples/run_pretrain.sh \
+          --global_batch_size 256 \
+          --train_iters 50
+
+.. container:: model-doc primus_pyt_megatron_lm_train_mixtral-8x7b
+
+   To run training on a single node for Mixtral 8x7B (MoE with expert parallel),
+   use the following command:
+
+   .. code-block:: shell
+
+      EXP=examples/megatron/configs/mixtral_8x7B_v0.1-pretrain.yaml \
+      bash examples/run_pretrain.sh --train_iters 50
+
+.. container:: model-doc primus_pyt_megatron_lm_train_mixtral-8x22b-proxy
+
+   To run training on a single node for Mixtral 8x22B (MoE with expert parallel) with 4-layer proxy,
+   use the following command:
+
+   .. code-block:: shell
+
+      EXP=examples/megatron/configs/mixtral_8x22B_v0.1-pretrain.yaml \
+      bash examples/run_pretrain.sh \
+          --num_layers 4 \
+          --pipeline_model_parallel_size 1 \
+          --micro_batch_size 1 \
+          --global_batch_size 16 \
+          --train_iters 50
+
+.. container:: model-doc primus_pyt_megatron_lm_train_qwen2.5-7b
+
+   To run training on a single node for Qwen 2.5 7B BF16, use the following
+   command:
+
+   .. code-block:: shell
+
+      EXP=examples/megatron/configs/qwen2.5_7B-pretrain.yaml \
+      bash examples/run_pretrain.sh --train_iters 50
+
+   For FP8, use the following command.
+
+   .. code-block:: shell
+
+      EXP=examples/megatron/configs/qwen2.5_7B-pretrain.yaml \
+      bash examples/run_pretrain.sh \
+          --train_iters 50 \
+          --fp8 hybrid
+
+.. container:: model-doc primus_pyt_megatron_lm_train_qwen2.5-72b
+
+   To run the training on a single node for Qwen 2.5 72B BF16, use the following command.
+
+   .. code-block:: shell
+
+      EXP=examples/megatron/configs/qwen2.5_72B-pretrain.yaml \
+      bash examples/run_pretrain.sh --train_iters 50
+
+Multi-node training examples
+----------------------------
+
+To run training on multiple nodes, you can use the
+`run_slurm_pretrain.sh <https://github.com/AMD-AIG-AIMA/Primus/tree/v0.1.0-rc1/examples/run_slurm_pretrain.sh>`__
+to launch the multi-node workload. Use the following steps to set up your environment:
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/previous-versions/primus-megatron-v25.7-benchmark-models.yaml
+
+   {% set dockers = data.dockers %}
+   {% set docker = dockers[0] %}
+
+   .. code-block:: shell
+
+      cd /workspace/Primus/
+      export DOCKER_IMAGE={{ docker.pull_tag }}
+      export HF_TOKEN=<your_HF_token>
+      export HSA_NO_SCRATCH_RECLAIM=1
+      export NVTE_CK_USES_BWD_V3=1
+      export NCCL_IB_HCA=<your_NCCL_IB_HCA> # specify which RDMA interfaces to use for communication
+      export NCCL_SOCKET_IFNAME=<your_NCCL_SOCKET_IFNAME> # your Network Interface
+      export GLOO_SOCKET_IFNAME=<your_GLOO_SOCKET_IFNAME> # your Network Interface
+      export NCCL_IB_GID_INDEX=3 # Set InfiniBand GID index for NCCL communication. Default is 3 for ROCE
+
+.. note::
+
+   * Make sure the correct network drivers are installed on the nodes. If running inside a Docker container, either install the drivers inside the container or pass the network drivers from the host when creating the container.
+   * If ``NCCL_IB_HCA`` and ``NCCL_SOCKET_IFNAME`` are not set, Primus will try to auto-detect them. However, since NICs can vary across clusters, it is encouraged to explicitly export your NCCL parameters for the cluster.
+   * To find your network interface, you can use ``ip a``.
+   * To find RDMA interfaces, you can use ``ibv_devices`` to get the list of all the RDMA/IB devices.
+
+.. container:: model-doc primus_pyt_megatron_lm_train_llama-3.3-70b
+
+   To train Llama 3.3 70B FP8 on 8 nodes, run:
+
+   .. code-block:: shell
+
+      NNODES=8 EXP=examples/megatron/configs/llama3.3_70B-pretrain.yaml \
+      bash examples/run_slurm_pretrain.sh \
+          --micro_batch_size 4 \
+          --global_batch_size 256 \
+          --recompute_num_layers 80 \
+          --no_fp8_weight_transpose_cache true \
+          --fp8 hybrid
+
+   To train Llama 3.3 70B BF16 on 8 nodes, run:
+
+   .. code-block:: shell
+
+      NNODES=8 EXP=examples/megatron/configs/llama3.3_70B-pretrain.yaml \
+      bash examples/run_slurm_pretrain.sh \
+          --micro_batch_size 1 \
+          --global_batch_size 256 \
+          --recompute_num_layers 12
+
+.. container:: model-doc primus_pyt_megatron_lm_train_llama-3.1-8b
+
+   To train Llama 3.1 8B FP8 on 8 nodes, run:
+
+   .. code-block:: shell
+
+      # Adjust the training parameters. For example, `global_batch_size: 8 * #single_node_bs` for 8 nodes in this case
+      NNODES=8 EXP=examples/megatron/configs/llama3.1_8B-pretrain.yaml \
+      bash ./examples/run_slurm_pretrain.sh \
+          --global_batch_size 1024 \
+          --fp8 hybrid
+
+.. container:: model-doc primus_pyt_megatron_lm_train_llama-3.1-70b
+
+   To train Llama 3.1 70B FP8 on 8 nodes, run:
+
+   .. code-block:: shell
+
+      NNODES=8 EXP=examples/megatron/configs/llama3.1_70B-pretrain.yaml \
+      bash examples/run_slurm_pretrain.sh \
+          --micro_batch_size 4 \
+          --global_batch_size 256 \
+          --recompute_num_layers 80 \
+          --no_fp8_weight_transpose_cache true \
+          --fp8 hybrid
+
+   To train Llama 3.1 70B BF16 on 8 nodes, run:
+
+   .. code-block:: shell
+
+      NNODES=8 EXP=examples/megatron/configs/llama3.1_70B-pretrain.yaml \
+      bash examples/run_slurm_pretrain.sh \
+          --micro_batch_size 1 \
+          --global_batch_size 256 \
+          --recompute_num_layers 12
+
+.. container:: model-doc primus_pyt_megatron_lm_train_llama-2-7b
+
+   To train Llama 2 7B FP8 on 8 nodes, run:
+
+   .. code-block:: shell
+
+      # Adjust the training parameters. For example, `global_batch_size: 8 * #single_node_bs` for 8 nodes in this case
+      NNODES=8 EXP=examples/megatron/configs/llama2_7B-pretrain.yaml bash ./examples/run_slurm_pretrain.sh --global_batch_size 2048 --fp8 hybrid
+
+.. container:: model-doc primus_pyt_megatron_lm_train_llama-2-70b
+
+   To train Llama 2 70B FP8 on 8 nodes, run:
+
+   .. code-block:: shell
+
+      NNODES=8 EXP=examples/megatron/configs/llama2_70B-pretrain.yaml \
+      bash examples/run_slurm_pretrain.sh \
+          --micro_batch_size 10 \
+          --global_batch_size 640 \
+          --recompute_num_layers 80 \
+          --no_fp8_weight_transpose_cache true \
+          --fp8 hybrid
+
+   To train Llama 2 70B BF16 on 8 nodes, run:
+
+   .. code-block:: shell
+
+      NNODES=8 EXP=examples/megatron/configs/llama2_70B-pretrain.yaml \
+      bash ./examples/run_slurm_pretrain.sh \
+          --micro_batch_size 2 \
+          --global_batch_size 1536 \
+          --recompute_num_layers 12
+
+.. container:: model-doc primus_pyt_megatron_lm_train_mixtral-8x7b
+
+   To train Mixtral 8x7B BF16 on 8 nodes, run:
+
+   .. code-block:: shell
+
+      NNODES=8 EXP=examples/megatron/configs/mixtral_8x7B_v0.1-pretrain.yaml \
+      bash examples/run_slurm_pretrain.sh \
+          --micro_batch_size 2 \
+          --global_batch_size 256
+
+.. container:: model-doc primus_pyt_megatron_lm_train_qwen2.5-72b
+
+   To train Qwen2.5 72B FP8 on 8 nodes, run:
+
+   .. code-block:: shell
+
+      NNODES=8 EXP=examples/megatron/configs/qwen2.5_72B-pretrain.yaml \
+      bash examples/run_slurm_pretrain.sh \
+          --micro_batch_size 8 \
+          --global_batch_size 512 \
+          --recompute_num_layers 80 \
+          --no_fp8_weight_transpose_cache true \
+          --fp8 hybrid
+
+.. _amd-primus-megatron-lm-benchmark-test-vars-v257:
+
+Key options
+-----------
+
+The following are key options to take note of:
+
+fp8
+  ``hybrid`` enables FP8 GEMMs.
+
+use_torch_fsdp2
+  ``use_torch_fsdp2: 1`` enables torch fsdp-v2. If FSDP is enabled,
+  set ``use_distributed_optimizer`` and ``overlap_param_gather`` to ``false``.
+
+profile
+  To enable PyTorch profiling, set these parameters:
+
+  .. code-block:: yaml
+
+     profile: true
+     use_pytorch_profiler: true
+     profile_step_end: 7
+     profile_step_start: 6
+
+train_iters
+  The total number of iterations (default: 50).
+
+mock_data
+  True by default.
+
+micro_batch_size
+  Micro batch size.
+
+global_batch_size
+  Global batch size.
+
+recompute_granularity
+  For activation checkpointing.
+
+num_layers
+  For using a reduced number of layers as with proxy models.
+
+Previous versions
+=================
+
+See :doc:`megatron-lm-history` to find documentation for previous releases
+of the ``ROCm/megatron-lm`` Docker image.
--- a/docs/how-to/rocm-for-ai/training/benchmark-docker/primus-megatron.rst
+++ b/docs/how-to/rocm-for-ai/training/benchmark-docker/primus-megatron.rst
@@ -2,24 +2,25 @@
   :description: How to train a model using Megatron-LM for ROCm.
   :keywords: ROCm, AI, LLM, train, Megatron-LM, megatron, Llama, tutorial, docker, torch

-**********************************************
-Training a model with Primus and Megatron-Core
-**********************************************
+********************************************
+Training a model with Primus and Megatron-LM
+********************************************

-`Primus <https://github.com/AMD-AIG-AIMA/Primus>`__ is a unified and flexible
+`Primus <https://github.com/AMD-AGI/Primus>`__ is a unified and flexible
 LLM training framework designed to streamline training. It streamlines LLM
-training on AMD Instinct accelerators using a modular, reproducible configuration paradigm.
-Primus is backend-agnostic and supports multiple training engines -- including Megatron-Core.
+training on AMD Instinct GPUs using a modular, reproducible configuration paradigm.
+Primus is backend-agnostic and supports multiple training engines -- including Megatron.

 .. note::

-   Primus with the Megatron-Core backend is intended to replace ROCm
-   Megatron-LM in this Dockerized training environment. To learn how to migrate
-   workloads from Megatron-LM to Primus with Megatron-Core, see
-   :doc:`previous-versions/megatron-lm-primus-migration-guide`.
+   Primus with Megatron is designed to replace the :doc:`ROCm Megatron-LM training <megatron-lm>` workflow.
+   To learn how to migrate workloads from Megatron-LM to Primus with Megatron,
+   see :doc:`previous-versions/megatron-lm-primus-migration-guide`.

-For ease of use, AMD provides a ready-to-use Docker image for MI300 series accelerators
-containing essential components for Primus and Megatron-Core.
+For ease of use, AMD provides a ready-to-use Docker image for MI300 series GPUs
+containing essential components for Primus and Megatron-LM. This Docker image is powered by Primus
+Turbo optimizations for performance; this release adds support for Primus Turbo
+with optimized attention and grouped GEMM kernels.

 .. note::

@@ -46,7 +47,7 @@ containing essential components for Primus and Megatron-Core.
 Supported models
 ================

-The following models are pre-optimized for performance on AMD Instinct MI300X series accelerators.
+The following models are pre-optimized for performance on AMD Instinct MI300X series GPUs.
 Some instructions, commands, and training examples in this documentation might
 vary by model -- select one to get started.

@@ -104,21 +105,26 @@ system's configuration.

 .. _mi300x-amd-primus-megatron-lm-training:

+Environment setup
+=================
+
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/primus-megatron-benchmark-models.yaml

   {% set dockers = data.dockers %}
      {% set docker = dockers[0] %}

-   Environment setup
-   =================
-
   Use the following instructions to set up the environment, configure the script to train models, and
-   reproduce the benchmark results on MI300X series accelerators with the ``{{ docker.pull_tag }}`` image.
+   reproduce the benchmark results on MI300X series GPUs with the ``{{ docker.pull_tag }}`` image.

   .. _amd-primus-megatron-lm-requirements:

-   Download the Docker image
-   -------------------------
+Pull the Docker image
+=====================
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/primus-megatron-benchmark-models.yaml
+
+   {% set dockers = data.dockers %}
+      {% set docker = dockers[0] %}

   1. Use the following command to pull the Docker image from Docker Hub.

@@ -151,8 +157,8 @@ system's configuration.
      docker start primus_training_env
      docker exec -it primus_training_env bash

-The Docker container hosts verified release tag ``v0.1.0-rc1`` of the `Primus
-<https://github.com/AMD-AIG-AIMA/Primus/tree/v0.1.0-rc1>`__ repository.
+The Docker container hosts verified commit ``927a717`` of the `Primus
+<https://github.com/AMD-AGI/Primus/tree/927a71702784347a311ca48fd45f0f308c6ef6dd>`__ repository.

 .. _amd-primus-megatron-lm-environment-setup:

@@ -160,7 +166,7 @@ Configuration
 =============

 Primus defines a training configuration in YAML for each model in
-`examples/megatron/configs <https://github.com/AMD-AIG-AIMA/Primus/tree/v0.1.0-rc1/examples/megatron/configs>`__.
+`examples/megatron/configs <https://github.com/AMD-AGI/Primus/tree/927a71702784347a311ca48fd45f0f308c6ef6dd/examples/megatron/configs>`__.

 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/primus-megatron-benchmark-models.yaml

@@ -205,11 +211,7 @@ You can use either mock data or real data for training.
 Tokenizer
 ---------

-In Primus, each model uses a tokenizer from Hugging Face. For example, Llama
-3.1 8B model uses ``tokenizer_model: meta-llama/Llama-3.1-8B`` and
-``tokenizer_type: Llama3Tokenizer`` defined in the `llama3.1-8B model
-<https://github.com/AMD-AIG-AIMA/Primus/tree/v0.1.0-rc1/primus/configs/models/megatron/llama3.1_8B.yaml>`__
-definition. As such, you need to set the ``HF_TOKEN`` environment variable with
+Set the ``HF_TOKEN`` environment variable with the
 right permissions to access the tokenizer for each model.

 .. code-block:: bash
@@ -217,6 +219,14 @@ right permissions to access the tokenizer for each model.
   # Export your HF_TOKEN in the workspace
   export HF_TOKEN=<your_hftoken>

+.. note::
+
+   In Primus, each model uses a tokenizer from Hugging Face. For example, Llama
+   3.1 8B model uses ``tokenizer_model: meta-llama/Llama-3.1-8B`` and
+   ``tokenizer_type: Llama3Tokenizer`` defined in the `llama3.1-8B model
+   <https://github.com/AMD-AGI/Primus/blob/927a71702784347a311ca48fd45f0f308c6ef6dd/examples/megatron/configs/llama3.1_8B-pretrain.yaml>`__
+   definition.
+
 .. _amd-primus-megatron-lm-run-training:

 Run training
@@ -224,7 +234,7 @@ Run training

 Use the following example commands to set up the environment, configure
 :ref:`key options <amd-primus-megatron-lm-benchmark-test-vars>`, and run training on
-MI300X series accelerators with the AMD Megatron-LM environment.
+MI300X series GPUs with the AMD Megatron-LM environment.

 Single node training
 --------------------
@@ -237,10 +247,12 @@ To run training on a single node, navigate to ``/workspace/Primus`` and use the
   export HSA_NO_SCRATCH_RECLAIM=1
   export NVTE_CK_USES_BWD_V3=1

-Once setup is complete, run the appropriate training command.
-
 .. container:: model-doc primus_pyt_megatron_lm_train_llama-3.3-70b

+   Once setup is complete, run the appropriate training command.
+   The following run commands are tailored to Llama 3.3 70B.
+   See :ref:`amd-primus-megatron-lm-model-support` to switch to another available model.
+
   To run pre-training for Llama 3.3 70B BF16, run:

   .. code-block:: shell
@@ -253,6 +265,10 @@ Once setup is complete, run the appropriate training command.

 .. container:: model-doc primus_pyt_megatron_lm_train_llama-3.1-8b

+   Once setup is complete, run the appropriate training command.
+   The following run commands are tailored to Llama 3.1 8B.
+   See :ref:`amd-primus-megatron-lm-model-support` to switch to another available model.
+
   To run pre-training for Llama 3.1 8B FP8, run:

   .. code-block:: shell
@@ -271,6 +287,10 @@ Once setup is complete, run the appropriate training command.

 .. container:: model-doc primus_pyt_megatron_lm_train_llama-3.1-70b

+   Once setup is complete, run the appropriate training command.
+   The following run commands are tailored to Llama 3.1 70B.
+   See :ref:`amd-primus-megatron-lm-model-support` to switch to another available model.
+
   To run pre-training for Llama 3.1 70B BF16, run:

   .. code-block:: shell
@@ -287,8 +307,7 @@ Once setup is complete, run the appropriate training command.
      bash ./examples/run_pretrain.sh \
          --train_iters 50 \
          --num_layers 40 \
-          --fp8 hybrid \
-          --no_fp8_weight_transpose_cache true
+          --fp8 hybrid

   .. note::

@@ -296,6 +315,10 @@ Once setup is complete, run the appropriate training command.

 .. container:: model-doc primus_pyt_megatron_lm_train_llama-2-7b

+   Once setup is complete, run the appropriate training command.
+   The following run commands are tailored to Llama 2 7B.
+   See :ref:`amd-primus-megatron-lm-model-support` to switch to another available model.
+
   To run pre-training for Llama 2 7B FP8, run:

   .. code-block:: shell
@@ -314,16 +337,24 @@ Once setup is complete, run the appropriate training command.

 .. container:: model-doc primus_pyt_megatron_lm_train_llama-2-70b

+   Once setup is complete, run the appropriate training command.
+   The following run commands are tailored to Llama 2 70B.
+   See :ref:`amd-primus-megatron-lm-model-support` to switch to another available model.
+
   To run pre-training for Llama 2 70B BF16, run:

   .. code-block:: shell

      EXP=examples/megatron/configs/llama2_70B-pretrain.yaml \
-      bash ./examples/run_pretrain.sh --train_iters 50 
+      bash ./examples/run_pretrain.sh --train_iters 50

 .. container:: model-doc primus_pyt_megatron_lm_train_deepseek-v3-proxy

-   To run training on a single node for DeepSeek-V3 (MoE with expert parallel) with 3-layer proxy, 
+   Once setup is complete, run the appropriate training command.
+   The following run commands are tailored to DeepSeek-V3.
+   See :ref:`amd-primus-megatron-lm-model-support` to switch to another available model.
+
+   To run training on a single node for DeepSeek-V3 (MoE with expert parallel) with 3-layer proxy,
   use the following command:

   .. code-block:: shell
@@ -336,6 +367,10 @@ Once setup is complete, run the appropriate training command.

 .. container:: model-doc primus_pyt_megatron_lm_train_deepseek-v2-lite-16b

+   Once setup is complete, run the appropriate training command.
+   The following run commands are tailored to DeepSeek-V2-Lite.
+   See :ref:`amd-primus-megatron-lm-model-support` to switch to another available model.
+
   To run training on a single node for DeepSeek-V2-Lite (MoE with expert parallel),
   use the following command:

@@ -348,6 +383,10 @@ Once setup is complete, run the appropriate training command.

 .. container:: model-doc primus_pyt_megatron_lm_train_mixtral-8x7b

+   Once setup is complete, run the appropriate training command.
+   The following run commands are tailored to Mixtral 8x7B.
+   See :ref:`amd-primus-megatron-lm-model-support` to switch to another available model.
+
   To run training on a single node for Mixtral 8x7B (MoE with expert parallel),
   use the following command:

@@ -358,7 +397,11 @@ Once setup is complete, run the appropriate training command.

 .. container:: model-doc primus_pyt_megatron_lm_train_mixtral-8x22b-proxy

-   To run training on a single node for Mixtral 8x7B (MoE with expert parallel) with 4-layer proxy,
+   Once setup is complete, run the appropriate training command.
+   The following run commands are tailored to Mixtral 8x22B.
+   See :ref:`amd-primus-megatron-lm-model-support` to switch to another available model.
+
+   To run training on a single node for Mixtral 8x22B (MoE with expert parallel) with 4-layer proxy,
   use the following command:

   .. code-block:: shell
@@ -373,6 +416,10 @@ Once setup is complete, run the appropriate training command.

 .. container:: model-doc primus_pyt_megatron_lm_train_qwen2.5-7b

+   Once setup is complete, run the appropriate training command.
+   The following run commands are tailored to Qwen 2.5 7B.
+   See :ref:`amd-primus-megatron-lm-model-support` to switch to another available model.
+
   To run training on a single node for Qwen 2.5 7B BF16, use the following
   command:

@@ -392,6 +439,10 @@ Once setup is complete, run the appropriate training command.

 .. container:: model-doc primus_pyt_megatron_lm_train_qwen2.5-72b

+   Once setup is complete, run the appropriate training command.
+   The following run commands are tailored to Qwen 2.5 72B.
+   See :ref:`amd-primus-megatron-lm-model-support` to switch to another available model.
+
   To run the training on a single node for Qwen 2.5 72B BF16, use the following command.

   .. code-block:: shell
@@ -399,11 +450,16 @@ Once setup is complete, run the appropriate training command.
      EXP=examples/megatron/configs/qwen2.5_72B-pretrain.yaml \
      bash examples/run_pretrain.sh --train_iters 50

+.. _amd-primus-megatron-multi-node-examples:
+
 Multi-node training examples
 ----------------------------

+Refer to :doc:`/how-to/rocm-for-ai/system-setup/multi-node-setup` to configure your environment for multi-node
+training.
+
 To run training on multiple nodes, you can use the
-`run_slurm_pretrain.sh <https://github.com/AMD-AIG-AIMA/Primus/tree/v0.1.0-rc1/examples/run_slurm_pretrain.sh>`__
+`run_slurm_pretrain.sh <https://github.com/AMD-AGI/Primus/blob/927a71702784347a311ca48fd45f0f308c6ef6dd/examples/run_slurm_pretrain.sh>`__
 to launch the multi-node workload. Use the following steps to setup your environment:

 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/primus-megatron-benchmark-models.yaml
@@ -438,10 +494,9 @@ to launch the multi-node workload. Use the following steps to setup your environ

      NNODES=8 EXP=examples/megatron/configs/llama3.3_70B-pretrain.yaml \
      bash examples/run_slurm_pretrain.sh \
-          --micro_batch_size 4 \
+          --micro_batch_size 1 \
          --global_batch_size 256 \
          --recompute_num_layers 80 \
-          --no_fp8_weight_transpose_cache true \
          --fp8 hybrid

   To train Llama 3.3 70B BF16 on 8 nodes, run:
@@ -460,7 +515,7 @@ to launch the multi-node workload. Use the following steps to setup your environ

   .. code-block:: shell

-      # Adjust the training parameters. For e.g., `global_batch_size: 8 * #single_node_bs` for 8 nodes in this case 
+      # Adjust the training parameters. For e.g., `global_batch_size: 8 * #single_node_bs` for 8 nodes in this case
      NNODES=8 EXP=examples/megatron/configs/llama3.1_8B-pretrain.yaml \
      bash ./examples/run_slurm_pretrain.sh \
          --global_batch_size 1024 \
@@ -474,10 +529,9 @@ to launch the multi-node workload. Use the following steps to setup your environ

      NNODES=8 EXP=examples/megatron/configs/llama3.1_70B-pretrain.yaml \
      bash examples/run_slurm_pretrain.sh \
-          --micro_batch_size 4 \
+          --micro_batch_size 1 \
          --global_batch_size 256 \
          --recompute_num_layers 80 \
-          --no_fp8_weight_transpose_cache true \
          --fp8 hybrid

   To train Llama 3.1 70B BF16 on 8 nodes, run:
@@ -496,7 +550,7 @@ to launch the multi-node workload. Use the following steps to setup your environ

   .. code-block:: shell

-      # Adjust the training parameters. For e.g., `global_batch_size: 8 * #single_node_bs` for 8 nodes in this case 
+      # Adjust the training parameters. For e.g., `global_batch_size: 8 * #single_node_bs` for 8 nodes in this case
      NNODES=8 EXP=examples/megatron/configs/llama2_7B-pretrain.yaml bash ./examples/run_slurm_pretrain.sh --global_batch_size 2048 --fp8 hybrid

 .. container:: model-doc primus_pyt_megatron_lm_train_llama-2-70b
@@ -507,10 +561,9 @@ to launch the multi-node workload. Use the following steps to setup your environ

      NNODES=8 EXP=examples/megatron/configs/llama2_70B-pretrain.yaml \
      bash examples/run_slurm_pretrain.sh \
-          --micro_batch_size 10 \
-          --global_batch_size 640 \
+          --micro_batch_size 2 \
+          --global_batch_size 256 \
          --recompute_num_layers 80 \
-          --no_fp8_weight_transpose_cache true \
          --fp8 hybrid

   To train Llama 2 70B BF16 on 8 nodes, run:
@@ -542,10 +595,9 @@ to launch the multi-node workload. Use the following steps to setup your environ

      NNODES=8 EXP=examples/megatron/configs/qwen2.5_72B-pretrain.yaml \
      bash examples/run_slurm_pretrain.sh \
-          --micro_batch_size 8 \
-          --global_batch_size 512 \
+          --micro_batch_size 4 \
+          --global_batch_size 256 \
          --recompute_num_layers 80 \
-          --no_fp8_weight_transpose_cache true \
          --fp8 hybrid

 .. _amd-primus-megatron-lm-benchmark-test-vars:
@@ -590,6 +642,18 @@ recompute_granularity
 num_layers
  For using a reduced number of layers as with proxy models.

+Further reading
+===============
+
+- For an introduction to Primus, see `Primus: A Lightweight, Unified Training
+  Framework for Large Models on AMD GPUs <https://rocm.blogs.amd.com/software-tools-optimization/primus/README.html>`__.
+
+- To learn more about system settings and management practices to configure your system for
+  AMD Instinct MI300X series GPUs, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
+
+- For a list of other ready-made Docker images for AI with ROCm, see
+  `AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.
+
 Previous versions
 =================

@@ -598,5 +662,4 @@ of the ``ROCm/megatron-lm`` Docker image.

 This training environment now uses Primus with Megatron as the primary
 configuration. Limited support for the legacy ROCm Megatron-LM is still
-available. For instructions on using ROCm Megatron-LM, see the
-:doc:`megatron-lm` document.
+available; see the :doc:`megatron-lm` documentation.
--- a/docs/how-to/rocm-for-ai/training/benchmark-docker/primus-pytorch.rst
+++ b/docs/how-to/rocm-for-ai/training/benchmark-docker/primus-pytorch.rst
@@ -8,12 +8,12 @@ Training a model with Primus and PyTorch

 `Primus <https://github.com/AMD-AGI/Primus>`__ is a unified and flexible
 LLM training framework designed to streamline training. It streamlines LLM
-training on AMD Instinct accelerators using a modular, reproducible configuration paradigm.
+training on AMD Instinct GPUs using a modular, reproducible configuration paradigm.
 Primus now supports the PyTorch torchtitan backend.

 .. note::

-   Primus with the PyTorch torchtitan backend is intended to supersede the :doc:`ROCm PyTorch training <pytorch-training>` workflow.
+   Primus with the PyTorch torchtitan backend is designed to replace the :doc:`ROCm PyTorch training <pytorch-training>` workflow.
   See :doc:`pytorch-training` to see steps to run workloads without Primus.

 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/primus-pytorch-benchmark-models.yaml
@@ -21,7 +21,7 @@ Primus now supports the PyTorch torchtitan backend.
   {% set dockers = data.dockers %}
   {% set docker = dockers[0] %}
   For ease of use, AMD provides a ready-to-use Docker image -- ``{{
-   docker.pull_tag }}`` -- for MI300X series accelerators containing essential
+   docker.pull_tag }}`` -- for MI300X series GPUs containing essential
   components for Primus and PyTorch training with
   Primus Turbo optimizations.

@@ -41,7 +41,7 @@ Primus now supports the PyTorch torchtitan backend.
 Supported models
 ================

-The following models are pre-optimized for performance on the AMD Instinct MI325X and MI300X accelerators.
+The following models are pre-optimized for performance on the AMD Instinct MI325X and MI300X GPUs.
 Some instructions, commands, and training recommendations in this documentation might
 vary by model -- select one to get started.

@@ -104,22 +104,25 @@ This Docker image is optimized for specific model configurations outlined
 below. Performance can vary for other training workloads, as AMD
 doesn’t test configurations and run conditions outside those described.

+Pull the Docker image
+=====================
+
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/primus-pytorch-benchmark-models.yaml

   {% set unified_docker = data.dockers[0] %}

-   Pull the Docker image
-   =====================
-
   Use the following command to pull the `Docker image <{{ unified_docker.docker_hub_url }}>`_ from Docker Hub.

   .. code-block:: shell

      docker pull {{ unified_docker.pull_tag }}

-   Run training
-   ============
+Run training
+============

+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/primus-pytorch-benchmark-models.yaml
+
+   {% set unified_docker = data.dockers[0] %}
   {% set model_groups = data.model_groups %}

   Once the setup is complete, choose between the following two workflows to start benchmarking training.
@@ -293,7 +296,7 @@ Further reading
 - To learn more about MAD and the ``madengine`` CLI, see the `MAD usage guide <https://github.com/ROCm/MAD?tab=readme-ov-file#usage-guide>`__.

 - To learn more about system settings and management practices to configure your system for
-  AMD Instinct MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
+  AMD Instinct MI300X series GPUs, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.

 - For a list of other ready-made Docker images for AI with ROCm, see
  `AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.
--- a/docs/how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.rst
+++ b/docs/how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.rst
@@ -10,7 +10,7 @@ Training a model with PyTorch on ROCm

 .. note::

-   Primus with the PyTorch torchtitan backend is intended to supersede the :doc:`ROCm PyTorch training <pytorch-training>` workflow.
+   Primus with the PyTorch torchtitan backend is designed to replace the :doc:`ROCm PyTorch training <pytorch-training>` workflow.
   See :doc:`primus-pytorch` for details.

 PyTorch is an open-source machine learning framework that is widely used for
@@ -22,7 +22,7 @@ model training with GPU-optimized components for transformer-based models.
   {% set docker = dockers[0] %}
   The `PyTorch for ROCm training Docker <{{ docker.docker_hub_url }}>`__
   (``{{ docker.pull_tag }}``) image provides a prebuilt optimized environment for fine-tuning and pretraining a
-   model on AMD Instinct MI325X and MI300X accelerators. It includes the following software components to accelerate
+   model on AMD Instinct MI325X and MI300X GPUs. It includes the following software components to accelerate
   training workloads:

   .. list-table::
@@ -41,7 +41,7 @@ model training with GPU-optimized components for transformer-based models.
 Supported models
 ================

-The following models are pre-optimized for performance on the AMD Instinct MI325X and MI300X accelerators.
+The following models are pre-optimized for performance on the AMD Instinct MI325X and MI300X GPUs.
 Some instructions, commands, and training recommendations in this documentation might
 vary by model -- select one to get started.

@@ -126,7 +126,7 @@ popular AI models.
   The performance data presented in
   `Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8deaeb413-item-21cea50186-tab>`_
   should not be interpreted as the peak performance achievable by AMD
-   Instinct MI325X and MI300X accelerators or ROCm software.
+   Instinct MI325X and MI300X GPUs or ROCm software.

 System validation
 =================
@@ -299,28 +299,28 @@ Run training
                    - `Hugging Face Datasets <https://huggingface.co/docs/datasets/v3.2.0/en/index>`_ 3.2.0

                  * - ``torchdata``
-                    - `TorchData <https://pytorch.org/data/beta/index.html>`_
+                    - `TorchData <https://meta-pytorch.org/data/beta/index.html#torchdata>`__

                  * - ``tomli``
-                    - `Tomli <https://pypi.org/project/tomli/>`_
+                    - `Tomli <https://pypi.org/project/tomli/>`__

                  * - ``tiktoken``
-                    - `tiktoken <https://github.com/openai/tiktoken>`_
+                    - `tiktoken <https://github.com/openai/tiktoken>`__

                  * - ``blobfile``
-                    - `blobfile <https://pypi.org/project/blobfile/>`_
+                    - `blobfile <https://pypi.org/project/blobfile/>`__

                  * - ``tabulate``
-                    - `tabulate <https://pypi.org/project/tabulate/>`_
+                    - `tabulate <https://pypi.org/project/tabulate/>`__

                  * - ``wandb``
-                    - `Weights & Biases <https://github.com/wandb/wandb>`_
+                    - `Weights & Biases <https://github.com/wandb/wandb>`__

                  * - ``sentencepiece``
-                    - `SentencePiece <https://github.com/google/sentencepiece>`_ 0.2.0
+                    - `SentencePiece <https://github.com/google/sentencepiece>`__ 0.2.0

                  * - ``tensorboard``
-                    - `TensorBoard <https://www.tensorflow.org/tensorboard>`_ 2.18.0
+                    - `TensorBoard <https://www.tensorflow.org/tensorboard>`__ 2.18.0

            .. container:: model-doc pyt_train_flux

@@ -336,50 +336,50 @@ Run training
                    - `Hugging Face Accelerate <https://huggingface.co/docs/accelerate/en/index>`_

                  * - ``datasets``
-                    - `Hugging Face Datasets <https://huggingface.co/docs/datasets/v3.2.0/en/index>`_ 3.2.0
+                    - `Hugging Face Datasets <https://huggingface.co/docs/datasets/v3.2.0/en/index>`__ 3.2.0

                  * - ``sentencepiece``
-                    - `SentencePiece <https://github.com/google/sentencepiece>`_ 0.2.0
+                    - `SentencePiece <https://github.com/google/sentencepiece>`__ 0.2.0

                  * - ``tensorboard``
-                    - `TensorBoard <https://www.tensorflow.org/tensorboard>`_ 2.18.0
+                    - `TensorBoard <https://www.tensorflow.org/tensorboard>`__ 2.18.0

                  * - ``csvkit``
-                    - `csvkit <https://csvkit.readthedocs.io/en/latest/>`_ 2.0.1
+                    - `csvkit <https://csvkit.readthedocs.io/en/latest/>`__ 2.0.1

                  * - ``deepspeed``
-                    - `DeepSpeed <https://github.com/deepspeedai/DeepSpeed>`_ 0.16.2
+                    - `DeepSpeed <https://github.com/deepspeedai/DeepSpeed>`__ 0.16.2

                  * - ``diffusers``
-                    - `Hugging Face Diffusers <https://huggingface.co/docs/diffusers/en/index>`_ 0.31.0
+                    - `Hugging Face Diffusers <https://huggingface.co/docs/diffusers/en/index>`__ 0.31.0

                  * - ``GitPython``
-                    - `GitPython <https://github.com/gitpython-developers/GitPython>`_ 3.1.44
+                    - `GitPython <https://github.com/gitpython-developers/GitPython>`__ 3.1.44

                  * - ``opencv-python-headless``
-                    - `opencv-python-headless <https://pypi.org/project/opencv-python-headless/>`_ 4.10.0.84
+                    - `opencv-python-headless <https://pypi.org/project/opencv-python-headless/>`__ 4.10.0.84

                  * - ``peft``
-                    - `PEFT <https://huggingface.co/docs/peft/en/index>`_ 0.14.0
+                    - `PEFT <https://huggingface.co/docs/peft/en/index>`__ 0.14.0

                  * - ``protobuf``
-                    - `Protocol Buffers <https://github.com/protocolbuffers/protobuf>`_ 5.29.2
+                    - `Protocol Buffers <https://github.com/protocolbuffers/protobuf>`__ 5.29.2

                  * - ``pytest``
-                    - `PyTest <https://docs.pytest.org/en/stable/>`_ 8.3.4
+                    - `PyTest <https://docs.pytest.org/en/stable/>`__ 8.3.4

                  * - ``python-dotenv``
-                    - `python-dotenv <https://pypi.org/project/python-dotenv/>`_ 1.0.1
+                    - `python-dotenv <https://pypi.org/project/python-dotenv/>`__ 1.0.1

                  * - ``seaborn``
-                    - `Seaborn <https://seaborn.pydata.org/>`_ 0.13.2
+                    - `Seaborn <https://seaborn.pydata.org/>`__ 0.13.2

                  * - ``transformers``
-                    - `Transformers <https://huggingface.co/docs/transformers/en/index>`_ 4.47.0
+                    - `Transformers <https://huggingface.co/docs/transformers/en/index>`__ 4.47.0

            ``pytorch_benchmark_setup.sh`` downloads the following datasets from Hugging Face:

-            * `bghira/pseudo-camera-10k <https://huggingface.co/datasets/bghira/pseudo-camera-10k>`_
+            * `bghira/pseudo-camera-10k <https://huggingface.co/datasets/bghira/pseudo-camera-10k>`__

   {% for model_group in model_groups %}
      {% for model in model_group.models %}
@@ -521,9 +521,14 @@ Run training

            For examples of benchmarking commands, see `<https://github.com/ROCm/MAD/tree/develop/benchmark/pytorch_train#benchmarking-examples>`__.

+.. _amd-pytorch-training-multinode-examples:
+
 Multi-node training
 -------------------

+Refer to :doc:`/how-to/rocm-for-ai/system-setup/multi-node-setup` to configure your environment for multi-node
+training. See :ref:`rocm-for-ai-multi-node-setup-pyt-train-example` for example Slurm run commands.
+
 Pre-training
 ~~~~~~~~~~~~

@@ -571,7 +576,7 @@ Further reading
 - To learn more about MAD and the ``madengine`` CLI, see the `MAD usage guide <https://github.com/ROCm/MAD?tab=readme-ov-file#usage-guide>`__.

 - To learn more about system settings and management practices to configure your system for
-  AMD Instinct MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
+  AMD Instinct MI300X series GPUs, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.

 - For a list of other ready-made Docker images for AI with ROCm, see
  `AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.
--- a/docs/how-to/rocm-for-hpc/index.rst
+++ b/docs/how-to/rocm-for-hpc/index.rst
@@ -76,6 +76,14 @@ Ubuntu versions.
          single node workstations, multi and many-core nodes, clusters of nodes via
          QMP, and classic vector computers.

+      * -
+        - `Grid <https://github.com/amd/InfinityHub-CI/tree/main/grid/>`_
+        - Grid is a library for lattice QCD calculations that employs a high-level data parallel
+          approach while using a number of techniques to target multiple types of parallelism.
+          The library currently supports MPI, OpenMP, and short vector parallelism. The SIMD
+          instruction sets covered include SSE, AVX, AVX2, FMA4, IMCI, and AVX512. Recent
+          releases expanded this support to include GPU offloading.
+
      * -
        - `MILC <https://github.com/amd/InfinityHub-CI/tree/main/milc/>`_
        - The MILC Code is a set of research codes developed by MIMD Lattice Computation
@@ -148,24 +156,6 @@ Ubuntu versions.
          backends ranging from general-purpose processors, CUDA and HIP enabled
          accelerators to SX-Aurora vector processors.

-      * -
-        - `nekRS <https://github.com/amd/InfinityHub-CI/tree/main/nekrs>`_
-        - nekRS is an open-source Navier Stokes solver based on the spectral element
-          method targeting classical processors and accelerators like GPUs.
-
-      * -
-        - `OpenFOAM <https://github.com/amd/InfinityHub-CI/tree/main/openfoam>`_
-        - OpenFOAM is a free, open-source computational fluid dynamics (CFD)
-          tool developed primarily by OpenCFD Ltd. It has a large user
-          base across most areas of engineering and science, from both commercial and
-          academic organizations. OpenFOAM has extensive features to solve
-          anything from complex fluid flows involving chemical reactions, turbulence, and
-          heat transfer, to acoustics, solid mechanics, and electromagnetics.
-
-      * -
-        - `PeleC <https://github.com/amd/InfinityHub-CI/tree/main/pelec>`_
-        - PeleC is an adaptive mesh refinement(AMR) solver for compressible reacting flows.
-
      * -
        - `Simcenter Star-CCM+ <https://github.com/amd/InfinityHub-CI/tree/main/siemens-star-ccm>`_
        - Simcenter Star-CCM+ is a comprehensive computational fluid dynamics (CFD) and multiphysics
@@ -199,15 +189,6 @@ Ubuntu versions.
          defined in SymPy to create and execute highly optimized Finite Difference stencil
          kernels on multiple computer platforms.

-      * -
-        - `ECHELON <https://github.com/amd/InfinityHub-CI/tree/main/srt-echelon>`_
-        - ECHELON by Stone Ridge Technology is a reservoir simulation tool. With
-          fast processing, it retains precise accuracy and preserves legacy simulator results.
-          Faster reservoir simulation enables reservoir engineers to produce many realizations,
-          address larger models, and use advanced physics. It opens new workflows based on
-          ensemble methodologies for history matching and forecasting that yield
-          increased accuracy and more predictive results.
-
      * - Benchmark
        - `rocHPL <https://github.com/amd/InfinityHub-CI/tree/main/rochpl>`_
        - HPL, or High-Performance Linpack, is a benchmark which solves a uniformly
@@ -240,6 +221,10 @@ Ubuntu versions.
        - Base container for GPU-aware MPI with ROCm for HPC applications. This
          project provides a boilerplate for building and running a Docker
          container with ROCm supporting GPU-aware MPI implementations using MPICH.
+
+      * - 
+        - `AMD ROCm with Conda Environment Container <https://github.com/amd/InfinityHub-CI/tree/main/conda-rocm-environment>`_
+        - Container recipe that uses ``base-gpu-mpi-rocm-docker`` as the base image and adds Conda. The container can be used as a base for applications that require Conda.
      
      * -
        - `Kokkos <https://github.com/amd/InfinityHub-CI/tree/main/kokkos>`_
@@ -258,14 +243,6 @@ Ubuntu versions.
          range of hardware platforms via use of an in-built domain specific language derived
          from the Mako templating engine.

-      * -
-        - `PETSc <https://github.com/amd/InfinityHub-CI/tree/main/petsc>`_
-        - Portable, Extensible Toolkit for Scientific Computation (PETSc) is a suite of data structures
-          and routines for the scalable (parallel) solution of scientific applications modeled by partial
-          differential equations. It supports MPI, GPUs through CUDA, HIP, and OpenCL,
-          as well as hybrid MPI-GPU parallelism. It also supports the NEC-SX Tsubasa Vector Engine.
-          PETSc also includes the Toolkit for Advanced Optimization (TAO) library.
-
      * -
        - `RAJA <https://github.com/amd/InfinityHub-CI/tree/main/raja>`_
        - RAJA is a library of C++ software abstractions, primarily developed at Lawrence
@@ -278,4 +255,9 @@ Ubuntu versions.
          within an object-oriented software framework for the solution of large-scale,
          complex multi-physics engineering and scientific problems.

+      * -
+        - `vLLM <https://github.com/amd/InfinityHub-CI/tree/main/vllm>`_
+        - The vLLM project helps to build a Dockerfile for performance testing of Llama 2 applications.
+          This Dockerfile uses a base install that includes Ubuntu 20.04, ROCm 6.1.2, and Python 3.9. The container can host Llama 2 applications (LLMs) and requires some large input files for testing.
+
 To learn about ROCm for AI applications, see :doc:`../rocm-for-ai/index`.
--- a/docs/index.md
+++ b/docs/index.md
@@ -16,7 +16,7 @@ ROCm supports multiple programming languages and programming interfaces such as
 {doc}`HIP (Heterogeneous-Compute Interface for Portability)<hip:index>`, OpenCL,
 and OpenMP, as explained in the [Programming guide](./how-to/programming_guide.rst).

-If you're using AMD Radeon™ PRO or Radeon GPUs in a workstation setting with a display connected, review {doc}`Radeon-specific ROCm documentation<radeon:index>`.
+If you're using AMD Radeon GPUs or Ryzen APUs in a workstation setting with a display connected, review {doc}`ROCm on Radeon and Ryzen documentation<radeon:index>`.

 ROCm documentation is organized into the following categories:

@@ -29,7 +29,7 @@ ROCm documentation is organized into the following categories:

 * {doc}`ROCm on Linux <rocm-install-on-linux:reference/system-requirements>`
 * {doc}`HIP SDK on Windows <rocm-install-on-windows:reference/system-requirements>`
-* [ROCm on Radeon GPUs](https://rocm.docs.amd.com/projects/radeon/en/latest/index.html)
+* {doc}`ROCm on Radeon and Ryzen<radeon:index>`
 * {doc}`Deep learning frameworks </how-to/deep-learning-rocm>`
 * {doc}`Build from source </how-to/build-rocm>`
 :::
--- a/docs/reference/gpu-arch-specs.rst
+++ b/docs/reference/gpu-arch-specs.rst
@@ -628,6 +628,24 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 32
          - 12
          - 0
+        *
+          - Radeon RX 9060
+          - RDNA4
+          - gfx1200
+          - 8
+          - 28
+          - 32 or 64
+          - 128
+          - 32
+          - 4
+          - N/A
+          - 32
+          - 16
+          - 32
+          - 768
+          - 32
+          - 12
+          - 0
        *
          - Radeon RX 7900 XTX
          - RDNA3
--- a/docs/release/versions.md
+++ b/docs/release/versions.md
@@ -10,6 +10,7 @@

 | Version | Release date |
 | ------- | ------------ |
+| [7.0.2](https://rocm.docs.amd.com/en/docs-7.0.2/) | October 10, 2025 |
 | [7.0.1](https://rocm.docs.amd.com/en/docs-7.0.1/) | September 17, 2025 |
 | [7.0.0](https://rocm.docs.amd.com/en/docs-7.0.0/) | September 16, 2025 |
 | [6.4.3](https://rocm.docs.amd.com/en/docs-6.4.3/) | August 7, 2025 |
--- a/docs/sphinx/_toc.yml.in
+++ b/docs/sphinx/_toc.yml.in
@@ -23,8 +23,8 @@ subtrees:
    title: ROCm on Linux
  - url: https://rocm.docs.amd.com/projects/install-on-windows/en/latest/
    title: HIP SDK on Windows
-  - url: https://rocm.docs.amd.com/projects/radeon/en/latest/index.html
-    title: ROCm on Radeon GPUs
+  - url: https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/index.html
+    title: ROCm on Radeon and Ryzen
  - file: how-to/deep-learning-rocm.md
    title: Deep learning frameworks
    subtrees:
@@ -49,6 +49,8 @@ subtrees:
        title: Ray compatibility
      - file: compatibility/ml-compatibility/llama-cpp-compatibility.rst
        title: llama.cpp compatibility
+      - file: compatibility/ml-compatibility/flashinfer-compatibility.rst
+        title: FlashInfer compatibility
  - file: how-to/build-rocm.rst
    title: Build ROCm from source

@@ -60,8 +62,15 @@ subtrees:
    - entries:
      - file: how-to/rocm-for-ai/install.rst
        title: Installation
-      - file: how-to/rocm-for-ai/system-health-check.rst
-        title: System health benchmarks
+      - file: how-to/rocm-for-ai/system-setup/index.rst
+        title: System setup
+        entries:
+        - file: how-to/rocm-for-ai/system-setup/prerequisite-system-validation.rst
+          title: System validation
+        - file: how-to/rocm-for-ai/system-setup/multi-node-setup.rst
+          title: Multi-node setup
+        - file: how-to/rocm-for-ai/system-setup/system-health-check.rst
+          title: System health benchmarks
      - file: how-to/rocm-for-ai/training/index.rst
        title: Training
        subtrees:
--- a/docs/sphinx/requirements.in
+++ b/docs/sphinx/requirements.in
@@ -1,4 +1,4 @@
-rocm-docs-core==1.20.1
+rocm-docs-core==1.26.0
 sphinx-reredirects
 sphinx-sitemap
 sphinxcontrib.datatemplates==0.11.0
--- a/docs/sphinx/requirements.txt
+++ b/docs/sphinx/requirements.txt
@@ -2,7 +2,7 @@
 # This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
-#    pip-compile requirements.in
+#    pip-compile docs/sphinx/requirements.in
 #
 accessible-pygments==0.0.5
    # via pydata-sphinx-theme
@@ -10,7 +10,7 @@ alabaster==1.0.0
    # via sphinx
 asttokens==3.0.0
    # via stack-data
-attrs==25.3.0
+attrs==25.4.0
    # via
    #   jsonschema
    #   jupyter-cache
@@ -19,34 +19,32 @@ babel==2.17.0
    # via
    #   pydata-sphinx-theme
    #   sphinx
-beautifulsoup4==4.13.4
+beautifulsoup4==4.14.2
    # via pydata-sphinx-theme
 breathe==4.36.0
    # via rocm-docs-core
-certifi==2025.4.26
+certifi==2025.10.5
    # via requests
-cffi==1.17.1
+cffi==2.0.0
    # via
    #   cryptography
    #   pynacl
-charset-normalizer==3.4.2
+charset-normalizer==3.4.3
    # via requests
-click==8.2.1
+click==8.3.0
    # via
    #   jupyter-cache
    #   sphinx-external-toc
-comm==0.2.2
+comm==0.2.3
    # via ipykernel
-cryptography==45.0.3
+cryptography==46.0.2
    # via pyjwt
-debugpy==1.8.14
+debugpy==1.8.17
    # via ipykernel
 decorator==5.2.1
    # via ipython
 defusedxml==0.7.1
    # via sphinxcontrib-datatemplates
-deprecated==1.2.18
-    # via pygithub
 docutils==0.21.2
    # via
    #   myst-parser
@@ -54,17 +52,17 @@ docutils==0.21.2
    #   sphinx
 exceptiongroup==1.3.0
    # via ipython
-executing==2.2.0
+executing==2.2.1
    # via stack-data
-fastjsonschema==2.21.1
+fastjsonschema==2.21.2
    # via
    #   nbformat
    #   rocm-docs-core
 gitdb==4.0.12
    # via gitpython
-gitpython==3.1.44
+gitpython==3.1.45
    # via rocm-docs-core
-greenlet==3.2.3
+greenlet==3.2.4
    # via sqlalchemy
 idna==3.10
    # via requests
@@ -74,7 +72,7 @@ importlib-metadata==8.7.0
    # via
    #   jupyter-cache
    #   myst-nb
-ipykernel==6.29.5
+ipykernel==6.30.1
    # via myst-nb
 ipython==8.37.0
    # via
@@ -86,9 +84,9 @@ jinja2==3.1.6
    # via
    #   myst-parser
    #   sphinx
-jsonschema==4.24.0
+jsonschema==4.25.1
    # via nbformat
-jsonschema-specifications==2025.4.1
+jsonschema-specifications==2025.9.1
    # via jsonschema
 jupyter-cache==1.0.1
    # via myst-nb
@@ -106,17 +104,17 @@ markdown-it-py==3.0.0
    # via
    #   mdit-py-plugins
    #   myst-parser
-markupsafe==3.0.2
+markupsafe==3.0.3
    # via jinja2
 matplotlib-inline==0.1.7
    # via
    #   ipykernel
    #   ipython
-mdit-py-plugins==0.4.2
+mdit-py-plugins==0.5.0
    # via myst-parser
 mdurl==0.1.2
    # via markdown-it-py
-myst-nb==1.2.0
+myst-nb==1.3.0
    # via rocm-docs-core
 myst-parser==4.0.1
    # via myst-nb
@@ -134,31 +132,30 @@ nest-asyncio==1.6.0
 packaging==25.0
    # via
    #   ipykernel
-    #   pydata-sphinx-theme
    #   sphinx
-parso==0.8.4
+parso==0.8.5
    # via jedi
 pexpect==4.9.0
    # via ipython
-platformdirs==4.3.8
+platformdirs==4.4.0
    # via jupyter-core
-prompt-toolkit==3.0.51
+prompt-toolkit==3.0.52
    # via ipython
-psutil==7.0.0
+psutil==7.1.0
    # via ipykernel
 ptyprocess==0.7.0
    # via pexpect
 pure-eval==0.2.3
    # via stack-data
-pycparser==2.22
+pycparser==2.23
    # via cffi
-pydata-sphinx-theme==0.15.4
+pydata-sphinx-theme==0.16.1
    # via
    #   rocm-docs-core
    #   sphinx-book-theme
-pygithub==2.6.1
+pygithub==2.8.1
    # via rocm-docs-core
-pygments==2.19.1
+pygments==2.19.2
    # via
    #   accessible-pygments
    #   ipython
@@ -166,11 +163,11 @@ pygments==2.19.1
    #   sphinx
 pyjwt[crypto]==2.10.1
    # via pygithub
-pynacl==1.5.0
+pynacl==1.6.0
    # via pygithub
 python-dateutil==2.9.0.post0
    # via jupyter-client
-pyyaml==6.0.2
+pyyaml==6.0.3
    # via
    #   jupyter-cache
    #   myst-nb
@@ -178,7 +175,7 @@ pyyaml==6.0.2
    #   rocm-docs-core
    #   sphinx-external-toc
    #   sphinxcontrib-datatemplates
-pyzmq==26.4.0
+pyzmq==27.1.0
    # via
    #   ipykernel
    #   jupyter-client
@@ -186,13 +183,13 @@ referencing==0.36.2
    # via
    #   jsonschema
    #   jsonschema-specifications
-requests==2.32.4
+requests==2.32.5
    # via
    #   pygithub
    #   sphinx
-rocm-docs-core==1.20.1
-    # via -r requirements.in
-rpds-py==0.25.1
+rocm-docs-core==1.26.0
+    # via -r docs/sphinx/requirements.in
+rpds-py==0.27.1
    # via
    #   jsonschema
    #   referencing
@@ -202,7 +199,7 @@ smmap==5.0.2
    # via gitdb
 snowballstemmer==3.0.1
    # via sphinx
-soupsieve==2.7
+soupsieve==2.8
    # via beautifulsoup4
 sphinx==8.1.3
    # via
@@ -220,7 +217,7 @@ sphinx==8.1.3
    #   sphinx-reredirects
    #   sphinxcontrib-datatemplates
    #   sphinxcontrib-runcmd
-sphinx-book-theme==1.1.4
+sphinx-book-theme==1.1.3
    # via rocm-docs-core
 sphinx-copybutton==0.5.2
    # via rocm-docs-core
@@ -233,13 +230,13 @@ sphinx-last-updated-by-git==0.3.8
 sphinx-notfound-page==1.1.0
    # via rocm-docs-core
 sphinx-reredirects==0.1.6
-    # via -r requirements.in
-sphinx-sitemap==2.8.0
-    # via -r requirements.in
+    # via -r docs/sphinx/requirements.in
+sphinx-sitemap==2.9.0
+    # via -r docs/sphinx/requirements.in
 sphinxcontrib-applehelp==2.0.0
    # via sphinx
 sphinxcontrib-datatemplates==0.11.0
-    # via -r requirements.in
+    # via -r docs/sphinx/requirements.in
 sphinxcontrib-devhelp==2.0.0
    # via sphinx
 sphinxcontrib-htmlhelp==2.1.0
@@ -252,7 +249,7 @@ sphinxcontrib-runcmd==0.2.0
    # via sphinxcontrib-datatemplates
 sphinxcontrib-serializinghtml==2.0.0
    # via sphinx
-sqlalchemy==2.0.41
+sqlalchemy==2.0.43
    # via jupyter-cache
 stack-data==0.6.3
    # via ipython
@@ -260,13 +257,12 @@ tabulate==0.9.0
    # via jupyter-cache
 tomli==2.2.1
    # via sphinx
-tornado==6.5.1
+tornado==6.5.2
    # via
    #   ipykernel
    #   jupyter-client
 traitlets==5.14.3
    # via
-    #   comm
    #   ipykernel
    #   ipython
    #   jupyter-client
@@ -274,9 +270,10 @@ traitlets==5.14.3
    #   matplotlib-inline
    #   nbclient
    #   nbformat
-typing-extensions==4.14.0
+typing-extensions==4.15.0
    # via
    #   beautifulsoup4
+    #   cryptography
    #   exceptiongroup
    #   ipython
    #   myst-nb
@@ -288,9 +285,7 @@ urllib3==2.5.0
    # via
    #   pygithub
    #   requests
-wcwidth==0.2.13
+wcwidth==0.2.14
    # via prompt-toolkit
-wrapt==1.17.2
-    # via deprecated
 zipp==3.23.0
    # via importlib-metadata
--- a/tools/rocm-build/rocm-7.0.2.xml
+++ b/tools/rocm-build/rocm-7.0.2.xml
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<manifest>
+    <remote name="rocm-org" fetch="https://github.com/ROCm/" />
+    <default revision="refs/tags/rocm-7.0.2"
+     remote="rocm-org"
+     sync-c="true"
+     sync-j="4" />
+<!--list of projects for ROCm-->
+    <project name="ROCm" revision="roc-7.0.x" />
+    <project name="ROCK-Kernel-Driver" />
+    <project name="ROCR-Runtime" />
+    <project name="amdsmi" />
+    <project name="aqlprofile" />
+    <project name="rdc" />
+    <project name="rocm_bandwidth_test" />
+    <project name="rocm_smi_lib" />
+    <project name="rocm-core" />
+    <project name="rocm-examples" />
+    <project name="rocminfo" />
+    <project name="rocprofiler" />
+    <project name="rocprofiler-register" />
+    <project name="rocprofiler-sdk" />
+    <project name="rocprofiler-compute" />
+    <project name="rocprofiler-systems" />
+    <project name="roctracer" />
+<!--HIP Projects-->
+    <project name="hip" />
+    <project name="hip-tests" />
+    <project name="HIPIFY" />
+    <project name="clr" />
+    <project name="hipother" />
+<!-- The following projects are all associated with the AMDGPU LLVM compiler -->
+    <project name="half" />
+    <project name="llvm-project" />
+    <project name="spirv-llvm-translator" />
+<!-- gdb projects -->
+    <project name="ROCdbgapi" />
+    <project name="ROCgdb" />
+    <project name="rocr_debug_agent" />
+<!-- ROCm Libraries -->
+    <project groups="mathlibs" name="AMDMIGraphX" />
+    <project groups="mathlibs" name="MIVisionX" />
+    <project groups="mathlibs" name="ROCmValidationSuite" />
+    <project groups="mathlibs" name="composable_kernel" />
+    <project groups="mathlibs" name="hipTensor" />
+    <project groups="mathlibs" name="hipfort" />
+    <project groups="mathlibs" name="rccl" />
+    <project groups="mathlibs" name="rocAL" />
+    <project groups="mathlibs" name="rocALUTION" />
+    <project groups="mathlibs" name="rocDecode" />
+    <project groups="mathlibs" name="rocJPEG" />
+    <!-- The following components have been migrated to rocm-libraries:
+        hipBLAS-common hipBLAS hipBLASLt hipCUB
+        hipFFT hipRAND hipSPARSE hipSPARSELt
+        MIOpen rocBLAS rocFFT rocPRIM rocRAND
+        rocSPARSE rocThrust Tensile -->
+    <project groups="mathlibs" name="rocm-libraries" />
+    <project groups="mathlibs" name="rocPyDecode" />
+    <project groups="mathlibs" name="rocSHMEM" />
+    <project groups="mathlibs" name="rocWMMA" />
+    <project groups="mathlibs" name="rocm-cmake" />
+    <project groups="mathlibs" name="rpp" />
+    <project groups="mathlibs" name="TransferBench" />
+<!-- Projects for OpenMP-Extras -->
+    <project name="aomp" path="openmp-extras/aomp" />
+    <project name="aomp-extras" path="openmp-extras/aomp-extras" />
+    <project name="flang" path="openmp-extras/flang" />
+</manifest>
Author	SHA1	Message	Date
David Dixon	b6cc206cda	Apply suggestion from @davidd-amd	2025-10-15 22:47:02 -06:00
David Dixon	c00dc5f7ab	Apply suggestion from @davidd-amd	2025-10-15 22:46:54 -06:00
David Dixon	f996f6dd75	Update yamlcpp.yml	2025-10-15 22:40:55 -06:00
David Dixon	170e5055dd	add config options	2025-10-15 22:31:10 -06:00
David Dixon	326559bd52	Update yaml-cpp.yml for Azure Pipelines	2025-10-15 22:29:03 -06:00
David Dixon	ac93737996	remove hyphen	2025-10-15 22:27:42 -06:00
David Dixon	a8a6a96459	use amdclang for spdlog	2025-10-15 22:22:09 -06:00
David Dixon	a894ed863c	use amdclang for fmt	2025-10-15 22:21:34 -06:00
David Dixon	da3cb18b38	Create yaml-cpp.yml	2025-10-15 22:20:49 -06:00
David Dixon	ac3c22a4f7	Create yaml-cpp.yml	2025-10-15 22:19:10 -06:00
David Dixon	6e7422ded7	Update cli11.yml for Azure Pipelines (#5523 )	2025-10-15 10:47:29 -06:00
Istvan Kiss	7b7ff53985	Update Radeon link (#5453 )	2025-10-15 17:25:05 +02:00
David Dixon	019796dc63	[external] Create cli11.yml (#5522 )	2025-10-15 09:19:56 -06:00
Pratik Basyal	f21cfe1171	GitHub issue added to 702 known issues (#5520 ) * GitHub issue added to 702 known issues * Added missing RCCL changelog	2025-10-15 09:58:23 -04:00
Jan Stephan	170cb47a4f	Merge pull request #5512 from j-stephan/rocm-examples-deps [Ex CI] Add libtiff-dev, libopencv-dev and rpp	2025-10-15 10:02:46 +02:00
Braden Stefanuk	d19a8e4a83	[superbuild] Add dependencies for hipblaslt and origami (#5487 ) * ci: add deps for origami in superbuild * ci: add rocm path to system path * build: add pip msgpack dep	2025-10-14 16:05:24 -06:00
amd-hsivasun	3a0b8529ed	[Ex CI] Added MIOpen to the test dependencies for rocm-examples (#5517 )	2025-10-14 14:56:36 -04:00
Joseph Macaranas	f9d7fc2e6a	[External CI] Add libsimde-dev to ROCR pipeline (#5515 )	2025-10-14 14:24:45 -04:00
Nilesh M Negi	d424687191	[Ex CI] Increase RCCL build time limit to 120mins (#5516 )	2025-10-14 12:59:40 -05:00
Jan Stephan	35e6e50888	[Ex CI] Add libopencv-dev Signed-off-by: Jan Stephan <jan.stephan@amd.com>	2025-10-13 20:00:25 +02:00
Jan Stephan	91cfe98eb3	[Ex CI] Add libtiff-dev and rpp Signed-off-by: Jan Stephan <jan.stephan@amd.com>	2025-10-13 17:42:59 +02:00
Pratik Basyal	036aaa2e78	ROCm for HPC topic updated Develop (#5504 ) * ROCm for HPC topic updated * ROCm for HPC topic udpated * Minor editorial	2025-10-10 22:31:51 -04:00
Pratik Basyal	78258e0f85	702 compatibility Footnote updated (#5502 ) * Footnote updated * Minor update * Minor update * Break added * Line break added * Line break * Footnote updated * Minor correction	2025-10-10 21:23:07 -04:00
amd-hsong	c79d9f74ef	Merge pull request #5490 Re-enable device_merge_inplace unit test for rocPRIM	2025-10-10 15:03:23 -06:00
amd-hsivasun	fb1b78c6f0	[Ex CI] Added Component and Module Dependencies (#5489 ) * [Ex CI] Added Component and Module Dependencies * Add registerROCmPackages flag	2025-10-10 16:01:11 -04:00
peterjunpark	3a70d75f5e	Fix documented AMD SMI version (ROCm 7.0.2) (#5496 )	2025-10-10 15:09:20 -04:00
alexxu-amd	61e1f088a1	Merge pull request #5492 from ROCm/sync-dev-from-internal Sync dev from internal for 7.0.2 GA	2025-10-10 11:17:32 -04:00
Pratik Basyal	1f6e5c5e04	Update compatibility-matrix.rst	2025-10-10 11:10:48 -04:00
Pratik Basyal	e8a0769842	Update RELEASE.md	2025-10-10 11:07:51 -04:00
Alex Xu	6f9579d052	Merge remote-tracking branch 'internal/develop' into sync-dev-from-internal	2025-10-10 11:02:33 -04:00
Pratik Basyal	245d53a021	Merge pull request #579 from prbasyal-amd/post-rc3-702-update GPU resiliency highlight updated 702	2025-10-10 11:00:59 -04:00
Alex Xu	35dbbb22bc	fix linting	2025-10-10 10:29:13 -04:00
alexxu-amd	03dc8cee00	Merge pull request #584 from ROCm/sync-dev-from-external Sync dev from external	2025-10-10 10:14:56 -04:00
Alex Xu	323e5fd27a	Merge remote-tracking branch 'external/develop' into sync-dev-from-external	2025-10-10 10:13:08 -04:00
alexxu-amd	b11fd7b492	Update versions.md (#583 )	2025-10-10 09:31:24 -04:00
srayasam-amd	5e2efa05a6	7.0.2 GA update (#5491 ) * 7.0.2 GA update * Create rocm-7.0.2.xml	2025-10-10 18:47:48 +05:30
Hao Song	29a90f0271	[rocPRIM] Re-enable device_merge_inplace unit test for rocPRIM	2025-10-09 21:48:11 +00:00
randyh62	c06242bb89	Update RELEASE.md (#581 ) * Update RELEASE.md Remove support for rocBlas and hipBlasLt * Update CHANGELOG.md Removed from the Changelog as well.	2025-10-09 13:15:08 -07:00
peterjunpark	68e8453ca5	Update vLLM doc for 10/6 release and bump rocm-docs-core to 1.26.0 (#5481 ) * archive previous doc version * update model/docker data and doc templates * Update "Reproducing the Docker image" * fix: truncated commit hash doesn't work for some reason * bump rocm-docs-core to 1.26.0 * fix numbering fix * update docker tag * update .wordlist.txt	2025-10-08 16:23:40 -04:00
Pratik Basyal	503b8bcc86	Framework and changelog updated (#5483 ) * Framework and chaneglog updated * Wordlist updated	2025-10-08 15:05:11 -04:00
amd-hsivasun	e3d97d339a	[Ex CI] Added rocJPEG and rocprofiler-sdk	2025-10-08 14:47:44 -04:00
alexxu-amd	978c58d196	Merge pull request #577 from ROCm/sync-develop-from-external Sync develop from external	2025-10-08 14:25:03 -04:00
alexxu-amd	a366048b64	Merge branch 'develop' into sync-develop-from-external	2025-10-08 14:12:14 -04:00
Pratik Basyal	4c3e33c291	Compatibility matrix and changelog synced for ROCm 7.0.2 (#576 ) * Compatibility matrix and changelog synced * Indentation updated * OS updated	2025-10-08 14:11:15 -04:00
Alex Xu	89758e67d8	Merge remote-tracking branch 'external/develop' into sync-develop-from-external	2025-10-08 14:03:34 -04:00
Pratik Basyal	5d0f201b4d	7.0.2 review update (#575 ) * 7.0.2 review update * Tensorflow footnote updated * Wordlist added	2025-10-08 12:35:14 -04:00
Pratik Basyal	e3677d89a6	PLDM bundle info updated for 7.0.2 (#574 ) * PLDM bundle info updated * Driver dependency added to GPU resiliency * Known issue for Migrpahx added * Footnote added * Known issue for OpenCV updated * Leo's feedback incorporated * Radeon 9060 updated * Known issues updated	2025-10-08 11:00:42 -04:00
amd-hsivasun	f20edab8fc	[Ex CI] Update CMake Flags for hipTensor	2025-10-07 15:21:39 -04:00
Pratik Basyal	6f84d50011	ROCm 7.0.2 Post RC3 update (#573 ) * Space minimized * OS support updated * Minor change	2025-10-06 14:08:01 -04:00
Pratik Basyal	57dd082f28	Post RC2 7.0.2 review feedback updated (#571 ) * Known issue updated * Space optimized * Changelog updated * Apply suggestions from code review Leo's review feedback incorporated Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> * Highlight changes * Highlight and OS support updated * GPU resiliency highlight updated * Highlights updated * ROCm-EP deprecation added * Apply suggestions from code review leo's feedback incorporated Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> * PLDM update --------- Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>	2025-10-06 12:04:09 -04:00
peterjunpark	eeea0d2180	Fix heading levels in pages using embedded templates (#5468 )	2025-10-03 13:33:14 -04:00
anisha-amd	93c6d17922	Docs: frameworks 25.09 - compatibility - FlashInfer and llama.cpp (#5462 )	2025-10-02 13:51:36 -04:00
amd-hsivasun	f91c2b9b4a	Update dependencies-rocm.yml	2025-10-01 15:31:35 -04:00
amd-hsivasun	5e6b66ca39	Remove tasks to locate test dir	2025-10-01 15:30:37 -04:00
amd-hsivasun	6b8b359d03	Updated test dir to s/build/tests	2025-10-01 15:30:37 -04:00
amd-hsivasun	38e659e5f0	Update testDir	2025-10-01 15:30:37 -04:00
amd-hsivasun	0894547f5a	Update setupenv	2025-10-01 15:30:37 -04:00
amd-hsivasun	aca31170c4	Update setupenv	2025-10-01 15:30:37 -04:00
amd-hsivasun	d21ec9eea5	Updated testDir	2025-10-01 15:30:37 -04:00
amd-hsivasun	189c269350	Added Debug	2025-10-01 15:30:37 -04:00
amd-hsivasun	774cb7a1b3	Changed testDir	2025-10-01 15:30:37 -04:00
amd-hsivasun	024cb4db76	Added testDir	2025-10-01 15:30:37 -04:00
amd-hsivasun	945fb286f7	Find tests Task	2025-10-01 15:30:37 -04:00
amd-hsivasun	ee93101541	Change list files	2025-10-01 15:30:37 -04:00
amd-hsivasun	e31841312b	Update testDir	2025-10-01 15:30:37 -04:00
amd-hsivasun	41b5298659	Added a list for all rp-systems files	2025-10-01 15:30:37 -04:00
amd-hsivasun	58790154b2	Add a script to look for setup-env.sh	2025-10-01 15:30:37 -04:00
amd-hsivasun	6f7f73ac0b	Update workingDirectories	2025-10-01 15:30:37 -04:00
amd-hsivasun	b2e3bc8565	[Ex CI] Updated rp-systems CMakeBuildDir	2025-10-01 15:30:37 -04:00
amd-hsivasun	52979e2fdb	[Ex CI] Updated testDir for rp-systems tests	2025-10-01 15:30:37 -04:00
peterjunpark	0ea5216ace	docs: update article_info in conf.py (#5454 )	2025-10-01 13:17:50 -04:00
peterjunpark	2e1b4dd5ee	Add multi-node setup instructions for training perf Dockers (#5449 ) --------- Co-authored-by: Jeffrey Novotny <jnovotny@amd.com>	2025-09-30 14:53:38 -04:00
Pratik Basyal	5c7b993c0c	7.0.2 release changes (#568 ) * Initial changes for 7.0.2 * Heading level updated * Release notes changes * rocsolver added * Known issues updated * Highlights updated * RN changes * Release highlights for AI applications updated * AI developer contents added * leo's review feedback added * Compatibility matrix updated * GPU driver support	2025-09-30 14:02:04 -04:00
amd-hsivasun	2d79b3c4bd	[Ex CI] Added rocm-cmake dependency	2025-09-30 14:00:16 -04:00
Peter Park	fd59b5fbac	fix links in docs (#5446 )	2025-09-29 15:27:32 -04:00
amd-hsivasun	0a643f4686	[Ex CI] Enable aqlprofile	2025-09-26 14:42:15 -04:00
amd-hsivasun	d9e5744f7a	Update testExecutable	2025-09-26 14:01:02 -04:00
amd-hsivasun	ccb849ec02	Added python3-pip to aptModules	2025-09-26 14:01:02 -04:00
amd-hsivasun	42d4867964	Removed more aptPackages	2025-09-26 14:01:02 -04:00
amd-hsivasun	375359a5dd	Added ninja to aptPackages	2025-09-26 14:01:02 -04:00
amd-hsivasun	e92745f1ff	Removed apt and pip modules	2025-09-26 14:01:02 -04:00
amd-hsivasun	0fa72358d3	Remove registerROCm packages flag	2025-09-26 14:01:02 -04:00
amd-hsivasun	6fec268a4e	Removed package manager	2025-09-26 14:01:02 -04:00
amd-hsivasun	ff14cd1ff5	Added pyyaml	2025-09-26 14:01:02 -04:00
amd-hsivasun	8f65688653	Added registerROCmPackages	2025-09-26 14:01:02 -04:00
amd-hsivasun	33d1493adb	Removed dependencies	2025-09-26 14:01:02 -04:00
amd-hsivasun	4b6c7776a2	Updated parameters	2025-09-26 14:01:02 -04:00
amd-hsivasun	af811daa1b	Added GPUTarget	2025-09-26 14:01:02 -04:00
amd-hsivasun	d6c045e482	Update test parameters	2025-09-26 14:01:02 -04:00
amd-hsivasun	78b24cad39	Update test pool	2025-09-26 14:01:02 -04:00
amd-hsivasun	753a94c0bb	Add test step to buildjob	2025-09-26 14:01:02 -04:00
amd-hsivasun	6ecad57c62	Revert pool changes	2025-09-26 14:01:02 -04:00
amd-hsivasun	977554809a	Changed cmake prefix path	2025-09-26 14:01:02 -04:00
amd-hsivasun	7b00f4493b	Removed module and prefix path	2025-09-26 14:01:02 -04:00
amd-hsivasun	95c439a272	Removed Compiler Path	2025-09-26 14:01:02 -04:00
amd-hsivasun	94e04fbdc0	Updated testpool	2025-09-26 14:01:02 -04:00
amd-hsivasun	7ab59de8af	Update testdir	2025-09-26 14:01:02 -04:00
amd-hsivasun	175c817563	Change testdir	2025-09-26 14:01:02 -04:00
amd-hsivasun	25516d312e	Updated testdir	2025-09-26 14:01:02 -04:00
amd-hsivasun	30c345629a	Changed testdir	2025-09-26 14:01:02 -04:00
amd-hsivasun	210dc94bbb	Removed testExecutable	2025-09-26 14:01:02 -04:00
amd-hsivasun	a54023ccb8	Changed testdir	2025-09-26 14:01:02 -04:00
amd-hsivasun	17e3362dc7	Add Checkout to testjob	2025-09-26 14:01:02 -04:00
amd-hsivasun	0f9c0d884d	Updated testdir	2025-09-26 14:01:02 -04:00
amd-hsivasun	c890de4b16	Added Path to Gtest	2025-09-26 14:01:02 -04:00
amd-hsivasun	4ea77ab515	Added Tests	2025-09-26 14:01:02 -04:00
amd-hsivasun	c0512612f4	Updated testdir	2025-09-26 14:01:02 -04:00
amd-hsivasun	1c81ac3747	Updated testdir path	2025-09-26 14:01:02 -04:00
amd-hsivasun	4bafa42e52	Updated test parameters	2025-09-26 14:01:02 -04:00
amd-hsivasun	493801e670	Updated testdir	2025-09-26 14:01:02 -04:00
amd-hsivasun	1a5152b7b3	Removed testdir	2025-09-26 14:01:02 -04:00
amd-hsivasun	874c881012	Fixed testdir	2025-09-26 14:01:02 -04:00
amd-hsivasun	bdcaeea74c	Updated testdir	2025-09-26 14:01:02 -04:00
amd-hsivasun	b02669acf7	Fixed Dependencies	2025-09-26 14:01:02 -04:00
amd-hsivasun	844f10b2b1	Updated dependencies-other variables	2025-09-26 14:01:02 -04:00
amd-hsivasun	d6c14920b4	External CI: Build pipeline for aqlprofile	2025-09-26 14:01:02 -04:00
amd-hsivasun	4affe10a7c	[Ex CI] Update pipeline Id for rdc to monorepo	2025-09-26 12:38:57 -04:00
amd-hsivasun	81341ef435	Add New Line	2025-09-26 11:41:21 -04:00
amd-hsivasun	abacd328f9	[Ex CI] Added rocRand to rocmDependencies	2025-09-26 11:41:21 -04:00
amd-hsivasun	80b2fb6e26	[Ex CI] Add hipRAND to rocmDependencies	2025-09-26 11:41:21 -04:00
amd-hsivasun	b53e8decfc	[Ex CI] Enable rdc monorepo	2025-09-26 11:41:21 -04:00
amd-hsivasun	5fcc2eafde	[Ex CI] Update pipeline Id for rocprofiler-sdk to monorepo	2025-09-25 16:49:07 -04:00
amd-hsivasun	2eb0d77bc6	Updated testDir	2025-09-25 13:20:37 -04:00
amd-hsivasun	d84b41908f	Changed Testdir	2025-09-25 13:20:37 -04:00
amd-hsivasun	986f8284d1	[Ex CI] Update testDir for rocprofiler-sdk	2025-09-25 13:20:37 -04:00
Pratik Basyal	d92d9268dc	Use of Radeon and Ryzen reference updated [Develop] (#5432 ) * Use of Radeon and Ryzen reference updated * Pytorch link update	2025-09-24 19:07:41 -05:00
Ibrahim Wani	1629d3f0ea	Add origami yaml based tests to azure pipelines (#5431 ) * Add origami yaml tests * Dependency fix in origami.yml * Fix almalinux dependency; get publish test results step working * Fix almalinux dependency issue	2025-09-24 14:49:51 -06:00
Pratik Basyal	6cf6b34b2e	TOC for ROCm on Radeon and Ryzen updated (#5429 )	2025-09-24 13:58:26 -05:00
Pratik Basyal	c35a0a121a	ROR link and text updated (#5426 )	2025-09-24 13:28:13 -05:00
amd-hsivasun	412e383654	[Ex CI] Update pipeline Id for rocprofiler-sdk	2025-09-23 15:56:49 -04:00
Pratik Basyal	39f6fc187d	rocm-core version updated (#5418 )	2025-09-23 15:49:33 -04:00
amd-hsivasun	05b480fb28	Update rocm-examples.yml	2025-09-23 12:10:11 -04:00
amd-hsivasun	4fa44d90db	Updated dependencies-cmake-custom.yml default ver	2025-09-23 12:10:11 -04:00
amd-hsivasun	c9ef13d823	Added Custom Cmake to testjobs	2025-09-23 12:10:11 -04:00
amd-hsivasun	f02172050b	Added rocWMMA dependency	2025-09-23 12:10:11 -04:00
amd-hsivasun	154dbe297a	Updated File to take custom cmake version	2025-09-23 12:10:11 -04:00
amd-hsivasun	993a0a4fd4	[Ex CI] Update cmake	2025-09-23 12:10:11 -04:00
amd-hsivasun	c03662f410	[Ex CI] Update pipeline Id for origami to monorepo	2025-09-23 11:17:39 -04:00
Peter Park	442d7e4750	Add env var note to vllm.rst for MoE models and fix links in docs (#5415 ) * docs(vllm.rst): add performance note for MoE models * docs: fix links update vllm readme link 20250521 fix links	2025-09-22 15:58:43 -04:00
Pratik Basyal	a09a8f517e	PLDM version for 7.0.0 updated (#5412 )	2025-09-22 11:14:07 -04:00
Pratik Basyal	0bbaab645d	rocSHMEM and ROCprofiler-SDK highlight update (#5408 ) (#5409 ) * rocSHMEM and ROCprofiler-SDK highlight update (#5408) * Update RELEASE.md	2025-09-22 10:26:12 -04:00
Ibrahim Wani	4b80405e2e	Add set -e to exit when test fails (#5398 )	2025-09-19 10:43:35 -06:00
Peter Park	d92e5b6c12	Update Primus Megatron doc v25.8 (#5396 ) * megatron: update previous versions list update wording * megatron: update rst and yaml update primus repo link update mig guide * update headings and anchors * megatron: update doc * update docker hub urls	2025-09-19 08:09:21 -04:00
Pratik Basyal	91fce2e134	rocpd highlight updated (#5393 )	2025-09-18 19:00:36 -04:00
Peter Park	27d53cf082	Remove duplicate ML FW docker image support table (#5389 )	2025-09-18 17:06:53 -04:00
Pratik Basyal	bc084246be	Reference to AMD GPU Driver 30.10 release notes updated (#5380 )	2025-09-18 13:34:46 -05:00
Peter Park	9827ba7ff2	docs: MaxText v25.7 patch update (#5372 ) * remove jax 0.6.0 nanoo fp8 caveat note * reorder maxtext docker images in data sheet	2025-09-17 16:25:46 -04:00