Compare commits

1 commit

Author: David Dixon
SHA1: a796f337c1
Message: add catch2
Date: 2025-09-05 00:43:38 +00:00

230 changed files with 6025 additions and 35229 deletions

View File

@@ -128,9 +128,6 @@ jobs:
  parameters:
  aptPackages: ${{ parameters.aptPackages }}
  pipModules: ${{ parameters.pipModules }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
- parameters:
- cmakeVersion: '3.28.6'
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
@@ -155,7 +152,6 @@ jobs:
  -DCMAKE_BUILD_TYPE=Release
  -DGPU_TARGETS=${{ job.target }}
  -DAMDGPU_TARGETS=${{ job.target }}
- -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
  -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
  -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
  -DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
@@ -196,9 +192,6 @@ jobs:
  parameters:
  aptPackages: ${{ parameters.aptPackages }}
  pipModules: ${{ parameters.pipModules }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
- parameters:
- cmakeVersion: '3.28.6'
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
@@ -224,7 +217,6 @@ jobs:
  -DCMAKE_BUILD_TYPE=Release
  -DGPU_TARGETS=${{ job.target }}
  -DAMDGPU_TARGETS=${{ job.target }}
- -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
  -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
  -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
  -DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include

View File

@@ -34,7 +34,6 @@ parameters:
  default:
  - cmake
  - libnuma-dev
- - libsimde-dev
  - mesa-common-dev
  - ninja-build
  - ocl-icd-libopencl1

View File

@@ -79,7 +79,7 @@ jobs:
  aptPackages: ${{ parameters.aptPackages }}
  pipModules: ${{ parameters.pipModules }}
  packageManager: ${{ job.packageManager }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
  - task: Bash@3
  displayName: Add lit to PATH
  inputs:

View File

@@ -131,7 +131,7 @@ jobs:
  parameters:
  aptPackages: ${{ parameters.aptPackages }}
  pipModules: ${{ parameters.pipModules }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
@@ -212,7 +212,7 @@ jobs:
  parameters:
  aptPackages: ${{ parameters.aptPackages }}
  pipModules: ${{ parameters.pipModules }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:

View File

@@ -1,29 +1,10 @@
  parameters:
- - name: componentName
- type: string
- default: ROCR-Runtime
  - name: checkoutRepo
  type: string
  default: 'self'
  - name: checkoutRef
  type: string
  default: ''
- # monorepo related parameters
- - name: sparseCheckoutDir
- type: string
- default: ''
- - name: triggerDownstreamJobs
- type: boolean
- default: false
- - name: downstreamAggregateNames
- type: string
- default: ''
- - name: buildDependsOn
- type: object
- default: null
- - name: unifiedBuild
- type: boolean
- default: false
  # set to true if doing full build of ROCm stack
  # and dependencies are pulled from same pipeline
  - name: aggregatePipeline
@@ -37,7 +18,6 @@ parameters:
  - libdrm-dev
  - libelf-dev
  - libnuma-dev
- - libsimde-dev
  - ninja-build
  - pkg-config
  - name: rocmDependencies
@@ -65,10 +45,6 @@ parameters:
  jobs:
  - ${{ each job in parameters.jobMatrix.buildJobs }}:
  - job: ROCR_Runtime_build_${{ job.os }}
- ${{ if parameters.buildDependsOn }}:
- dependsOn:
- - ${{ each build in parameters.buildDependsOn }}:
- - ${{ build }}_${{ job.os }}
  pool:
  vmImage: 'ubuntu-22.04'
  ${{ if eq(job.os, 'almalinux8') }}:
@@ -89,18 +65,14 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
  checkoutRepo: ${{ parameters.checkoutRepo }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
  parameters:
  checkoutRef: ${{ parameters.checkoutRef }}
  dependencyList: ${{ parameters.rocmDependencies }}
  aggregatePipeline: ${{ parameters.aggregatePipeline }}
  os: ${{ job.os }}
- ${{ if parameters.triggerDownstreamJobs }}:
- downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
  parameters:
- componentName: ${{ parameters.componentName }}
  os: ${{ job.os }}
  useAmdclang: false
  extraBuildFlags: >-
@@ -110,112 +82,105 @@ jobs:
  -GNinja
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
  parameters:
- componentName: ${{ parameters.componentName }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
  os: ${{ job.os }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
  parameters:
- componentName: ${{ parameters.componentName }}
  os: ${{ job.os }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
  # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
  # parameters:
  # aptPackages: ${{ parameters.aptPackages }}
- - ${{ if eq(parameters.unifiedBuild, False) }}:
- - ${{ each job in parameters.jobMatrix.testJobs }}:
- - job: ROCR_Runtime_test_${{ job.os }}_${{ job.target }}
- dependsOn: ROCR_Runtime_build_${{ job.os }}
- condition:
- and(succeeded(),
- eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
- not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
- eq(${{ parameters.aggregatePipeline }}, False)
- )
- variables:
- - group: common
- - template: /.azuredevops/variables-global.yml
- pool: ${{ job.target }}_test_pool
- workspace:
- clean: all
- steps:
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- parameters:
- aptPackages: ${{ parameters.aptPackages }}
- packageManager: ${{ job.packageManager }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- parameters:
- os: ${{ job.os }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
- parameters:
- checkoutRef: ${{ parameters.checkoutRef }}
- dependencyList: ${{ parameters.rocmTestDependencies }}
- gpuTarget: ${{ job.target }}
- os: ${{ job.os }}
- ${{ if parameters.triggerDownstreamJobs }}:
- downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
- parameters:
- checkoutRepo: ${{ parameters.checkoutRepo }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- parameters:
- runRocminfo: false
- - task: Bash@3
- displayName: Build kfdtest
- inputs:
- targetType: 'inline'
- workingDirectory: $(Agent.BuildDirectory)/s/libhsakmt/tests/kfdtest
- script: |
- if [ -e /opt/rh/gcc-toolset-14/enable ]; then
- source /opt/rh/gcc-toolset-14/enable
- fi
- mkdir build && cd build
- cmake -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm ..
- make
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
- parameters:
- componentName: kfdtest
- testExecutable: BIN_DIR=$(Agent.BuildDirectory)/s/libhsakmt/tests/kfdtest/build ./run_kfdtest.sh
- testParameters: '-p core --gtest_output=xml:./test_output.xml --gtest_color=yes'
- testDir: $(Agent.BuildDirectory)/s/libhsakmt/tests/kfdtest/scripts
- os: ${{ job.os }}
- - task: Bash@3
- displayName: Build rocrtst
- inputs:
- targetType: 'inline'
- workingDirectory: $(Agent.BuildDirectory)/s/rocrtst/suites/test_common
- script: |
- echo $(Agent.BuildDirectory)/s/rocrtst/thirdparty/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
- sudo cat /etc/ld.so.conf.d/rocm-ci.conf
- sudo ldconfig -v
- ldconfig -p
- if [ -e /opt/rh/gcc-toolset-14/enable ]; then
- source /opt/rh/gcc-toolset-14/enable
- fi
- BASE_CLANG_DIR=$(Agent.BuildDirectory)/rocm/llvm/lib/clang
- export NEWEST_CLANG_VER=$(ls -1 $BASE_CLANG_DIR | sort -V | tail -n 1)
- mkdir build && cd build
- cmake .. \
- -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm \
- -DTARGET_DEVICES=${{ job.target }} \
- -DROCM_DIR=$(Agent.BuildDirectory)/rocm \
- -DLLVM_DIR=$(Agent.BuildDirectory)/rocm/llvm/bin \
- -DOPENCL_INC_DIR=$BASE_CLANG_DIR/$NEWEST_CLANG_VER/include
- make
- make rocrtst_kernels
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
- parameters:
- componentName: rocrtst
- testExecutable: ./rocrtst64
- testParameters: '--gtest_filter="-rocrtstNeg.Memory_Negative_Tests:rocrtstFunc.Memory_Max_Mem" --gtest_output=xml:./test_output.xml --gtest_color=yes'
- testDir: $(Agent.BuildDirectory)/s//rocrtst/suites/test_common/build/${{ job.target }}
- os: ${{ job.os }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
- parameters:
- aptPackages: ${{ parameters.aptPackages }}
- environment: test
- gpuTarget: ${{ job.target }}
- # docker image will be missing libhwloc5
+ - ${{ each job in parameters.jobMatrix.testJobs }}:
+ - job: ROCR_Runtime_test_${{ job.os }}_${{ job.target }}
+ dependsOn: ROCR_Runtime_build_${{ job.os }}
+ condition:
+ and(succeeded(),
+ eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+ not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+ eq(${{ parameters.aggregatePipeline }}, False)
+ )
+ variables:
+ - group: common
+ - template: /.azuredevops/variables-global.yml
+ pool: ${{ job.target }}_test_pool
+ workspace:
+ clean: all
+ steps:
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+ parameters:
+ aptPackages: ${{ parameters.aptPackages }}
+ packageManager: ${{ job.packageManager }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+ parameters:
+ os: ${{ job.os }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+ parameters:
+ checkoutRef: ${{ parameters.checkoutRef }}
+ dependencyList: ${{ parameters.rocmTestDependencies }}
+ gpuTarget: ${{ job.target }}
+ os: ${{ job.os }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+ parameters:
+ checkoutRepo: ${{ parameters.checkoutRepo }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+ parameters:
+ runRocminfo: false
+ - task: Bash@3
+ displayName: Build kfdtest
+ inputs:
+ targetType: 'inline'
+ workingDirectory: $(Build.SourcesDirectory)/libhsakmt/tests/kfdtest
+ script: |
+ if [ -e /opt/rh/gcc-toolset-14/enable ]; then
+ source /opt/rh/gcc-toolset-14/enable
+ fi
+ mkdir build && cd build
+ cmake -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm ..
+ make
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+ parameters:
+ componentName: kfdtest
+ testExecutable: BIN_DIR=$(Build.SourcesDirectory)/libhsakmt/tests/kfdtest/build ./run_kfdtest.sh
+ testParameters: '-p core --gtest_output=xml:./test_output.xml --gtest_color=yes'
+ testDir: $(Build.SourcesDirectory)/libhsakmt/tests/kfdtest/scripts
+ os: ${{ job.os }}
+ - task: Bash@3
+ displayName: Build rocrtst
+ inputs:
+ targetType: 'inline'
+ workingDirectory: $(Build.SourcesDirectory)/rocrtst/suites/test_common
+ script: |
+ echo $(Build.SourcesDirectory)/rocrtst/thirdparty/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
+ sudo cat /etc/ld.so.conf.d/rocm-ci.conf
+ sudo ldconfig -v
+ ldconfig -p
+ if [ -e /opt/rh/gcc-toolset-14/enable ]; then
+ source /opt/rh/gcc-toolset-14/enable
+ fi
+ BASE_CLANG_DIR=$(Agent.BuildDirectory)/rocm/llvm/lib/clang
+ export NEWEST_CLANG_VER=$(ls -1 $BASE_CLANG_DIR | sort -V | tail -n 1)
+ mkdir build && cd build
+ cmake .. \
+ -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm \
+ -DTARGET_DEVICES=${{ job.target }} \
+ -DROCM_DIR=$(Agent.BuildDirectory)/rocm \
+ -DLLVM_DIR=$(Agent.BuildDirectory)/rocm/llvm/bin \
+ -DOPENCL_INC_DIR=$BASE_CLANG_DIR/$NEWEST_CLANG_VER/include
+ make
+ make rocrtst_kernels
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+ parameters:
+ componentName: rocrtst
+ testExecutable: ./rocrtst64
+ testParameters: '--gtest_filter="-rocrtstNeg.Memory_Negative_Tests:rocrtstFunc.Memory_Max_Mem" --gtest_output=xml:./test_output.xml --gtest_color=yes'
+ testDir: $(Build.SourcesDirectory)/rocrtst/suites/test_common/build/${{ job.target }}
+ os: ${{ job.os }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+ parameters:
+ aptPackages: ${{ parameters.aptPackages }}
+ environment: test
+ gpuTarget: ${{ job.target }}
+ # docker image will be missing libhwloc5

View File

@@ -1,29 +1,10 @@
  parameters:
- - name: componentName
- type: string
- default: amdsmi
  - name: checkoutRepo
  type: string
  default: 'self'
  - name: checkoutRef
  type: string
  default: ''
- # monorepo related parameters
- - name: sparseCheckoutDir
- type: string
- default: ''
- - name: triggerDownstreamJobs
- type: boolean
- default: false
- - name: downstreamAggregateNames
- type: string
- default: ''
- - name: buildDependsOn
- type: object
- default: null
- - name: unifiedBuild
- type: boolean
- default: false
  # set to true if doing full build of ROCm stack
  # and dependencies are pulled from same pipeline
  - name: aggregatePipeline
@@ -50,7 +31,7 @@ parameters:
  jobs:
  - ${{ each job in parameters.jobMatrix.buildJobs }}:
- - job: ${{ parameters.componentName }}_build_${{ job.os }}
+ - job: amdsmi_build_${{ job.os }}
  pool:
  ${{ if eq(job.os, 'ubuntu2404') }}:
  vmImage: 'ubuntu-24.04'
@@ -74,7 +55,6 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
  checkoutRepo: ${{ parameters.checkoutRepo }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
  parameters:
  os: ${{ job.os }}
@@ -85,54 +65,50 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
  parameters:
  os: ${{ job.os }}
- componentName: ${{ parameters.componentName }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
  parameters:
  os: ${{ job.os }}
- componentName: ${{ parameters.componentName }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
  # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
  # parameters:
  # aptPackages: ${{ parameters.aptPackages }}
- - ${{ if eq(parameters.unifiedBuild, False) }}:
- - ${{ each job in parameters.jobMatrix.testJobs }}:
- - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
- dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}
- condition:
- and(succeeded(),
- eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
- not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
- eq(${{ parameters.aggregatePipeline }}, False)
- )
- variables:
- - group: common
- - template: /.azuredevops/variables-global.yml
- pool: ${{ job.target }}_test_pool
- workspace:
- clean: all
- steps:
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- parameters:
- aptPackages: ${{ parameters.aptPackages }}
- packageManager: ${{ job.packageManager }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- parameters:
- os: ${{ job.os }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- parameters:
- runRocminfo: false
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
- parameters:
- componentName: ${{ parameters.componentName }}
- testDir: '$(Agent.BuildDirectory)'
- testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst'
- testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
- os: ${{ job.os }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
- parameters:
- aptPackages: ${{ parameters.aptPackages }}
- environment: test
- gpuTarget: ${{ job.target }}
+ - ${{ each job in parameters.jobMatrix.testJobs }}:
+ - job: amdsmi_test_${{ job.os }}_${{ job.target }}
+ dependsOn: amdsmi_build_${{ job.os }}
+ condition:
+ and(succeeded(),
+ eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+ not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+ eq(${{ parameters.aggregatePipeline }}, False)
+ )
+ variables:
+ - group: common
+ - template: /.azuredevops/variables-global.yml
+ pool: ${{ job.target }}_test_pool
+ workspace:
+ clean: all
+ steps:
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+ parameters:
+ aptPackages: ${{ parameters.aptPackages }}
+ packageManager: ${{ job.packageManager }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+ parameters:
+ os: ${{ job.os }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+ parameters:
+ runRocminfo: false
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+ parameters:
+ componentName: amdsmi
+ testDir: '$(Agent.BuildDirectory)'
+ testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst'
+ testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
+ os: ${{ job.os }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+ parameters:
+ aptPackages: ${{ parameters.aptPackages }}
+ environment: test
+ gpuTarget: ${{ job.target }}

View File

@@ -1,174 +0,0 @@
parameters:
- name: componentName
type: string
default: aqlprofile
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
type: boolean
default: false
- name: aptPackages
type: object
default:
- cmake
- git
- ninja-build
- python3-pip
- name: rocmDependencies
type: object
default:
- clr
- llvm-project
- ROCR-Runtime
- name: rocmTestDependencies
type: object
default:
- clr
- llvm-project
- ROCR-Runtime
- rocprofiler-register
- name: jobMatrix
type: object
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ variables.MEDIUM_BUILD_POOL }}
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- gtest
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
os: ${{ job.os }}
consolidateBuildAndInstall: true
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/aqlprofile/cmake_modules
-DAQLPROFILE_BUILD_TESTS=ON
-DGPU_TARGETS=${{ job.target }}
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- ${{ if eq(job.os, 'ubuntu2204') }}:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
gpuTarget: ${{ job.target }}
- ${{ if eq(parameters.unifiedBuild, False) }}:
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
preTargetFilter: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: ${{ parameters.componentName }}
testDir: $(Agent.BuildDirectory)/rocm/share/hsa-amd-aqlprofile/
testExecutable: ./run_tests.sh
testParameters: ''
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
environment: test
gpuTarget: ${{ job.target }}

View File

@@ -1,29 +1,10 @@
  parameters:
- - name: componentName
- type: string
- default: hip-tests
  - name: checkoutRepo
  type: string
  default: 'self'
  - name: checkoutRef
  type: string
  default: ''
- # monorepo related parameters
- - name: sparseCheckoutDir
- type: string
- default: ''
- - name: triggerDownstreamJobs
- type: boolean
- default: false
- - name: downstreamAggregateNames
- type: string
- default: ''
- - name: buildDependsOn
- type: object
- default: null
- - name: unifiedBuild
- type: boolean
- default: false
  # set to true if doing full build of ROCm stack
  # and dependencies are pulled from same pipeline
  - name: aggregatePipeline
@@ -79,10 +60,6 @@ parameters:
  jobs:
  - ${{ each job in parameters.jobMatrix.buildJobs }}:
  - job: hip_tests_build_${{ job.target }}
- ${{ if parameters.buildDependsOn }}:
- dependsOn:
- - ${{ each build in parameters.buildDependsOn }}:
- - ${{ build }}_${{ job.target }}
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
@@ -99,18 +76,15 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
  checkoutRepo: ${{ parameters.checkoutRepo }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
  parameters:
  checkoutRef: ${{ parameters.checkoutRef }}
  dependencyList: ${{ parameters.rocmDependencies }}
  aggregatePipeline: ${{ parameters.aggregatePipeline }}
- ${{ if parameters.triggerDownstreamJobs }}:
- downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
  # compile hip-tests
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
  parameters:
- componentName: ${{ parameters.componentName }}
+ componentName: hip-tests
  cmakeSourceDir: '../catch'
  customBuildTarget: build_tests
  extraBuildFlags: >-
@@ -122,12 +96,9 @@ jobs:
  -GNinja
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
  parameters:
- componentName: ${{ parameters.componentName }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
  gpuTarget: ${{ job.target }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
  parameters:
- componentName: ${{ parameters.componentName }}
  gpuTarget: ${{ job.target }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -137,56 +108,52 @@ jobs:
  extraEnvVars:
  - HIP_ROCCLR_HOME:::/home/user/workspace/rocm
- - ${{ if eq(parameters.unifiedBuild, False) }}:
- - ${{ each job in parameters.jobMatrix.testJobs }}:
- - job: hip_tests_test_${{ job.target }}
- timeoutInMinutes: 240
- dependsOn: hip_tests_build_${{ job.target }}
- condition:
- and(succeeded(),
- eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
- not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
- eq(${{ parameters.aggregatePipeline }}, False)
- )
- variables:
- - group: common
- - template: /.azuredevops/variables-global.yml
- pool: ${{ job.target }}_test_pool
- workspace:
- clean: all
- steps:
- - checkout: none
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- parameters:
- aptPackages: ${{ parameters.aptPackages }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- parameters:
- gpuTarget: ${{ job.target }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
- parameters:
- checkoutRef: ${{ parameters.checkoutRef }}
- dependencyList: ${{ parameters.rocmTestDependencies }}
- gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- - task: Bash@3
- displayName: Symlink rocm_agent_enumerator
- inputs:
- targetType: inline
- script: |
- # Assuming that /opt is no longer persistent across runs, test environments are fully ephemeral
- sudo mkdir -p /opt/rocm/bin
- sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rocm_agent_enumerator /opt/rocm/bin/rocm_agent_enumerator
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
- parameters:
- componentName: ${{ parameters.componentName }}
- testDir: $(Agent.BuildDirectory)/rocm/share/hip
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
- parameters:
- aptPackages: ${{ parameters.aptPackages }}
- environment: test
- gpuTarget: ${{ job.target }}
- optSymLink: true
+ - ${{ each job in parameters.jobMatrix.testJobs }}:
+ - job: hip_tests_test_${{ job.target }}
+ timeoutInMinutes: 240
+ dependsOn: hip_tests_build_${{ job.target }}
+ condition:
+ and(succeeded(),
+ eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+ not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+ eq(${{ parameters.aggregatePipeline }}, False)
+ )
+ variables:
+ - group: common
+ - template: /.azuredevops/variables-global.yml
+ pool: ${{ job.target }}_test_pool
+ workspace:
+ clean: all
+ steps:
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+ parameters:
+ aptPackages: ${{ parameters.aptPackages }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+ parameters:
+ gpuTarget: ${{ job.target }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+ parameters:
+ checkoutRef: ${{ parameters.checkoutRef }}
+ dependencyList: ${{ parameters.rocmTestDependencies }}
+ gpuTarget: ${{ job.target }}
+ - task: Bash@3
+ displayName: Symlink rocm_agent_enumerator
+ inputs:
+ targetType: inline
+ script: |
+ # Assuming that /opt is no longer persistent across runs, test environments are fully ephemeral
+ sudo mkdir -p /opt/rocm/bin
+ sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rocm_agent_enumerator /opt/rocm/bin/rocm_agent_enumerator
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+ parameters:
+ componentName: hip_tests
+ testDir: $(Agent.BuildDirectory)/rocm/share/hip
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+ parameters:
+ aptPackages: ${{ parameters.aptPackages }}
+ environment: test
+ gpuTarget: ${{ job.target }}
+ optSymLink: true

View File

@@ -77,7 +77,6 @@ parameters:
  - clr
  - hipBLAS-common
  - llvm-project
- - rocm-cmake
  - rocminfo
  - rocm_smi_lib
  - rocprofiler-register
@@ -145,7 +144,7 @@ jobs:
  aptPackages: ${{ parameters.aptPackages }}
  pipModules: ${{ parameters.pipModules }}
  packageManager: ${{ job.packageManager }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
@@ -179,7 +178,7 @@ jobs:
  mkdir -p $(Agent.BuildDirectory)/temp-deps
  cd $(Agent.BuildDirectory)/temp-deps
  # position-independent LAPACK is required for almalinux8 builds
- cmake -DBUILD_GTEST=OFF -DBUILD_LAPACK=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON $(Agent.BuildDirectory)/sparse/projects/hipblaslt/deps
+ cmake -DBUILD_GTEST=OFF -DBUILD_LAPACK=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON $(Agent.BuildDirectory)/s/deps
  make -j
  sudo make install
  - script: |
@@ -198,8 +197,6 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
  parameters:
  os: ${{ job.os }}
- cmakeSourceDir: $(Agent.BuildDirectory)/sparse/projects/hipblaslt
- cmakeBuildDir: $(Agent.BuildDirectory)/sparse/projects/hipblaslt/build
  extraBuildFlags: >-
  -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
  -DCMAKE_INCLUDE_PATH=$(Agent.BuildDirectory)/rocm/llvm/include

View File

@@ -40,12 +40,10 @@ parameters:
  - gfortran
  - libgfortran5
  - libopenblas-dev
- - liblapack-dev
  - name: pipModules
  type: object
  default:
  - joblib
- - msgpack
  - name: rocmDependencies
  type: object
  default:
@@ -54,7 +52,6 @@ parameters:
  - hipSPARSE
  - llvm-project
  - rocBLAS
- - rocm-cmake
  - rocm_smi_lib
  - rocminfo
  - rocprofiler-register
@@ -68,7 +65,6 @@ parameters:
  - llvm-project
  - hipBLAS-common
  - hipBLASLt
- - rocm-cmake
  - rocBLAS
  - rocminfo
  - rocprofiler-register
@@ -112,7 +108,7 @@ jobs:
  aptPackages: ${{ parameters.aptPackages }}
  pipModules: ${{ parameters.pipModules }}
  packageManager: ${{ job.packageManager }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
@@ -128,13 +124,10 @@ jobs:
  aggregatePipeline: ${{ parameters.aggregatePipeline }}
  ${{ if parameters.triggerDownstreamJobs }}:
  downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- # NOTE: content between `---` is for transition support between old/new build systems
- # and should be removed once transition is complete.
- # -----------------------------
  # Build and install gtest and lapack
  # $(Pipeline.Workspace)/deps is a temporary folder for the build process
  # $(Pipeline.Workspace)/s/deps is part of the hipSPARSELt repo
- - script: mkdir -p $(Pipeline.Workspace)/deps
+ - script: mkdir $(Pipeline.Workspace)/deps
  displayName: Create temp folder for external dependencies
  # hipSPARSELt already has a CMake script for external deps, so we can just run that
  # https://github.com/ROCm/hipSPARSELt/blob/develop/deps/CMakeLists.txt
@@ -150,35 +143,22 @@ jobs:
  - script: sudo make install
  displayName: Install hipSPARSELt external dependencies
  workingDirectory: $(Pipeline.Workspace)/deps
- # -----------------------------
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
  parameters:
  os: ${{ job.os }}
- # NOTE: the following options are old build only
- # and can be removed after full transition to new build
- # -DAMDGPU_TARGETS=${{ job.target }}
- # -DCMAKE_Fortran_COMPILER=f95
- # -DTensile_LOGIC=
- # -DTensile_CPU_THREADS=
- # -DTensile_LIBRARY_FORMAT=msgpack
- # -DROCM_PATH=$(Agent.BuildDirectory)/rocm
- # -DBUILD_CLIENTS_TESTS=ON
- # -DBUILD_USE_LOCAL_TENSILE=OFF
  extraBuildFlags: >-
  -DCMAKE_BUILD_TYPE=Release
  -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
  -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
- -DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm"
- -DGPU_TARGETS=${{ job.target }}
- -DAMDGPU_TARGETS=${{ job.target }}
  -DCMAKE_Fortran_COMPILER=f95
+ -DAMDGPU_TARGETS=${{ job.target }}
  -DTensile_LOGIC=
  -DTensile_CPU_THREADS=
  -DTensile_LIBRARY_FORMAT=msgpack
+ -DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm"
  -DROCM_PATH=$(Agent.BuildDirectory)/rocm
  -DBUILD_CLIENTS_TESTS=ON
  -DBUILD_USE_LOCAL_TENSILE=OFF
- -DHIPSPARSELT_ENABLE_FETCH=ON
  -GNinja
  ${{ if ne(parameters.sparseCheckoutDir, '') }}:
  cmakeSourceDir: $(Build.SourcesDirectory)/projects/hipsparselt

View File

@@ -1,29 +1,10 @@
  parameters:
- - name: componentName
- type: string
- default: hipTensor
  - name: checkoutRepo
  type: string
  default: 'self'
  - name: checkoutRef
  type: string
  default: ''
- # monorepo related parameters
- - name: sparseCheckoutDir
- type: string
- default: ''
- - name: triggerDownstreamJobs
- type: boolean
- default: false
- - name: downstreamAggregateNames
- type: string
- default: ''
- - name: buildDependsOn
- type: object
- default: null
- - name: unifiedBuild
- type: boolean
- default: false
  # set to true if doing full build of ROCm stack
  # and dependencies are pulled from same pipeline
  - name: aggregatePipeline
@@ -70,7 +51,7 @@ parameters:
  jobs:
  - ${{ each job in parameters.jobMatrix.buildJobs }}:
- - job: ${{ parameters.componentName }}_build_${{ job.target }}
+ - job: hipTensor_build_${{ job.target }}
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
@@ -85,21 +66,17 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:
  checkoutRepo: ${{ parameters.checkoutRepo }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
  parameters:
  checkoutRef: ${{ parameters.checkoutRef }}
  dependencyList: ${{ parameters.rocmDependencies }}
  gpuTarget: ${{ job.target }}
  aggregatePipeline: ${{ parameters.aggregatePipeline }}
- ${{ if parameters.triggerDownstreamJobs }}:
- downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
  parameters:
  extraBuildFlags: >-
  -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/rocm/llvm
  -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
- -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
  -DROCM_PATH=$(Agent.BuildDirectory)/rocm
  -DCMAKE_BUILD_TYPE=Release
  -DHIPTENSOR_BUILD_TESTS=ON
@@ -107,12 +84,9 @@ jobs:
  -GNinja
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
  parameters:
- componentName: ${{ parameters.componentName }}
- sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
  gpuTarget: ${{ job.target }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
  parameters:
- componentName: ${{ parameters.componentName }}
  gpuTarget: ${{ job.target }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -120,47 +94,44 @@ jobs:
  aptPackages: ${{ parameters.aptPackages }}
  gpuTarget: ${{ job.target }}
- - ${{ if eq(parameters.unifiedBuild, False) }}:
- - ${{ each job in parameters.jobMatrix.testJobs }}:
- - job: ${{ parameters.componentName }}_test_${{ job.target }}
- timeoutInMinutes: 90
- dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
- condition:
- and(succeeded(),
- eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
- not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
- eq(${{ parameters.aggregatePipeline }}, False)
- )
- variables:
- - group: common
- - template: /.azuredevops/variables-global.yml
- pool: ${{ job.target }}_test_pool
- workspace:
- clean: all
- steps:
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- parameters:
- aptPackages: ${{ parameters.aptPackages }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- parameters:
- gpuTarget: ${{ job.target }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
- parameters:
- checkoutRef: ${{ parameters.checkoutRef }}
- dependencyList: ${{ parameters.rocmTestDependencies }}
- gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
- parameters:
- componentName: ${{ parameters.componentName }}
- testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor'
- testParameters: '-E ".*-extended" --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
- parameters:
- aptPackages: ${{ parameters.aptPackages }}
- environment: test
- gpuTarget: ${{ job.target }}
+ - ${{ each job in parameters.jobMatrix.testJobs }}:
+ - job: hipTensor_test_${{ job.target }}
+ timeoutInMinutes: 90
+ dependsOn: hipTensor_build_${{ job.target }}
+ condition:
+ and(succeeded(),
+ eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+ not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+ eq(${{ parameters.aggregatePipeline }}, False)
+ )
+ variables:
+ - group: common
+ - template: /.azuredevops/variables-global.yml
+ pool: ${{ job.target }}_test_pool
+ workspace:
+ clean: all
+ steps:
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+ parameters:
+ aptPackages: ${{ parameters.aptPackages }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+ parameters:
+ gpuTarget: ${{ job.target }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+ parameters:
+ checkoutRef: ${{ parameters.checkoutRef }}
+ dependencyList: ${{ parameters.rocmTestDependencies }}
+ gpuTarget: ${{ job.target }}
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+ parameters:
+ componentName: hipTensor
+ testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor'
+ testParameters: '-E ".*-extended" --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+ parameters:
+ aptPackages: ${{ parameters.aptPackages }}
+ environment: test
+ gpuTarget: ${{ job.target }}

View File

@@ -71,7 +71,7 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
  parameters:
  aptPackages: ${{ parameters.aptPackages }}
- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
  parameters:

View File

@@ -1,308 +0,0 @@
parameters:
- name: componentName
type: string
default: origami
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
type: boolean
default: false
- name: aptPackages
type: object
default:
- cmake
- git
- ninja-build
- wget
- python3
- python3-dev
- python3-pip
- python3-venv
- libgtest-dev
- libboost-filesystem-dev
- libboost-program-options-dev
- name: pipModules
type: object
default:
- nanobind>=2.0.0
- pytest
- pytest-cov
- name: rocmDependencies
type: object
default:
- clr
- llvm-project
- rocm-cmake
- rocminfo
- ROCR-Runtime
- rocprofiler-register
- name: rocmTestDependencies
type: object
default:
- clr
- llvm-project
- rocm-cmake
- rocminfo
- ROCR-Runtime
- rocprofiler-register
- name: jobMatrix
type: object
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt }
- { os: almalinux8, packageManager: dnf }
testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
# - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
# - { os: ubuntu2204, packageManager: apt, target: gfx1151 }
# - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
- name: downstreamComponentMatrix
type: object
default:
- hipBLASLt:
name: hipBLASLt
sparseCheckoutDir: projects/hipblaslt
skipUnifiedBuild: 'false'
buildDependsOn:
- origami_build
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: origami_build_${{ job.os }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
- name: ROCM_PATH
value: $(Agent.BuildDirectory)/rocm
pool:
vmImage: ${{ variables.BASE_BUILD_POOL }}
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
endpoint: ContainerService3
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- gtest
- ${{ if ne(job.os, 'almalinux8') }}:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- catch2
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
os: ${{ job.os }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
os: ${{ job.os }}
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DORIGAMI_BUILD_SHARED_LIBS=ON
-DORIGAMI_ENABLE_PYTHON=ON
-DORIGAMI_BUILD_TESTING=ON
-DORIGAMI_ENABLE_FETCH=ON
-GNinja
- ${{ if ne(job.os, 'almalinux8') }}:
- task: PublishPipelineArtifact@1
displayName: 'Publish Build Directory Artifact'
inputs:
targetPath: '$(Agent.BuildDirectory)/s/build'
artifact: '${{ parameters.componentName }}_${{ job.os }}_build_dir'
publishLocation: 'pipeline'
- task: PublishPipelineArtifact@1
displayName: 'Publish Python Source Artifact'
inputs:
targetPath: '$(Agent.BuildDirectory)/s/python'
artifact: '${{ parameters.componentName }}_${{ job.os }}_python_src'
publishLocation: 'pipeline'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
os: ${{ job.os }}
componentName: ${{ parameters.componentName }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- ${{ if eq(parameters.unifiedBuild, False) }}:
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: origami_test_${{ job.os }}_${{ job.target }}
timeoutInMinutes: 120
dependsOn: origami_build_${{ job.os }}
condition:
and(succeeded(),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- gtest
- ${{ if ne(job.os, 'almalinux8') }}:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- catch2
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
preTargetFilter: ${{ parameters.componentName }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- task: CMake@1
displayName: 'Origami Test CMake Configuration'
inputs:
cmakeArgs: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DORIGAMI_BUILD_SHARED_LIBS=ON
-DORIGAMI_ENABLE_PYTHON=ON
-DORIGAMI_BUILD_TESTING=ON
-GNinja
$(Agent.BuildDirectory)/s
- task: Bash@3
displayName: 'Build Origami Tests and Python Bindings'
inputs:
targetType: inline
workingDirectory: build
script: |
cmake --build . --target origami-tests origami_python -- -j$(nproc)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
# Run tests with CTest, which discovers and runs both the C++ and Python test suites
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }}
testDir: 'build'
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml'
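# Assuming test.yml wraps ctest, the step above amounts to running, from the build
# directory:
#   ctest --output-on-failure --force-new-ctest-process --output-junit test_output.xml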
# Test pip install workflow (currently commented out; uncomment the task below to enable)
# - task: Bash@3
# displayName: 'Test Pip Install'
# inputs:
# targetType: inline
# script: |
# set -e
# echo "==================================================================="
# echo "Testing pip install workflow (pip install -e .)"
# echo "==================================================================="
# # Set environment variables for pip install CMake build
# export ROCM_PATH=$(Agent.BuildDirectory)/rocm
# export CMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm:$(Agent.BuildDirectory)/vendor
# export CMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
# echo "ROCM_PATH: $ROCM_PATH"
# echo "CMAKE_PREFIX_PATH: $CMAKE_PREFIX_PATH"
# echo "CMAKE_CXX_COMPILER: $CMAKE_CXX_COMPILER"
# echo ""
# # Install from source directory
# cd "$(Agent.BuildDirectory)/s/python"
# pip install -e .
# # Verify import works
# echo ""
# echo "Verifying origami can be imported..."
# python3 -c "import origami; print('✓ Successfully imported origami')"
# # Run pytest on installed package
# echo ""
# echo "Running pytest tests..."
# python3 -m pytest tests/ -v -m "not slow" --tb=short
# echo ""
# echo "==================================================================="
# echo "Pip install test completed successfully"
# echo "==================================================================="
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test
gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}
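# For reference, a downstreamComponentMatrix entry consumed by the loop above follows
# the same shape as the one shown in the rccl template elsewhere in this diff; the
# values below are illustrative only:
#   - rocprofiler-sdk:
#       name: rocprofiler-sdk
#       sparseCheckoutDir: ''
#       skipUnifiedBuild: 'false'
#       buildDependsOn:
#         - origami_build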

View File

@@ -1,35 +1,10 @@
parameters: parameters:
- name: componentName
type: string
default: rccl
- name: checkoutRepo - name: checkoutRepo
type: string type: string
default: 'self' default: 'self'
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
- name: systemsRepo
type: string
default: systems_repo
- name: systemsSparseCheckoutDir
type: string
default: 'projects/rocprofiler-sdk'
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -82,52 +57,37 @@ parameters:
type: object type: object
default: default:
buildJobs: buildJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 } - gfx942:
- { os: ubuntu2204, packageManager: apt, target: gfx90a } target: gfx942
- gfx90a:
target: gfx90a
testJobs: testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 } - gfx942:
- { os: ubuntu2204, packageManager: apt, target: gfx90a } target: gfx942
- name: downstreamComponentMatrix - gfx90a:
type: object target: gfx90a
default:
- rocprofiler-sdk:
name: rocprofiler-sdk
sparseCheckoutDir: ''
skipUnifiedBuild: 'false'
buildDependsOn:
- rccl_build
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }} - job: rccl_build_${{ job.target }}
${{ if parameters.buildDependsOn }}: timeoutInMinutes: 90
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}_${{ job.target }}
timeoutInMinutes: 120
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
- name: HIP_ROCCLR_HOME - name: HIP_ROCCLR_HOME
value: $(Build.BinariesDirectory)/rocm value: $(Build.BinariesDirectory)/rocm
pool: ${{ variables.MEDIUM_BUILD_POOL }} pool: ${{ variables.MEDIUM_BUILD_POOL }}
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
endpoint: ContainerService3
workspace: workspace:
clean: all clean: all
steps: steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
submoduleBehaviour: recursive submoduleBehaviour: recursive
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters: parameters:
@@ -137,14 +97,10 @@ jobs:
parameters: parameters:
checkoutRef: ${{ parameters.checkoutRef }} checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }} dependencyList: ${{ parameters.rocmDependencies }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }} aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
os: ${{ job.os }}
extraBuildFlags: >- extraBuildFlags: >-
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc
@@ -156,87 +112,58 @@ jobs:
-GNinja -GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- ${{ if eq(job.os, 'ubuntu2204') }}: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml parameters:
parameters: aptPackages: ${{ parameters.aptPackages }}
aptPackages: ${{ parameters.aptPackages }} gpuTarget: ${{ job.target }}
gpuTarget: ${{ job.target }} extraEnvVars:
extraEnvVars: - HIP_ROCCLR_HOME:::/home/user/workspace/rocm
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm installLatestCMake: true
installLatestCMake: true
- ${{ if eq(parameters.unifiedBuild, False) }}: - ${{ each job in parameters.jobMatrix.testJobs }}:
- ${{ each job in parameters.jobMatrix.testJobs }}: - job: rccl_test_${{ job.target }}
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }} timeoutInMinutes: 120
timeoutInMinutes: 120 dependsOn: rccl_build_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }} condition:
condition: and(succeeded(),
and(succeeded(), eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')), eq(${{ parameters.aggregatePipeline }}, False)
eq(${{ parameters.aggregatePipeline }}, False) )
) variables:
variables: - group: common
- group: common - template: /.azuredevops/variables-global.yml
- template: /.azuredevops/variables-global.yml pool: ${{ job.target }}_test_pool
pool: ${{ job.target }}_test_pool workspace:
workspace: clean: all
clean: all steps:
steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters:
parameters: aptPackages: ${{ parameters.aptPackages }}
aptPackages: ${{ parameters.aptPackages }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml parameters:
parameters: gpuTarget: ${{ job.target }}
preTargetFilter: ${{ parameters.componentName }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
os: ${{ job.os }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
gpuTarget: ${{ job.target }} parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml checkoutRef: ${{ parameters.checkoutRef }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml dependencyList: ${{ parameters.rocmTestDependencies }}
parameters: gpuTarget: ${{ job.target }}
checkoutRef: ${{ parameters.checkoutRef }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
dependencyList: ${{ parameters.rocmTestDependencies }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
os: ${{ job.os }} parameters:
gpuTarget: ${{ job.target }} componentName: rccl
${{ if parameters.triggerDownstreamJobs }}: testDir: '$(Agent.BuildDirectory)/rocm/bin'
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }} testExecutable: './rccl-UnitTests'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters: parameters:
componentName: ${{ parameters.componentName }} aptPackages: ${{ parameters.aptPackages }}
os: ${{ job.os }} environment: test
testDir: '$(Agent.BuildDirectory)/rocm/bin' gpuTarget: ${{ job.target }}
testExecutable: './rccl-UnitTests'
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
environment: test
gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.systemsRepo }}
sparseCheckoutDir: ${{ parameters.systemsSparseCheckoutDir }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}
${{ if parameters.unifiedBuild }}:
buildDependsOn: ${{ component.unifiedBuild.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ component.unifiedBuild.downstreamAggregateNames }}
${{ else }}:
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}

View File

@@ -1,29 +1,10 @@
parameters: parameters:
- name: componentName
type: string
default: rdc
- name: checkoutRepo - name: checkoutRepo
type: string type: string
default: 'self' default: 'self'
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -52,7 +33,6 @@ parameters:
- clr - clr
- hipBLAS-common - hipBLAS-common
- hipBLASLt - hipBLASLt
- hipRAND
- llvm-project - llvm-project
- rocBLAS - rocBLAS
- rocm-cmake - rocm-cmake
@@ -63,7 +43,6 @@ parameters:
- rocprofiler - rocprofiler
- rocprofiler-register - rocprofiler-register
- rocprofiler-sdk - rocprofiler-sdk
- rocRAND
- ROCR-Runtime - ROCR-Runtime
- name: rocmTestDependencies - name: rocmTestDependencies
type: object type: object
@@ -95,11 +74,7 @@ parameters:
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.target }} - job: rdc_build_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.target }}
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
@@ -110,22 +85,16 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
parameters:
cmakeVersion: '3.25.0'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters: parameters:
checkoutRef: ${{ parameters.checkoutRef }} checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }} dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }} aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
# Build grpc # Build grpc
- task: Bash@3 - task: Bash@3
displayName: 'git clone grpc' displayName: 'git clone grpc'
@@ -135,7 +104,6 @@ jobs:
workingDirectory: $(Build.SourcesDirectory) workingDirectory: $(Build.SourcesDirectory)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
cmakeBuildDir: $(Build.SourcesDirectory)/grpc/build cmakeBuildDir: $(Build.SourcesDirectory)/grpc/build
cmakeSourceDir: $(Build.SourcesDirectory)/grpc cmakeSourceDir: $(Build.SourcesDirectory)/grpc
installDir: $(Build.SourcesDirectory)/bin installDir: $(Build.SourcesDirectory)/bin
@@ -149,7 +117,6 @@ jobs:
-GNinja -GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
extraBuildFlags: >- extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DGRPC_ROOT="$(Build.SourcesDirectory)/bin" -DGRPC_ROOT="$(Build.SourcesDirectory)/bin"
@@ -159,12 +126,9 @@ jobs:
-DAMDGPU_TARGETS=${{ job.target }} -DAMDGPU_TARGETS=${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -172,64 +136,60 @@ jobs:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- ${{ if eq(parameters.unifiedBuild, False) }}: - ${{ each job in parameters.jobMatrix.testJobs }}:
- ${{ each job in parameters.jobMatrix.testJobs }}: - job: rdc_test_${{ job.target }}
- job: ${{ parameters.componentName }}_test_${{ job.target }} dependsOn: rdc_build_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }} condition:
condition: and(succeeded(),
and(succeeded(), eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')), eq(${{ parameters.aggregatePipeline }}, False)
eq(${{ parameters.aggregatePipeline }}, False) )
) variables:
variables: - group: common
- group: common - template: /.azuredevops/variables-global.yml
- template: /.azuredevops/variables-global.yml - name: ROCM_PATH
- name: ROCM_PATH value: $(Agent.BuildDirectory)/rocm
value: $(Agent.BuildDirectory)/rocm - name: ROCM_DIR
- name: ROCM_DIR value: $(Agent.BuildDirectory)/rocm
value: $(Agent.BuildDirectory)/rocm pool: ${{ job.target }}_test_pool
pool: ${{ job.target }}_test_pool workspace:
workspace: clean: all
clean: all steps:
steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- checkout: none parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml aptPackages: ${{ parameters.aptPackages }}
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
aptPackages: ${{ parameters.aptPackages }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml gpuTarget: ${{ job.target }}
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
gpuTarget: ${{ job.target }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml checkoutRef: ${{ parameters.checkoutRef }}
parameters: dependencyList: ${{ parameters.rocmTestDependencies }}
checkoutRef: ${{ parameters.checkoutRef }} gpuTarget: ${{ job.target }}
dependencyList: ${{ parameters.rocmTestDependencies }} - task: Bash@3
gpuTarget: ${{ job.target }} displayName: Setup test environment
${{ if parameters.triggerDownstreamJobs }}: inputs:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }} targetType: inline
- task: Bash@3 script: |
displayName: Setup test environment sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rdcd /usr/sbin/rdcd
inputs: echo $(Agent.BuildDirectory)/rocm/lib/rdc/grpc/lib | sudo tee /etc/ld.so.conf.d/grpc.conf
targetType: inline sudo ldconfig -v
script: | - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rdcd /usr/sbin/rdcd - task: Bash@3
echo $(Agent.BuildDirectory)/rocm/lib/rdc/grpc/lib | sudo tee /etc/ld.so.conf.d/grpc.conf displayName: Test rdc
sudo ldconfig -v inputs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml targetType: inline
- task: Bash@3 script: >-
displayName: Test rdc $(Agent.BuildDirectory)/rocm/share/rdc/rdctst_tests/rdctst
inputs: --batch_mode
targetType: inline --start_rdcd
script: >- --unauth_comm
$(Agent.BuildDirectory)/rocm/share/rdc/rdctst_tests/rdctst - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
--batch_mode parameters:
--start_rdcd aptPackages: ${{ parameters.aptPackages }}
--unauth_comm environment: test
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml gpuTarget: ${{ job.target }}
parameters: extraPaths: /home/user/workspace/rocm/bin
aptPackages: ${{ parameters.aptPackages }}
environment: test
gpuTarget: ${{ job.target }}
extraPaths: /home/user/workspace/rocm/bin

View File

@@ -70,7 +70,6 @@ parameters:
- hipBLAS-common - hipBLAS-common
- hipBLASLt - hipBLASLt
- llvm-project - llvm-project
- rocm-cmake
- rocminfo - rocminfo
- rocprofiler-register - rocprofiler-register
- rocm_smi_lib - rocm_smi_lib
@@ -155,7 +154,7 @@ jobs:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }} pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }} packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
@@ -180,8 +179,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
os: ${{ job.os }} os: ${{ job.os }}
cmakeSourceDir: $(Agent.BuildDirectory)/sparse/projects/rocblas
cmakeBuildDir: $(Agent.BuildDirectory)/sparse/projects/rocblas/build
extraBuildFlags: >- extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=Release

View File

@@ -8,25 +8,6 @@ parameters:
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
- name: rocPyDecodeRepo
type: string
default: rocpydecode_repo
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -75,23 +56,10 @@ parameters:
testJobs: testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 } - { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a } - { os: ubuntu2204, packageManager: apt, target: gfx90a }
- name: downstreamComponentMatrix
type: object
default:
- rocPyDecode:
name: rocPyDecode
sparseCheckoutDir: ''
skipUnifiedBuild: 'false'
buildDependsOn:
- rocDecode_build
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.os }} - job: ${{ parameters.componentName }}_build_${{ job.os }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
@@ -115,15 +83,12 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters: parameters:
checkoutRef: ${{ parameters.checkoutRef }} checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }} dependencyList: ${{ parameters.rocmDependencies }}
os: ${{ job.os }} os: ${{ job.os }}
aggregatePipeline: ${{ parameters.aggregatePipeline }} aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
os: ${{ job.os }} os: ${{ job.os }}
@@ -204,15 +169,3 @@ jobs:
registerROCmPackages: true registerROCmPackages: true
environment: test environment: test
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.rocPyDecodeRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}

View File

@@ -210,7 +210,7 @@ jobs:
parameters: parameters:
componentName: ${{ parameters.componentName }} componentName: ${{ parameters.componentName }}
testDir: '$(Agent.BuildDirectory)/rocm/bin/rocprim' testDir: '$(Agent.BuildDirectory)/rocm/bin/rocprim'
extraTestParameters: '-I ${{ job.shard }},,${{ job.shardCount }}' extraTestParameters: '-I ${{ job.shard }},,${{ job.shardCount }} -E device_merge_inplace'
os: ${{ job.os }} os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters: parameters:

View File

@@ -5,22 +5,6 @@ parameters:
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -63,19 +47,19 @@ parameters:
type: object type: object
default: default:
buildJobs: buildJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 } - gfx942:
- { os: ubuntu2204, packageManager: apt, target: gfx90a } target: gfx942
- gfx90a:
target: gfx90a
testJobs: testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 } - gfx942:
- { os: ubuntu2204, packageManager: apt, target: gfx90a } target: gfx942
- gfx90a:
target: gfx90a
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: rocPyDecode_build_${{ job.target }} - job: rocPyDecode_build_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
@@ -90,20 +74,16 @@ jobs:
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }} pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters: parameters:
checkoutRef: ${{ parameters.checkoutRef }} checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }} dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }} aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- task: Bash@3 - task: Bash@3
displayName: 'Save Python Package Paths' displayName: 'Save Python Package Paths'
inputs: inputs:

View File

@@ -1,29 +1,10 @@
parameters: parameters:
- name: componentName
type: string
default: rocWMMA
- name: checkoutRepo - name: checkoutRepo
type: string type: string
default: 'self' default: 'self'
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -85,11 +66,7 @@ parameters:
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.target }} - job: rocWMMA_build_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.target }}
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
@@ -104,7 +81,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters: parameters:
checkoutRef: ${{ parameters.checkoutRef }} checkoutRef: ${{ parameters.checkoutRef }}
@@ -126,12 +102,9 @@ jobs:
# gfx1030 not supported in documentation # gfx1030 not supported in documentation
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -139,45 +112,43 @@ jobs:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- ${{ if eq(parameters.unifiedBuild, False) }}: - ${{ each job in parameters.jobMatrix.testJobs }}:
- ${{ each job in parameters.jobMatrix.testJobs }}: - job: rocWMMA_test_${{ job.target }}
- job: ${{ parameters.componentName }}_test_${{ job.target }} timeoutInMinutes: 270
timeoutInMinutes: 350 dependsOn: rocWMMA_build_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }} condition:
condition: and(succeeded(),
and(succeeded(), eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')), eq(${{ parameters.aggregatePipeline }}, False)
eq(${{ parameters.aggregatePipeline }}, False) )
) variables:
variables: - group: common
- group: common - template: /.azuredevops/variables-global.yml
- template: /.azuredevops/variables-global.yml pool: ${{ job.target }}_test_pool
pool: ${{ job.target }}_test_pool workspace:
workspace: clean: all
clean: all steps:
steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters:
parameters: aptPackages: ${{ parameters.aptPackages }}
aptPackages: ${{ parameters.aptPackages }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml parameters:
parameters: gpuTarget: ${{ job.target }}
preTargetFilter: ${{ parameters.componentName }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
gpuTarget: ${{ job.target }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml checkoutRef: ${{ parameters.checkoutRef }}
parameters: dependencyList: ${{ parameters.rocmTestDependencies }}
checkoutRef: ${{ parameters.checkoutRef }} gpuTarget: ${{ job.target }}
dependencyList: ${{ parameters.rocmTestDependencies }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
gpuTarget: ${{ job.target }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml componentName: rocWMMA
parameters: testDir: '$(Agent.BuildDirectory)/rocm/bin/rocwmma'
componentName: ${{ parameters.componentName }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
testDir: '$(Agent.BuildDirectory)/rocm/bin/rocwmma' parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml aptPackages: ${{ parameters.aptPackages }}
parameters: environment: test
aptPackages: ${{ parameters.aptPackages }} gpuTarget: ${{ job.target }}
environment: test
gpuTarget: ${{ job.target }}

View File

@@ -81,7 +81,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters: parameters:
componentName: rocm-cmake componentName: rocm-cmake
testParameters: '-E "pass-version-parent" --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml' testParameters: '-E "pass-version-parent" --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
os: ${{ job.os }} os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:

View File

@@ -1,29 +1,10 @@
parameters: parameters:
- name: componentName
type: string
default: rocm-core
- name: checkoutRepo - name: checkoutRepo
type: string type: string
default: 'self' default: 'self'
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -46,10 +27,6 @@ parameters:
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: rocm_core_${{ job.os }} - job: rocm_core_${{ job.os }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
pool: pool:
${{ if eq(job.os, 'ubuntu2404') }}: ${{ if eq(job.os, 'ubuntu2404') }}:
vmImage: 'ubuntu-24.04' vmImage: 'ubuntu-24.04'
@@ -73,10 +50,8 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }} os: ${{ job.os }}
useAmdclang: false useAmdclang: false
extraBuildFlags: >- extraBuildFlags: >-
@@ -90,12 +65,9 @@ jobs:
-GNinja -GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
os: ${{ job.os }} os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }} os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml

View File

@@ -14,42 +14,16 @@ parameters:
type: object type: object
default: default:
- cmake - cmake
- libdw-dev
- libglfw3-dev - libglfw3-dev
- libmsgpack-dev - libmsgpack-dev
- libopencv-dev
- libtbb-dev - libtbb-dev
- libtiff-dev
- libva-amdgpu-dev
- libva2-amdgpu
- mesa-amdgpu-va-drivers
- libavcodec-dev
- libavformat-dev
- libavutil-dev
- ninja-build - ninja-build
- python3-pip - python3-pip
- protobuf-compiler
- libprotoc-dev
- libopencv-dev
- name: pipModules
type: object
default:
- future==1.0.0
- pytz==2022.1
- numpy==1.23
- google==3.0.0
- protobuf==3.12.4
- onnx==1.12.0
- nnef==1.0.7
- name: rocmDependencies - name: rocmDependencies
type: object type: object
default: default:
- AMDMIGraphX - AMDMIGraphX
- aomp
- aomp-extras
- clr - clr
- half
- composable_kernel
- hipBLAS - hipBLAS
- hipBLAS-common - hipBLAS-common
- hipBLASLt - hipBLASLt
@@ -59,37 +33,21 @@ parameters:
- hipRAND - hipRAND
- hipSOLVER - hipSOLVER
- hipSPARSE - hipSPARSE
- hipTensor
- llvm-project - llvm-project
- MIOpen
- MIVisionX
- rocm_smi_lib
- rccl
- rocAL
- rocALUTION
- rocBLAS - rocBLAS
- rocDecode
- rocFFT - rocFFT
- rocJPEG
- rocPRIM - rocPRIM
- rocprofiler-register - rocprofiler-register
- rocprofiler-sdk
- ROCR-Runtime - ROCR-Runtime
- rocRAND - rocRAND
- rocSOLVER - rocSOLVER
- rocSPARSE - rocSPARSE
- rocThrust - rocThrust
- rocWMMA
- rpp
- name: rocmTestDependencies - name: rocmTestDependencies
type: object type: object
default: default:
- AMDMIGraphX - AMDMIGraphX
- aomp
- aomp-extras
- clr - clr
- half
- composable_kernel
- hipBLAS - hipBLAS
- hipBLAS-common - hipBLAS-common
- hipBLASLt - hipBLASLt
@@ -99,30 +57,18 @@ parameters:
- hipRAND - hipRAND
- hipSOLVER - hipSOLVER
- hipSPARSE - hipSPARSE
- hipTensor
- llvm-project - llvm-project
- MIOpen
- MIVisionX
- rocm_smi_lib
- rccl
- rocAL
- rocALUTION
- rocBLAS - rocBLAS
- rocDecode
- rocFFT - rocFFT
- rocminfo - rocminfo
- rocPRIM - rocPRIM
- rocJPEG
- rocprofiler-register - rocprofiler-register
- rocprofiler-sdk
- ROCR-Runtime - ROCR-Runtime
- rocRAND - rocRAND
- rocSOLVER - rocSOLVER
- rocSPARSE - rocSPARSE
- rocThrust - rocThrust
- roctracer - roctracer
- rocWMMA
- rpp
- name: jobMatrix - name: jobMatrix
type: object type: object
@@ -151,11 +97,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
parameters:
cmakeVersion: '3.25.0'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
@@ -217,10 +158,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
parameters:
cmakeVersion: '3.25.0'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
@@ -251,6 +188,5 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test environment: test
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}

View File

@@ -43,14 +43,9 @@ parameters:
- ninja-build - ninja-build
- python3-pip - python3-pip
- python3-venv - python3-venv
- googletest
- libgtest-dev
- libgmock-dev
- libboost-filesystem-dev
- name: pipModules - name: pipModules
type: object type: object
default: default:
- msgpack
- joblib - joblib
- "packaging>=22.0" - "packaging>=22.0"
- pytest - pytest
@@ -107,7 +102,7 @@ jobs:
workspace: workspace:
clean: all clean: all
steps: steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
@@ -152,13 +147,6 @@ jobs:
echo "##vso[task.prependpath]$USER_BASE/bin" echo "##vso[task.prependpath]$USER_BASE/bin"
echo "##vso[task.setvariable variable=PytestCmakePath]$USER_BASE/share/Pytest/cmake" echo "##vso[task.setvariable variable=PytestCmakePath]$USER_BASE/share/Pytest/cmake"
displayName: Set cmake configure paths displayName: Set cmake configure paths
- task: Bash@3
displayName: Add ROCm binaries to PATH
inputs:
targetType: inline
script: |
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
os: ${{ job.os }} os: ${{ job.os }}

View File

@@ -1,29 +1,10 @@
parameters: parameters:
- name: componentName
type: string
default: rocm_smi_lib
- name: checkoutRepo - name: checkoutRepo
type: string type: string
default: 'self' default: 'self'
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -51,10 +32,6 @@ parameters:
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: rocm_smi_lib_build_${{ job.os }} - job: rocm_smi_lib_build_${{ job.os }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
pool: pool:
${{ if eq(job.os, 'ubuntu2404') }}: ${{ if eq(job.os, 'ubuntu2404') }}:
vmImage: 'ubuntu-24.04' vmImage: 'ubuntu-24.04'
@@ -78,10 +55,8 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }} os: ${{ job.os }}
useAmdclang: false useAmdclang: false
extraBuildFlags: >- extraBuildFlags: >-
@@ -90,56 +65,51 @@ jobs:
-GNinja -GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
os: ${{ job.os }} os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }} os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
# parameters: # parameters:
# aptPackages: ${{ parameters.aptPackages }} # aptPackages: ${{ parameters.aptPackages }}
- ${{ if eq(parameters.unifiedBuild, False) }}: - ${{ each job in parameters.jobMatrix.testJobs }}:
- ${{ each job in parameters.jobMatrix.testJobs }}: - job: rocm_smi_lib_test_${{ job.os }}_${{ job.target }}
- job: rocm_smi_lib_test_${{ job.os }}_${{ job.target }} dependsOn: rocm_smi_lib_build_${{ job.os }}
dependsOn: rocm_smi_lib_build_${{ job.os }} condition:
condition: and(succeeded(),
and(succeeded(), eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')), eq(${{ parameters.aggregatePipeline }}, False)
eq(${{ parameters.aggregatePipeline }}, False) )
) variables:
variables: - group: common
- group: common - template: /.azuredevops/variables-global.yml
- template: /.azuredevops/variables-global.yml pool: ${{ job.target }}_test_pool
pool: ${{ job.target }}_test_pool workspace:
workspace: clean: all
clean: all steps:
steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- checkout: none parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml aptPackages: ${{ parameters.aptPackages }}
parameters: packageManager: ${{ job.packageManager }}
aptPackages: ${{ parameters.aptPackages }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
packageManager: ${{ job.packageManager }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml os: ${{ job.os }}
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
os: ${{ job.os }} parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml runRocminfo: false
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
runRocminfo: false parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml componentName: rocm_smi_lib
parameters: testDir: '$(Agent.BuildDirectory)'
componentName: ${{ parameters.componentName }} testExecutable: 'sudo ./rocm/share/rocm_smi/rsmitst_tests/rsmitst'
testDir: '$(Agent.BuildDirectory)' testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
testExecutable: 'sudo ./rocm/share/rocm_smi/rsmitst_tests/rsmitst' os: ${{ job.os }}
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes' - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
os: ${{ job.os }} parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml aptPackages: ${{ parameters.aptPackages }}
parameters: environment: test
aptPackages: ${{ parameters.aptPackages }} gpuTarget: ${{ job.target }}
environment: test
gpuTarget: ${{ job.target }}

View File

@@ -1,29 +1,10 @@
parameters: parameters:
- name: componentName
type: string
default: rocminfo
- name: checkoutRepo - name: checkoutRepo
type: string type: string
default: 'self' default: 'self'
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -59,11 +40,7 @@ parameters:
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.os }} - job: rocminfo_build_${{ job.os }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
pool: pool:
vmImage: 'ubuntu-22.04' vmImage: 'ubuntu-22.04'
${{ if eq(job.os, 'almalinux8') }}: ${{ if eq(job.os, 'almalinux8') }}:
@@ -85,18 +62,14 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters: parameters:
checkoutRef: ${{ parameters.checkoutRef }} checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }} dependencyList: ${{ parameters.rocmDependencies }}
aggregatePipeline: ${{ parameters.aggregatePipeline }} aggregatePipeline: ${{ parameters.aggregatePipeline }}
os: ${{ job.os }} os: ${{ job.os }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }} os: ${{ job.os }}
useAmdclang: false useAmdclang: false
extraBuildFlags: >- extraBuildFlags: >-
@@ -105,71 +78,65 @@ jobs:
-GNinja -GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
os: ${{ job.os }} os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }} os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- ${{ if eq(parameters.unifiedBuild, False) }}: - ${{ each job in parameters.jobMatrix.testJobs }}:
- ${{ each job in parameters.jobMatrix.testJobs }}: - job: rocminfo_test_${{ job.target }}
- job: rocminfo_test_${{ job.target }} dependsOn: rocminfo_build_${{ job.os }}
dependsOn: rocminfo_build_${{ job.os }} condition:
condition: and(succeeded(),
and(succeeded(), eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')), eq(${{ parameters.aggregatePipeline }}, False)
eq(${{ parameters.aggregatePipeline }}, False) )
) variables:
variables: - group: common
- group: common - template: /.azuredevops/variables-global.yml
- template: /.azuredevops/variables-global.yml pool: ${{ job.target }}_test_pool
pool: ${{ job.target }}_test_pool workspace:
workspace: clean: all
clean: all steps:
steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters:
parameters: aptPackages: ${{ parameters.aptPackages }}
aptPackages: ${{ parameters.aptPackages }} packageManager: ${{ job.packageManager }}
packageManager: ${{ job.packageManager }} registerROCmPackages: true
registerROCmPackages: true - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml parameters:
parameters: os: ${{ job.os }}
os: ${{ job.os }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml parameters:
parameters: checkoutRef: ${{ parameters.checkoutRef }}
checkoutRef: ${{ parameters.checkoutRef }} dependencyList: ${{ parameters.rocmTestDependencies }}
dependencyList: ${{ parameters.rocmTestDependencies }} gpuTarget: ${{ job.target }}
gpuTarget: ${{ job.target }} os: ${{ job.os }}
os: ${{ job.os }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
${{ if parameters.triggerDownstreamJobs }}: parameters:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }} runRocminfo: false
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters: parameters:
runRocminfo: false componentName: rocminfo
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml testDir: '$(Agent.BuildDirectory)'
parameters: testExecutable: './rocm/bin/rocminfo'
componentName: ${{ parameters.componentName }} testParameters: ''
testDir: '$(Agent.BuildDirectory)' testPublishResults: false
testExecutable: './rocm/bin/rocminfo' os: ${{ job.os }}
testParameters: '' - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
testPublishResults: false parameters:
os: ${{ job.os }} componentName: rocm_agent_enumerator
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml testDir: '$(Agent.BuildDirectory)'
parameters: testExecutable: './rocm/bin/rocm_agent_enumerator'
componentName: rocm_agent_enumerator testParameters: ''
testDir: '$(Agent.BuildDirectory)' testPublishResults: false
testExecutable: './rocm/bin/rocm_agent_enumerator' os: ${{ job.os }}
testParameters: '' - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
testPublishResults: false parameters:
os: ${{ job.os }} aptPackages: ${{ parameters.aptPackages }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml registerROCmPackages: true
parameters: environment: test
aptPackages: ${{ parameters.aptPackages }} gpuTarget: ${{ job.target }}
registerROCmPackages: true
environment: test
gpuTarget: ${{ job.target }}
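Both variants of the test-job condition above gate on the same pipeline variables; they differ only in which value is checked against the DISABLED list (the componentName parameter in one, Build.DefinitionName in the other). A sketch with hypothetical variable values showing how the expression evaluates:

# Hypothetical variable-group values for a gfx942 test agent:
variables:
  ENABLE_GFX942_TESTS: 'true'
  DISABLED_GFX942_TESTS: 'rocdecode,rocjpeg'   # comma-separated skip list
# For this pipeline ('rocminfo'):
#   eq(variables['ENABLE_GFX942_TESTS'], 'true')                -> true
#   split(variables['DISABLED_GFX942_TESTS'], ',')              -> rocdecode, rocjpeg
#   not(containsValue(<that list>, 'rocminfo'))                 -> true
#   eq(aggregatePipeline, False)                                -> true when not an aggregate build
# so the test job runs; adding 'rocminfo' to DISABLED_GFX942_TESTS skips it.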
@@ -55,7 +55,6 @@ parameters:
- pymongo - pymongo
- pyyaml - pyyaml
- setuptools - setuptools
- sqlalchemy
- tabulate - tabulate
- textual - textual
- textual_plotext - textual_plotext
@@ -65,13 +64,6 @@ parameters:
- pytest - pytest
- pytest-cov - pytest-cov
- pytest-xdist - pytest-xdist
- name: rocmDependencies
type: object
default:
- clr
- llvm-project
- ROCR-Runtime
- rocprofiler-sdk
- name: rocmTestDependencies - name: rocmTestDependencies
type: object type: object
default: default:
@@ -108,12 +100,10 @@ jobs:
${{ if parameters.buildDependsOn }}: ${{ if parameters.buildDependsOn }}:
dependsOn: dependsOn:
- ${{ each build in parameters.buildDependsOn }}: - ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.target }} - ${{ build }}_${{ job.os }}_${{ job.target }}
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
- name: ROCM_PATH
value: $(Agent.BuildDirectory)/rocm
pool: pool:
vmImage: ${{ variables.BASE_BUILD_POOL }} vmImage: ${{ variables.BASE_BUILD_POOL }}
workspace: workspace:
@@ -128,14 +118,6 @@ jobs:
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }} sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
extraBuildFlags: >- extraBuildFlags: >-
@@ -1,29 +1,10 @@
parameters: parameters:
- name: componentName
type: string
default: rocprofiler-sdk
- name: checkoutRepo - name: checkoutRepo
type: string type: string
default: 'self' default: 'self'
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -79,27 +60,23 @@ parameters:
type: object type: object
default: default:
buildJobs: buildJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 } - gfx942:
- { os: ubuntu2204, packageManager: apt, target: gfx90a } target: gfx942
- gfx90a:
target: gfx90a
testJobs: testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 } - gfx942:
- { os: ubuntu2204, packageManager: apt, target: gfx90a } target: gfx942
- gfx90a:
target: gfx90a
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: rocprofiler_sdk_build_${{ job.os }}_${{ job.target }} - job: rocprofiler_sdk_build_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os}}_${{ job.target }}
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
pool: ${{ variables.MEDIUM_BUILD_POOL }} pool: ${{ variables.MEDIUM_BUILD_POOL }}
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
endpoint: ContainerService3
workspace: workspace:
clean: all clean: all
steps: steps:
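The flow-style jobMatrix entries ({ os: ..., packageManager: ..., target: ... }) let each expanded job read job.os and job.packageManager in addition to job.target. A minimal, self-contained sketch (hypothetical job name) of how the compile-time ${{ each }} loop consumes such an entry:

parameters:
- name: jobMatrix
  type: object
  default:
    buildJobs:
    - { os: ubuntu2204, packageManager: apt, target: gfx942 }
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
  - job: example_build_${{ job.os }}_${{ job.target }}   # expands to example_build_ubuntu2204_gfx942
    steps:
    - script: echo "installing with ${{ job.packageManager }} for ${{ job.target }}"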
@@ -107,23 +84,18 @@ jobs:
parameters: parameters:
aptPackages: ${{ parameters.aptPackages }} aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }} pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }}
registerROCmPackages: true registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters: parameters:
checkoutRef: ${{ parameters.checkoutRef }} checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }} dependencyList: ${{ parameters.rocmDependencies }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }} aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- task: Bash@3 - task: Bash@3
displayName: Add Python site-packages binaries to path displayName: Add Python site-packages binaries to path
inputs: inputs:
@@ -133,8 +105,6 @@ jobs:
echo "##vso[task.prependpath]$USER_BASE/bin" echo "##vso[task.prependpath]$USER_BASE/bin"
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }}
extraBuildFlags: >- extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DROCPROFILER_BUILD_TESTS=ON -DROCPROFILER_BUILD_TESTS=ON
@@ -144,13 +114,9 @@ jobs:
-GNinja -GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -160,72 +126,62 @@ jobs:
# gpuTarget: ${{ job.target }} # gpuTarget: ${{ job.target }}
# registerROCmPackages: true # registerROCmPackages: true
- ${{ if eq(parameters.unifiedBuild, False) }}: - ${{ each job in parameters.jobMatrix.testJobs }}:
- ${{ each job in parameters.jobMatrix.testJobs }}: - job: rocprofiler_sdk_test_${{ job.target }}
- job: rocprofiler_sdk_test_${{ job.os }}_${{ job.target }} dependsOn: rocprofiler_sdk_build_${{ job.target }}
dependsOn: rocprofiler_sdk_build_${{ job.os }}_${{ job.target }} condition:
condition: and(succeeded(),
and(succeeded(), eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')), eq(${{ parameters.aggregatePipeline }}, False)
eq(${{ parameters.aggregatePipeline }}, False) )
) variables:
variables: - group: common
- group: common - template: /.azuredevops/variables-global.yml
- template: /.azuredevops/variables-global.yml pool: ${{ job.target }}_test_pool
pool: ${{ job.target }}_test_pool workspace:
workspace: clean: all
clean: all steps:
steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters:
parameters: aptPackages: ${{ parameters.aptPackages }}
aptPackages: ${{ parameters.aptPackages }} pipModules: ${{ parameters.pipModules }}
pipModules: ${{ parameters.pipModules }} registerROCmPackages: true
packageManager: ${{ job.packageManager }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
registerROCmPackages: true - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml checkoutRepo: ${{ parameters.checkoutRepo }}
parameters: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
checkoutRepo: ${{ parameters.checkoutRepo }} parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml checkoutRef: ${{ parameters.checkoutRef }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml dependencyList: ${{ parameters.rocmDependencies }}
parameters: gpuTarget: ${{ job.target }}
checkoutRef: ${{ parameters.checkoutRef }} - task: Bash@3
dependencyList: ${{ parameters.rocmDependencies }} displayName: Add Python and ROCm binaries to path
os: ${{ job.os }} inputs:
gpuTarget: ${{ job.target }} targetType: inline
${{ if parameters.triggerDownstreamJobs }}: script: |
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }} USER_BASE=$(python3 -m site --user-base)
- task: Bash@3 echo "##vso[task.prependpath]$USER_BASE/bin"
displayName: Add Python and ROCm binaries to path echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
inputs: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
targetType: inline parameters:
script: | extraBuildFlags: >-
USER_BASE=$(python3 -m site --user-base) -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
echo "##vso[task.prependpath]$USER_BASE/bin" -DROCPROFILER_BUILD_TESTS=ON
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin" -DROCPROFILER_BUILD_SAMPLES=ON
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml -DROCPROFILER_BUILD_RELEASE=ON
parameters: -DGPU_TARGETS=${{ job.target }}
componentName: ${{ parameters.componentName }} -GNinja
os: ${{ job.os }} - template: ${{ variables.CI_TEMPLATE_PATH}}/steps/gpu-diagnostics.yml
extraBuildFlags: >- - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm parameters:
-DROCPROFILER_BUILD_TESTS=ON componentName: rocprofiler-sdk
-DROCPROFILER_BUILD_SAMPLES=ON - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-DROCPROFILER_BUILD_RELEASE=ON parameters:
-DGPU_TARGETS=${{ job.target }} aptPackages: ${{ parameters.aptPackages }}
-GNinja pipModules: ${{ parameters.pipModules }}
- template: ${{ variables.CI_TEMPLATE_PATH}}/steps/gpu-diagnostics.yml environment: test
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml gpuTarget: ${{ job.target }}
parameters: registerROCmPackages: true
componentName: ${{ parameters.componentName }}
os: ${{ job.os }}
testDir: $(Agent.BuildDirectory)/build
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test
gpuTarget: ${{ job.target }}
registerROCmPackages: true
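The extraBuildFlags listed above presumably land on the cmake configure line assembled by build-cmake.yml (that template is not part of this diff). A hedged sketch of the equivalent local configure and build, with gfx942 standing in for ${{ job.target }} and the directory layout assumed:

- task: Bash@3
  displayName: Configure and build rocprofiler-sdk (sketch)
  inputs:
    targetType: inline
    workingDirectory: $(Agent.BuildDirectory)
    script: |
      # flags mirror extraBuildFlags above; the -S/-B paths are assumptions
      cmake -S s -B build -GNinja \
        -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm \
        -DROCPROFILER_BUILD_TESTS=ON \
        -DROCPROFILER_BUILD_SAMPLES=ON \
        -DROCPROFILER_BUILD_RELEASE=ON \
        -DGPU_TARGETS=gfx942
      cmake --build build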
@@ -6,25 +6,6 @@ parameters:
- name: checkoutRef - name: checkoutRef
type: string type: string
default: '' default: ''
# monorepo related parameters
- name: componentName
type: string
default: rocprofiler-systems
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack # set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline # and dependencies are pulled from same pipeline
- name: aggregatePipeline - name: aggregatePipeline
@@ -106,10 +87,6 @@ parameters:
jobs: jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}: - ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: rocprofiler_systems_build_${{ job.target }} - job: rocprofiler_systems_build_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
variables: variables:
- group: common - group: common
- template: /.azuredevops/variables-global.yml - template: /.azuredevops/variables-global.yml
@@ -128,7 +105,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters: parameters:
checkoutRepo: ${{ parameters.checkoutRepo }} checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters: parameters:
@@ -160,16 +136,12 @@ jobs:
-DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/include/rocjpeg -DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/include/rocjpeg
-DGPU_TARGETS=${{ job.target }} -DGPU_TARGETS=${{ job.target }}
-GNinja -GNinja
componentName: ${{ parameters.componentName }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters: parameters:
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
gpuTarget: ${{ job.target }} gpuTarget: ${{ job.target }}
componentName: ${{ parameters.componentName }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters: parameters:
@@ -179,93 +151,85 @@ jobs:
registerROCmPackages: true registerROCmPackages: true
extraPaths: /home/user/workspace/rocm/bin:/home/user/workspace/rocm/llvm/bin extraPaths: /home/user/workspace/rocm/bin:/home/user/workspace/rocm/llvm/bin
- ${{ if eq(parameters.unifiedBuild, False) }}: - ${{ each job in parameters.jobMatrix.testJobs }}:
- ${{ each job in parameters.jobMatrix.testJobs }}: - job: rocprofiler_systems_test_${{ job.target }}
- job: rocprofiler_systems_test_${{ job.target }} dependsOn: rocprofiler_systems_build_${{ job.target }}
dependsOn: rocprofiler_systems_build_${{ job.target }} condition:
condition: and(succeeded(),
and(succeeded(), eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'), not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')), eq(${{ parameters.aggregatePipeline }}, False)
eq(${{ parameters.aggregatePipeline }}, False) )
) timeoutInMinutes: 180
timeoutInMinutes: 180 variables:
variables: - group: common
- group: common - template: /.azuredevops/variables-global.yml
- template: /.azuredevops/variables-global.yml - name: ROCM_PATH
- name: ROCM_PATH value: $(Agent.BuildDirectory)/rocm
value: $(Agent.BuildDirectory)/rocm pool:
pool: name: ${{ job.target }}_test_pool
name: ${{ job.target }}_test_pool workspace:
workspace: clean: all
clean: all steps:
steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters:
parameters: aptPackages: ${{ parameters.aptPackages }}
aptPackages: ${{ parameters.aptPackages }} pipModules: ${{ parameters.pipModules }}
pipModules: ${{ parameters.pipModules }} registerROCmPackages: true
registerROCmPackages: true - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml parameters:
parameters: checkoutRepo: ${{ parameters.checkoutRepo }}
checkoutRepo: ${{ parameters.checkoutRepo }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml parameters:
parameters: checkoutRef: ${{ parameters.checkoutRef }}
checkoutRef: ${{ parameters.checkoutRef }} dependencyList: ${{ parameters.rocmDependencies }}
dependencyList: ${{ parameters.rocmDependencies }} gpuTarget: ${{ job.target }}
gpuTarget: ${{ job.target }} - task: Bash@3
${{ if parameters.triggerDownstreamJobs }}: displayName: Add ROCm binaries to PATH
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }} inputs:
- task: Bash@3 targetType: inline
displayName: Add ROCm binaries to PATH script: |
inputs: echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
targetType: inline echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
script: | - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin" parameters:
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin" # build flags reference: https://rocm.docs.amd.com/projects/omnitrace/en/latest/install/install.html
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml extraBuildFlags: >-
parameters: -DROCPROFSYS_BUILD_TESTING=ON
cmakeSourceDir: $(Agent.BuildDirectory)/s/projects/rocprofiler-systems -DROCPROFSYS_BUILD_DYNINST=ON
# build flags reference: https://rocm.docs.amd.com/projects/omnitrace/en/latest/install/install.html -DROCPROFSYS_BUILD_LIBUNWIND=ON
extraBuildFlags: >- -DROCPROFSYS_DISABLE_EXAMPLES="openmp-target"
-DCMAKE_INSTALL_PREFIX=$(Agent.BuildDirectory)/rocprofiler-systems -DDYNINST_BUILD_TBB=ON
-DROCPROFSYS_USE_PYTHON=ON -DDYNINST_BUILD_ELFUTILS=ON
-DROCPROFSYS_BUILD_TESTING=ON -DDYNINST_BUILD_LIBIBERTY=ON
-DROCPROFSYS_BUILD_DYNINST=ON -DDYNINST_BUILD_BOOST=ON
-DROCPROFSYS_BUILD_LIBUNWIND=ON -DROCPROFSYS_USE_PAPI=ON
-DROCPROFSYS_DISABLE_EXAMPLES="openmp-target" -DROCPROFSYS_USE_MPI=ON
-DDYNINST_BUILD_TBB=ON -DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/include/rocjpeg
-DDYNINST_BUILD_ELFUTILS=ON -DGPU_TARGETS=${{ job.target }}
-DDYNINST_BUILD_LIBIBERTY=ON -GNinja
-DDYNINST_BUILD_BOOST=ON - task: Bash@3
-DROCPROFSYS_USE_PAPI=ON displayName: Set up rocprofiler-systems env
-DROCPROFSYS_USE_MPI=ON inputs:
-DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/include/rocjpeg targetType: inline
-DGPU_TARGETS=${{ job.target }} script: source share/rocprofiler-systems/setup-env.sh
-GNinja workingDirectory: build
- task: Bash@3 - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
displayName: Set up rocprofiler-systems env parameters:
inputs: componentName: rocprofiler-systems
targetType: inline - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
script: source $(Agent.BuildDirectory)/rocprofiler-systems/share/rocprofiler-systems/setup-env.sh parameters:
workingDirectory: $(Agent.BuildDirectory)/rocprofiler-systems/share/rocprofiler-systems gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters: parameters:
componentName: ${{ parameters.componentName }} gpuTarget: ${{ job.target }}
testDir: $(Agent.BuildDirectory)/s/build/tests/ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
testParameters: '--output-on-failure' parameters:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml aptPackages: ${{ parameters.aptPackages }}
parameters: pipModules: ${{ parameters.pipModules }}
gpuTarget: ${{ job.target }} environment: test
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml registerROCmPackages: true
parameters: gpuTarget: ${{ job.target }}
gpuTarget: ${{ job.target }} extraPaths: /home/user/workspace/rocm/bin:/home/user/workspace/rocm/llvm/bin
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test
registerROCmPackages: true
gpuTarget: ${{ job.target }}
extraPaths: /home/user/workspace/rocm/bin:/home/user/workspace/rocm/llvm/bin
@@ -1,63 +0,0 @@
parameters:
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
- name: cli11Version
type: string
default: ''
- name: aptPackages
type: object
default:
- cmake
- git
- ninja-build
- name: jobMatrix
type: object
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt}
- { os: almalinux8, packageManager: dnf}
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: cli11_${{ job.os }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool:
vmImage: 'ubuntu-22.04'
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
endpoint: ContainerService3
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- task: Bash@3
displayName: Clone cli11 ${{ parameters.cli11Version }}
inputs:
targetType: inline
script: git clone https://github.com/CLIUtils/CLI11.git -b ${{ parameters.cli11Version }}
workingDirectory: $(Agent.BuildDirectory)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
os: ${{ job.os }}
cmakeBuildDir: $(Agent.BuildDirectory)/CLI11/build
cmakeSourceDir: $(Agent.BuildDirectory)/CLI11
useAmdclang: false
extraBuildFlags: >-
-DCMAKE_BUILD_TYPE=Release
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
os: ${{ job.os }}
@@ -1,66 +0,0 @@
parameters:
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
- name: yamlcppVersion
type: string
default: ''
- name: aptPackages
type: object
default:
- cmake
- git
- ninja-build
- name: jobMatrix
type: object
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt}
- { os: almalinux8, packageManager: dnf}
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: yamlcpp_${{ job.os }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool:
vmImage: 'ubuntu-22.04'
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
endpoint: ContainerService3
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- task: Bash@3
displayName: Clone yaml-cpp ${{ parameters.yamlcppVersion }}
inputs:
targetType: inline
script: git clone https://github.com/jbeder/yaml-cpp.git -b ${{ parameters.yamlcppVersion }}
workingDirectory: $(Agent.BuildDirectory)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
os: ${{ job.os }}
cmakeBuildDir: $(Agent.BuildDirectory)/yaml-cpp/build
cmakeSourceDir: $(Agent.BuildDirectory)/yaml-cpp
useAmdclang: false
extraBuildFlags: >-
-DCMAKE_BUILD_TYPE=Release
-DYAML_CPP_BUILD_TOOLS=OFF
-DYAML_BUILD_SHARED_LIBS=OFF
-DYAML_CPP_INSTALL=ON
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
os: ${{ job.os }}
@@ -1,23 +0,0 @@
variables:
- group: common
- template: /.azuredevops/variables-global.yml
parameters:
- name: cli11Version
type: string
default: "main"
resources:
repositories:
- repository: pipelines_repo
type: github
endpoint: ROCm
name: ROCm/ROCm
trigger: none
pr: none
jobs:
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/cli11.yml
parameters:
cli11Version: ${{ parameters.cli11Version }}
@@ -1,24 +0,0 @@
variables:
- group: common
- template: /.azuredevops/variables-global.yml
parameters:
- name: yamlcppVersion
type: string
default: "0.8.0"
resources:
repositories:
- repository: pipelines_repo
type: github
endpoint: ROCm
name: ROCm/ROCm
trigger: none
pr: none
jobs:
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/yamlcpp.yml
parameters:
yamlcppVersion: ${{ parameters.yamlcppVersion }}
@@ -1,15 +1,10 @@
parameters:
- name: cmakeVersion
type: string
default: '3.31.0'
steps: steps:
- task: Bash@3 - task: Bash@3
displayName: Install CMake ${{ parameters.cmakeVersion }} displayName: Install CMake 3.31
inputs: inputs:
targetType: inline targetType: inline
script: | script: |
CMAKE_VERSION=${{ parameters.cmakeVersion }} CMAKE_VERSION=3.31.0
CMAKE_ROOT="$(Pipeline.Workspace)/cmake" CMAKE_ROOT="$(Pipeline.Workspace)/cmake"
echo "Downloading CMake $CMAKE_VERSION..." echo "Downloading CMake $CMAKE_VERSION..."
@@ -63,7 +63,6 @@ parameters:
libopenblas-dev: openblas-devel libopenblas-dev: openblas-devel
libopenmpi-dev: openmpi-devel libopenmpi-dev: openmpi-devel
libpci-dev: libpciaccess-devel libpci-dev: libpciaccess-devel
libsimde-dev: simde-devel
libssl-dev: openssl-devel libssl-dev: openssl-devel
# note: libstdc++-devel is in the base packages list # note: libstdc++-devel is in the base packages list
libsystemd-dev: systemd-devel libsystemd-dev: systemd-devel
@@ -35,8 +35,8 @@ parameters:
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
amdsmi: amdsmi:
pipelineId: 376 pipelineId: 99
developBranch: develop developBranch: amd-staging
hasGpuTarget: false hasGpuTarget: false
aomp-extras: aomp-extras:
pipelineId: 111 pipelineId: 111
@@ -46,10 +46,6 @@ parameters:
pipelineId: 115 pipelineId: 115
developBranch: aomp-dev developBranch: aomp-dev
hasGpuTarget: false hasGpuTarget: false
aqlprofile:
pipelineId: 365
developBranch: develop
hasGpuTarget: false
clr: clr:
pipelineId: 335 pipelineId: 335
developBranch: develop developBranch: develop
@@ -67,8 +63,8 @@ parameters:
developBranch: develop developBranch: develop
hasGpuTarget: false hasGpuTarget: false
hip-tests: hip-tests:
pipelineId: 362 pipelineId: 233
developBranch: develop developBranch: amd-staging
hasGpuTarget: false hasGpuTarget: false
hipBLAS: hipBLAS:
pipelineId: 317 pipelineId: 317
@@ -115,7 +111,7 @@ parameters:
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
hipTensor: hipTensor:
pipelineId: 374 pipelineId: 105
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
llvm-project: llvm-project:
@@ -130,17 +126,13 @@ parameters:
pipelineId: 80 pipelineId: 80
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
origami:
pipelineId: 364
developBranch: develop
hasGpuTarget: true
rccl: rccl:
pipelineId: 107 pipelineId: 107
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
rdc: rdc:
pipelineId: 360 pipelineId: 100
developBranch: develop developBranch: amd-staging
hasGpuTarget: false hasGpuTarget: false
rocAL: rocAL:
pipelineId: 151 pipelineId: 151
@@ -179,16 +171,16 @@ parameters:
developBranch: develop developBranch: develop
hasGpuTarget: false hasGpuTarget: false
rocm-core: rocm-core:
pipelineId: 349 pipelineId: 103
developBranch: develop developBranch: master
hasGpuTarget: false hasGpuTarget: false
rocm-examples: rocm-examples:
pipelineId: 216 pipelineId: 216
developBranch: amd-staging developBranch: amd-staging
hasGpuTarget: true hasGpuTarget: true
rocminfo: rocminfo:
pipelineId: 356 pipelineId: 91
developBranch: develop developBranch: amd-staging
hasGpuTarget: false hasGpuTarget: false
rocMLIR: rocMLIR:
pipelineId: 229 pipelineId: 229
@@ -203,8 +195,8 @@ parameters:
developBranch: master developBranch: master
hasGpuTarget: false hasGpuTarget: false
rocm_smi_lib: rocm_smi_lib:
pipelineId: 358 pipelineId: 96
developBranch: develop developBranch: amd-staging
hasGpuTarget: false hasGpuTarget: false
rocPRIM: rocPRIM:
pipelineId: 273 pipelineId: 273
@@ -215,7 +207,7 @@ parameters:
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
rocprofiler-compute: rocprofiler-compute:
pipelineId: 344 pipelineId: 257
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
rocprofiler-register: rocprofiler-register:
@@ -223,20 +215,20 @@ parameters:
developBranch: develop developBranch: develop
hasGpuTarget: false hasGpuTarget: false
rocprofiler-sdk: rocprofiler-sdk:
pipelineId: 347 pipelineId: 246
developBranch: develop developBranch: amd-staging
hasGpuTarget: true hasGpuTarget: true
rocprofiler-systems: rocprofiler-systems:
pipelineId: 345 pipelineId: 255
developBranch: develop developBranch: amd-staging
hasGpuTarget: true hasGpuTarget: true
rocPyDecode: rocPyDecode:
pipelineId: 239 pipelineId: 239
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
ROCR-Runtime: ROCR-Runtime:
pipelineId: 354 pipelineId: 10
developBranch: develop developBranch: amd-staging
hasGpuTarget: false hasGpuTarget: false
rocRAND: rocRAND:
pipelineId: 274 pipelineId: 274
@@ -259,11 +251,11 @@ parameters:
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
roctracer: roctracer:
pipelineId: 331 pipelineId: 141
developBranch: develop developBranch: amd-staging
hasGpuTarget: true hasGpuTarget: true
rocWMMA: rocWMMA:
pipelineId: 370 pipelineId: 109
developBranch: develop developBranch: develop
hasGpuTarget: true hasGpuTarget: true
rpp: rpp:
@@ -13,7 +13,7 @@ parameters:
default: ctest default: ctest
- name: testParameters - name: testParameters
type: string type: string
default: --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml default: --output-on-failure --force-new-ctest-process --output-junit test_output.xml
- name: extraTestParameters - name: extraTestParameters
type: string type: string
default: '' default: ''
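The default testParameters are standard CTest flags (one side additionally passes --extra-verbose). Assuming the template runs testExecutable (ctest by default) from the configured test directory, the default invocation is roughly:

- task: Bash@3
  displayName: Run ctest with the template defaults (sketch)
  inputs:
    targetType: inline
    workingDirectory: $(Agent.BuildDirectory)/build   # hypothetical testDir
    script: |
      # --output-junit requires CTest 3.21 or newer
      ctest --output-on-failure --force-new-ctest-process --output-junit test_output.xml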
.gitignore
@@ -1,7 +1,6 @@
.venv .venv
.vscode .vscode
build build
__pycache__
# documentation artifacts # documentation artifacts
_build/ _build/
@@ -27,7 +27,6 @@ ASICs
ASan ASan
ASAN ASAN
ASm ASm
Async
ATI ATI
atomicRMW atomicRMW
AddressSanitizer AddressSanitizer
@@ -35,7 +34,6 @@ AlexNet
Andrej Andrej
Arb Arb
Autocast Autocast
autograd
BARs BARs
BatchNorm BatchNorm
BLAS BLAS
@@ -45,7 +43,6 @@ Blit
Blockwise Blockwise
Bluefield Bluefield
Bootloader Bootloader
Broadcom
CAS CAS
CCD CCD
CDNA CDNA
@@ -65,7 +62,6 @@ CPU
CPUs CPUs
Cron Cron
CSC CSC
CSDATA
CSE CSE
CSV CSV
CSn CSn
@@ -75,11 +71,9 @@ CU
CUDA CUDA
CUs CUs
CXX CXX
CX
Cavium Cavium
CentOS CentOS
ChatGPT ChatGPT
Cholesky
CoRR CoRR
Codespaces Codespaces
Commitizen Commitizen
@@ -87,13 +81,10 @@ CommonMark
Concretized Concretized
Conda Conda
ConnectX ConnectX
CountOnes
CuPy CuPy
customizable
da da
Dashboarding Dashboarding
Dataloading Dataloading
dataflows
DBRX DBRX
DDR DDR
DF DF
@@ -106,7 +97,6 @@ DIMM
DKMS DKMS
DL DL
DMA DMA
DOMContentLoaded
DNN DNN
DNNL DNNL
DPM DPM
@@ -125,8 +115,6 @@ Dependabot
Deprecations Deprecations
DevCap DevCap
DirectX DirectX
Disaggregated
disaggregated
Dockerfile Dockerfile
Dockerized Dockerized
Doxygen Doxygen
@@ -135,14 +123,9 @@ ELMo
ENDPGM ENDPGM
EPYC EPYC
ESXi ESXi
EP
EoS EoS
etcd
equalto
fas fas
FBGEMM FBGEMM
FiLM
FIFOs
FFT FFT
FFTs FFTs
FFmpeg FFmpeg
@@ -155,19 +138,15 @@ Filesystem
FindDb FindDb
Flang Flang
FlashAttention FlashAttention
FlashInfers
FlashInfer
FluxBenchmark FluxBenchmark
Fortran Fortran
Fuyu Fuyu
GALB GALB
GAT GAT
GATNE
GCC GCC
GCD GCD
GCDs GCDs
GCN GCN
GCNN
GDB GDB
GDDR GDDR
GDR GDR
@@ -177,7 +156,6 @@ GEMMs
GFLOPS GFLOPS
GFortran GFortran
GFXIP GFXIP
GGUF
Gemma Gemma
GiB GiB
GIM GIM
@@ -186,19 +164,15 @@ Glibc
GLXT GLXT
Gloo Gloo
GMI GMI
GNN
GNNs
GPG GPG
GPR GPR
GPT GPT
GPU GPU
GPU's GPU's
GPUDirect
GPUs GPUs
GraphBolt Graphbolt
GraphSage GraphSage
GRBM GRBM
GRE
GenAI GenAI
GenZ GenZ
GitHub GitHub
@@ -225,11 +199,7 @@ Haswell
Higgs Higgs
href href
Hyperparameters Hyperparameters
HybridEngine
Huggingface Huggingface
Hunyuan
HunyuanVideo
IB
ICD ICD
ICT ICT
ICV ICV
@@ -238,11 +208,8 @@ IDEs
IFWI IFWI
IMDb IMDb
IncDec IncDec
instrSize
interpolators
IOMMU IOMMU
IOP IOP
IOPS
IOPM IOPM
IOV IOV
IRQ IRQ
@@ -259,9 +226,7 @@ Intersphinx
Intra Intra
Ioffe Ioffe
JAX's JAX's
JAXLIB
Jinja Jinja
js
JSON JSON
Jupyter Jupyter
KFD KFD
@@ -281,16 +246,12 @@ LLM
LLMs LLMs
LLVM LLVM
LM LM
logsumexp
LRU
LSAN LSAN
LSan LSan
LTS LTS
LSTMs LSTMs
LteAll
LanguageCrossEntropy LanguageCrossEntropy
LoRA LoRA
MECO
MEM MEM
MERCHANTABILITY MERCHANTABILITY
MFMA MFMA
@@ -309,7 +270,6 @@ MNIST
MPI MPI
MPT MPT
MSVC MSVC
mul
MVAPICH MVAPICH
MVFFR MVFFR
Makefile Makefile
@@ -317,7 +277,6 @@ Makefiles
Matplotlib Matplotlib
Matrox Matrox
MaxText MaxText
MBT
Megablocks Megablocks
Megatrends Megatrends
Megatron Megatron
@@ -327,18 +286,13 @@ Meta's
Miniconda Miniconda
MirroredStrategy MirroredStrategy
Mixtral Mixtral
MLA
MosaicML MosaicML
MoEs MoEs
Mooncake
Mpops Mpops
Multicore Multicore
Multithreaded Multithreaded
mx
MXFP
MyEnvironment MyEnvironment
MyST MyST
NANOO
NBIO NBIO
NBIOs NBIOs
NCCL NCCL
@@ -371,7 +325,6 @@ OFED
OMM OMM
OMP OMP
OMPI OMPI
OOM
OMPT OMPT
OMPX OMPX
ONNX ONNX
@@ -394,11 +347,9 @@ PCC
PCI PCI
PCIe PCIe
PEFT PEFT
perf
PEQT PEQT
PIL PIL
PILImage PILImage
PJRT
POR POR
PRNG PRNG
PRs PRs
@@ -418,7 +369,6 @@ Profiler's
PyPi PyPi
Pytest Pytest
PyTorch PyTorch
QPS
Qcycles Qcycles
Qwen Qwen
RAII RAII
@@ -480,9 +430,7 @@ SKU
SKUs SKUs
SLES SLES
SLURM SLURM
Slurm
SMEM SMEM
SMFMA
SMI SMI
SMT SMT
SPI SPI
@@ -494,24 +442,18 @@ SWE
SerDes SerDes
ShareGPT ShareGPT
Shlens Shlens
simd
Skylake Skylake
Softmax Softmax
Spack Spack
SplitK SplitK
Supermicro Supermicro
Szegedy Szegedy
TagRAM
TCA TCA
TCC TCC
TCCs
TCI TCI
TCIU TCIU
TCP TCP
TCR TCR
TVM
THREADGROUPS
threadgroups
TensorRT TensorRT
TensorFloat TensorFloat
TF TF
@@ -521,12 +463,13 @@ TPS
TPU TPU
TPUs TPUs
TSME TSME
Taichi
Taichi's
Tagram Tagram
TensileLite TensileLite
TensorBoard TensorBoard
TensorFlow TensorFlow
TensorParallel TensorParallel
TheRock
ToC ToC
TorchAudio TorchAudio
torchaudio torchaudio
@@ -544,7 +487,6 @@ UAC
UC UC
UCC UCC
UCX UCX
ud
UE UE
UIF UIF
UMC UMC
@@ -555,11 +497,9 @@ UltraChat
Uncached Uncached
Unittests Unittests
Unhandled Unhandled
unwindowed
VALU VALU
VBIOS VBIOS
VCN VCN
verl's
VGPR VGPR
VGPRs VGPRs
VM VM
@@ -572,13 +512,11 @@ Vanhoucke
Vulkan Vulkan
WGP WGP
WGPs WGPs
WR
WX WX
WikiText WikiText
Wojna Wojna
Workgroups Workgroups
Writebacks Writebacks
xcc
XCD XCD
XCDs XCDs
XGBoost XGBoost
@@ -599,7 +537,6 @@ ZenDNN
accuracies accuracies
activations activations
addr addr
addEventListener
ade ade
ai ai
alloc alloc
@@ -615,7 +552,6 @@ autogenerated
autotune autotune
avx avx
awk awk
az
backend backend
backends backends
bb bb
@@ -633,7 +569,6 @@ boson
bosons bosons
br br
BrainFloat BrainFloat
btn
buildable buildable
bursty bursty
bzip bzip
@@ -645,21 +580,18 @@ centric
changelog changelog
checkpointing checkpointing
chiplet chiplet
classList
cmake cmake
cmd cmd
coalescable coalescable
codename codename
collater collater
comgr comgr
compat
completers completers
composable composable
concretization concretization
config config
configs configs
conformant conformant
const
constructible constructible
convolutional convolutional
convolves convolves
@@ -694,14 +626,12 @@ denoised
denoises denoises
denormalize denormalize
dequantization dequantization
dequantized
dequantizes dequantizes
deserializers deserializers
detections detections
dev dev
devicelibs devicelibs
devsel devsel
dgl
dimensionality dimensionality
disambiguates disambiguates
distro distro
@@ -725,7 +655,6 @@ exascale
executables executables
ffmpeg ffmpeg
filesystem filesystem
forEach
fortran fortran
fp fp
framebuffer framebuffer
@@ -734,16 +663,13 @@ galb
gcc gcc
gdb gdb
gemm gemm
getAttribute
gfortran gfortran
gfx gfx
githooks githooks
github github
globals globals
gnupg gnupg
gpu
grayscale grayscale
gx
gzip gzip
heterogenous heterogenous
hipBLAS hipBLAS
@@ -796,7 +722,6 @@ invariants
invocating invocating
ipo ipo
jax jax
json
kdb kdb
kfd kfd
kv kv
@@ -810,7 +735,6 @@ linalg
linearized linearized
linter linter
linux linux
llm
llvm llvm
lm lm
localscratch localscratch
@@ -818,8 +742,6 @@ logits
lossy lossy
macOS macOS
matchers matchers
maxtext
megablocks
megatron megatron
microarchitecture microarchitecture
migraphx migraphx
@@ -848,7 +770,6 @@ opencv
openmp openmp
openssl openssl
optimizers optimizers
ol
os os
oversubscription oversubscription
pageable pageable
@@ -856,13 +777,10 @@ pallas
parallelization parallelization
parallelizing parallelizing
param param
params
parameterization parameterization
passthrough passthrough
pe
perfcounter perfcounter
performant performant
piecewise
perl perl
pragma pragma
pre pre
@@ -890,7 +808,6 @@ profiler
profilers profilers
protobuf protobuf
pseudorandom pseudorandom
px
py py
pytorch pytorch
recommender recommender
@@ -898,12 +815,9 @@ recommenders
quantile quantile
quantizer quantizer
quasirandom quasirandom
querySelector
querySelectorAll
queueing queueing
qwen qwen
radeon radeon
rc
rccl rccl
rdc rdc
rdma rdma
@@ -919,8 +833,6 @@ req
resampling resampling
rescaling rescaling
reusability reusability
rhel
rl
RLHF RLHF
roadmap roadmap
roc roc
@@ -965,24 +877,19 @@ scalability
scalable scalable
scipy scipy
seealso seealso
selectattr
selectedTag
sendmsg sendmsg
seqs seqs
serializers serializers
setAttribute
sglang sglang
shader shader
sharding sharding
sigmoid sigmoid
sles
sm sm
smi smi
softmax softmax
spack spack
spmm spmm
src src
stanford
stochastically stochastically
strided strided
subcommand subcommand
@@ -999,10 +906,8 @@ symlink
symlinks symlinks
sys sys
tabindex tabindex
targetContainer
td td
tensorfloat tensorfloat
tf
th th
tokenization tokenization
tokenize tokenize
@@ -1011,12 +916,9 @@ tokenizer
tokenizes tokenizes
toolchain toolchain
toolchains toolchains
topk
toolset toolset
toolsets toolsets
torchtitan
torchvision torchvision
tp
tqdm tqdm
tracebacks tracebacks
txt txt
@@ -1039,7 +941,6 @@ USM
UTCL UTCL
UTIL UTIL
utils utils
UX
vL vL
variational variational
vdi vdi
@@ -1069,8 +970,6 @@ writebacks
wrreq wrreq
wzo wzo
xargs xargs
xdit
xDiT
xGMI xGMI
xPacked xPacked
xz xz
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,17 +1,32 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<manifest> <manifest>
<remote name="rocm-org" fetch="https://github.com/ROCm/" /> <remote name="rocm-org" fetch="https://github.com/ROCm/" />
<default revision="refs/tags/rocm-7.1.1" <default revision="refs/tags/rocm-6.4.3"
remote="rocm-org" remote="rocm-org"
sync-c="true" sync-c="true"
sync-j="4" /> sync-j="4" />
<!--list of projects for ROCm--> <!--list of projects for ROCm-->
<project name="ROCK-Kernel-Driver" /> <project name="ROCK-Kernel-Driver" />
<project name="ROCR-Runtime" />
<project name="amdsmi" /> <project name="amdsmi" />
<project name="rdc" />
<project name="rocm_bandwidth_test" /> <project name="rocm_bandwidth_test" />
<project name="rocm_smi_lib" />
<project name="rocm-core" />
<project name="rocm-examples" /> <project name="rocm-examples" />
<project name="rocminfo" />
<project name="rocprofiler" />
<project name="rocprofiler-register" />
<project name="rocprofiler-sdk" />
<project name="rocprofiler-compute" />
<project name="rocprofiler-systems" />
<project name="roctracer" />
<!--HIP Projects--> <!--HIP Projects-->
<project name="HIP" />
<project name="hip-tests" />
<project name="HIPIFY" /> <project name="HIPIFY" />
<project name="clr" />
<project name="hipother" />
<!-- The following projects are all associated with the AMDGPU LLVM compiler --> <!-- The following projects are all associated with the AMDGPU LLVM compiler -->
<project name="half" /> <project name="half" />
<project name="llvm-project" /> <project name="llvm-project" />
@@ -22,32 +37,36 @@
<project name="rocr_debug_agent" /> <project name="rocr_debug_agent" />
<!-- ROCm Libraries --> <!-- ROCm Libraries -->
<project groups="mathlibs" name="AMDMIGraphX" /> <project groups="mathlibs" name="AMDMIGraphX" />
<project groups="mathlibs" name="MIOpen" />
<project groups="mathlibs" name="MIVisionX" /> <project groups="mathlibs" name="MIVisionX" />
<project groups="mathlibs" name="ROCmValidationSuite" /> <project groups="mathlibs" name="ROCmValidationSuite" />
<project groups="mathlibs" name="Tensile" />
<project groups="mathlibs" name="composable_kernel" /> <project groups="mathlibs" name="composable_kernel" />
<project groups="mathlibs" name="hipBLAS-common" />
<project groups="mathlibs" name="hipBLAS" />
<project groups="mathlibs" name="hipBLASLt" />
<project groups="mathlibs" name="hipCUB" />
<project groups="mathlibs" name="hipFFT" />
<project groups="mathlibs" name="hipRAND" />
<project groups="mathlibs" name="hipSOLVER" /> <project groups="mathlibs" name="hipSOLVER" />
<project groups="mathlibs" name="hipSPARSE" />
<project groups="mathlibs" name="hipSPARSELt" />
<project groups="mathlibs" name="hipTensor" /> <project groups="mathlibs" name="hipTensor" />
<project groups="mathlibs" name="hipfort" /> <project groups="mathlibs" name="hipfort" />
<project groups="mathlibs" name="rccl" /> <project groups="mathlibs" name="rccl" />
<project groups="mathlibs" name="rocAL" /> <project groups="mathlibs" name="rocAL" />
<project groups="mathlibs" name="rocALUTION" /> <project groups="mathlibs" name="rocALUTION" />
<project groups="mathlibs" name="rocBLAS" />
<project groups="mathlibs" name="rocDecode" /> <project groups="mathlibs" name="rocDecode" />
<project groups="mathlibs" name="rocJPEG" /> <project groups="mathlibs" name="rocJPEG" />
<!-- The following components have been migrated to rocm-libraries:
hipBLAS-common hipBLAS hipBLASLt hipCUB
hipFFT hipRAND hipSPARSE hipSPARSELt
MIOpen rocBLAS rocFFT rocPRIM rocRAND
rocSPARSE rocThrust Tensile -->
<project groups="mathlibs" name="rocm-libraries" />
<!-- The following components have been migrated to rocm-systems:
aqlprofile clr hip hip-tests hipother
rdc rocm-core rocm_smi_lib rocminfo rocprofiler-compute
rocprofiler-register rocprofiler-sdk rocprofiler-systems
rocprofiler rocr-runtime roctracer -->
<project groups="mathlibs" name="rocm-systems" />
<project groups="mathlibs" name="rocPyDecode" /> <project groups="mathlibs" name="rocPyDecode" />
<project groups="mathlibs" name="rocSOLVER" /> <project groups="mathlibs" name="rocFFT" />
<project groups="mathlibs" name="rocPRIM" />
<project groups="mathlibs" name="rocRAND" />
<project groups="mathlibs" name="rocSHMEM" /> <project groups="mathlibs" name="rocSHMEM" />
<project groups="mathlibs" name="rocSOLVER" />
<project groups="mathlibs" name="rocSPARSE" />
<project groups="mathlibs" name="rocThrust" />
<project groups="mathlibs" name="rocWMMA" /> <project groups="mathlibs" name="rocWMMA" />
<project groups="mathlibs" name="rocm-cmake" /> <project groups="mathlibs" name="rocm-cmake" />
<project groups="mathlibs" name="rpp" /> <project groups="mathlibs" name="rpp" />
@@ -25,69 +25,69 @@ additional licenses. Please review individual repositories for more information.
<!-- spellcheck-disable --> <!-- spellcheck-disable -->
| Component | License | | Component | License |
|:---------------------|:-------------------------| |:---------------------|:-------------------------|
| [AMD Compute Language Runtime (CLR)](https://github.com/ROCm/rocm-systems/tree/develop/projects/clr) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/clr/LICENSE.md) | | [AMD Compute Language Runtime (CLR)](https://github.com/ROCm/clr) | [MIT](https://github.com/ROCm/clr/blob/amd-staging/LICENSE.txt) |
| [AMD SMI](https://github.com/ROCm/amdsmi) | [MIT](https://github.com/ROCm/amdsmi/blob/amd-staging/LICENSE) | | [AMD SMI](https://github.com/ROCm/amdsmi) | [MIT](https://github.com/ROCm/amdsmi/blob/amd-staging/LICENSE) |
| [aomp](https://github.com/ROCm/aomp/) | [Apache 2.0](https://github.com/ROCm/aomp/blob/aomp-dev/LICENSE) | | [aomp](https://github.com/ROCm/aomp/) | [Apache 2.0](https://github.com/ROCm/aomp/blob/aomp-dev/LICENSE) |
| [aomp-extras](https://github.com/ROCm/aomp-extras/) | [MIT](https://github.com/ROCm/aomp-extras/blob/aomp-dev/LICENSE) | | [aomp-extras](https://github.com/ROCm/aomp-extras/) | [MIT](https://github.com/ROCm/aomp-extras/blob/aomp-dev/LICENSE) |
| [AQLprofile](https://github.com/ROCm/rocm-systems/tree/develop/projects/aqlprofile/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/aqlprofile/LICENSE.md) | | [AQLprofile] | [MIT](https://github.com/ROCm/aqlprofile/blob/amd-staging/LICENSE) |
| [Code Object Manager (Comgr)](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/comgr) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/comgr/LICENSE.txt) | | [Code Object Manager (Comgr)](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/comgr) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/comgr/LICENSE.txt) |
| [Composable Kernel](https://github.com/ROCm/composable_kernel) | [MIT](https://github.com/ROCm/composable_kernel/blob/develop/LICENSE) | | [Composable Kernel](https://github.com/ROCm/composable_kernel) | [MIT](https://github.com/ROCm/composable_kernel/blob/develop/LICENSE) |
| [half](https://github.com/ROCm/half/) | [MIT](https://github.com/ROCm/half/blob/rocm/LICENSE.txt) | | [half](https://github.com/ROCm/half/) | [MIT](https://github.com/ROCm/half/blob/rocm/LICENSE.txt) |
| [HIP](https://github.com/ROCm/rocm-systems/tree/develop/projects/hip/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/hip/LICENSE.md) | | [HIP](https://github.com/ROCm/HIP/) | [MIT](https://github.com/ROCm/HIP/blob/amd-staging/LICENSE.txt) |
| [hipamd](https://github.com/ROCm/rocm-systems/tree/develop/projects/clr/hipamd/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/clr/hipamd/LICENSE.md) | | [hipamd](https://github.com/ROCm/clr/tree/amd-staging/hipamd) | [MIT](https://github.com/ROCm/clr/blob/amd-staging/hipamd/LICENSE.txt) |
| [hipBLAS](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipblas/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipblas/LICENSE.md) | | [hipBLAS](https://github.com/ROCm/hipBLAS/) | [MIT](https://github.com/ROCm/hipBLAS/blob/develop/LICENSE.md) |
| [hipBLASLt](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipblaslt/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipblaslt/LICENSE.md) | | [hipBLASLt](https://github.com/ROCm/hipBLASLt/) | [MIT](https://github.com/ROCm/hipBLASLt/blob/develop/LICENSE.md) |
| [HIPCC](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/hipcc) | [MIT](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/hipcc/LICENSE.txt) |
| [hipCUB](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipcub/) | [Custom](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipcub/LICENSE.txt) |
| [hipFFT](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipfft/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipfft/LICENSE.md) |
| [hipfort](https://github.com/ROCm/hipfort/) | [MIT](https://github.com/ROCm/hipfort/blob/develop/LICENSE) |
| [HIPIFY](https://github.com/ROCm/HIPIFY/) | [MIT](https://github.com/ROCm/HIPIFY/blob/amd-staging/LICENSE.txt) |
| [hipRAND](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hiprand/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hiprand/LICENSE.md) |
| [hipSOLVER](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipsolver/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipsolver/LICENSE.md) |
| [hipSPARSE](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipsparse/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipsparse/LICENSE.md) |
| [hipSPARSELt](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipsparselt/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipsparselt/LICENSE.md) |
| [hipTensor](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hiptensor/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hiptensor/LICENSE) |
| [llvm-project](https://github.com/ROCm/llvm-project/) | [Apache](https://github.com/ROCm/llvm-project/blob/amd-staging/LICENSE.TXT) |
| [llvm-project/flang](https://github.com/ROCm/llvm-project/tree/amd-staging/flang) | [Apache 2.0](https://github.com/ROCm/llvm-project/blob/amd-staging/flang/LICENSE.TXT) |
| [MIGraphX](https://github.com/ROCm/AMDMIGraphX/) | [MIT](https://github.com/ROCm/AMDMIGraphX/blob/develop/LICENSE) |
| [MIOpen](https://github.com/ROCm/rocm-libraries/tree/develop/projects/miopen/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/miopen/LICENSE.md) |
| [MIVisionX](https://github.com/ROCm/MIVisionX/) | [MIT](https://github.com/ROCm/MIVisionX/blob/develop/LICENSE.txt) |
| [rocAL](https://github.com/ROCm/rocAL) | [MIT](https://github.com/ROCm/rocAL/blob/develop/LICENSE.txt) |
| [rocALUTION](https://github.com/ROCm/rocALUTION/) | [MIT](https://github.com/ROCm/rocALUTION/blob/develop/LICENSE.md) |
| [rocBLAS](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocblas/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocblas/LICENSE.md) |
| [ROCdbgapi](https://github.com/ROCm/ROCdbgapi/) | [MIT](https://github.com/ROCm/ROCdbgapi/blob/amd-staging/LICENSE.txt) |
| [rocDecode](https://github.com/ROCm/rocDecode) | [MIT](https://github.com/ROCm/rocDecode/blob/develop/LICENSE) |
| [rocFFT](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocfft/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocfft/LICENSE.md) |
| [ROCgdb](https://github.com/ROCm/ROCgdb/) | [GNU General Public License v3.0](https://github.com/ROCm/ROCgdb/blob/amd-staging/COPYING3) |
| [rocJPEG](https://github.com/ROCm/rocJPEG/) | [MIT](https://github.com/ROCm/rocJPEG/blob/develop/LICENSE) |
| [ROCK-Kernel-Driver](https://github.com/ROCm/ROCK-Kernel-Driver/) | [GPL 2.0 WITH Linux-syscall-note](https://github.com/ROCm/ROCK-Kernel-Driver/blob/master/COPYING) |
| [rocminfo](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocminfo/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocminfo/License.txt) |
| [ROCm Bandwidth Test](https://github.com/ROCm/rocm_bandwidth_test/) | [MIT](https://github.com/ROCm/rocm_bandwidth_test/blob/master/LICENSE.txt) |
| [ROCm CMake](https://github.com/ROCm/rocm-cmake/) | [MIT](https://github.com/ROCm/rocm-cmake/blob/develop/LICENSE) |
| [ROCm Communication Collectives Library (RCCL)](https://github.com/ROCm/rccl/) | [Custom](https://github.com/ROCm/rccl/blob/develop/LICENSE.txt) |
| [ROCm-Core](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocm-core/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocm-core/LICENSE.md) |
| [ROCm Compute Profiler](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler-compute/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocprofiler-compute/LICENSE.md) |
| [ROCm Data Center (RDC)](https://github.com/ROCm/rocm-systems/tree/develop/projects/rdc/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rdc/LICENSE.md) |
| [ROCm-Device-Libs](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/device-libs) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/device-libs/LICENSE.TXT) |
| [ROCm-OpenCL-Runtime](https://github.com/ROCm/rocm-systems/tree/develop/projects/clr/opencl/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/clr/opencl/LICENSE.md) |
| [ROCm Performance Primitives (RPP)](https://github.com/ROCm/rpp) | [MIT](https://github.com/ROCm/rpp/blob/develop/LICENSE) |
| [ROCm SMI Lib](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocm-smi-lib/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocm-smi-lib/LICENSE.md) |
| [ROCm Systems Profiler](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler-systems/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocprofiler-systems/LICENSE.md) |
| [ROCm Validation Suite](https://github.com/ROCm/ROCmValidationSuite/) | [MIT](https://github.com/ROCm/ROCmValidationSuite/blob/master/LICENSE) |
| [rocPRIM](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocprim/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocprim/LICENSE.md) |
| [ROCProfiler](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocprofiler/LICENSE.md) |
| [ROCprofiler-SDK](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler-sdk/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocprofiler-sdk/LICENSE.md) |
| [rocPyDecode](https://github.com/ROCm/rocPyDecode) | [MIT](https://github.com/ROCm/rocPyDecode/blob/develop/LICENSE.txt) |
| [rocRAND](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocrand/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocrand/LICENSE.md) |
| [ROCr Debug Agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/amd-staging/LICENSE.txt) |
| [ROCR-Runtime](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocr-runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocr-runtime/LICENSE.txt) |
| [rocSHMEM](https://github.com/ROCm/rocSHMEM/) | [MIT](https://github.com/ROCm/rocSHMEM/blob/develop/LICENSE.md) |
| [rocSOLVER](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocsolver/) | [BSD-2-Clause](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocsolver/LICENSE.md) |
| [rocSPARSE](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocsparse/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocsparse/LICENSE.md) |
| [rocThrust](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocthrust/) | [Apache 2.0](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocthrust/LICENSE) |
| [ROCTracer](https://github.com/ROCm/rocm-systems/tree/develop/projects/roctracer/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/roctracer/LICENSE.md) |
| [rocWMMA](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocwmma/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocwmma/LICENSE.md) |
| [Tensile](https://github.com/ROCm/rocm-libraries/tree/develop/shared/tensile/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/shared/tensile/LICENSE.md) |
| [TransferBench](https://github.com/ROCm/TransferBench) | [MIT](https://github.com/ROCm/TransferBench/blob/develop/LICENSE.md) |
Open sourced ROCm components are released via public GitHub

View File

@@ -1,136 +1,131 @@
ROCm Version,7.1.1,7.1.0,7.0.2,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
:ref:`Operating systems & kernels <OS-kernel-versions>` [#os-compatibility-past-60]_,Ubuntu 24.04.3,Ubuntu 24.04.3,Ubuntu 24.04.3,Ubuntu 24.04.3,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
,,,,,,,,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
,"RHEL 10.1, 10.0, 9.7, 9.6, 9.4","RHEL 10.0, 9.6, 9.4","RHEL 10.0, 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
,SLES 15 SP7,SLES 15 SP7,SLES 15 SP7,SLES 15 SP7,"SLES 15 SP7, SP6","SLES 15 SP7, SP6",SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
,,,,,,,,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
,"Oracle Linux 10, 9, 8","Oracle Linux 10, 9, 8","Oracle Linux 10, 9, 8","Oracle Linux 9, 8","Oracle Linux 9, 8","Oracle Linux 9, 8","Oracle Linux 9, 8","Oracle Linux 9, 8",Oracle Linux 8.10,Oracle Linux 8.10,Oracle Linux 8.10,Oracle Linux 8.10,Oracle Linux 8.9,Oracle Linux 8.9,Oracle Linux 8.9,Oracle Linux 8.9,Oracle Linux 8.9,Oracle Linux 8.9,Oracle Linux 8.9,,,
,"Debian 13, 12","Debian 13, 12","Debian 13, 12",Debian 12,Debian 12,Debian 12,Debian 12,Debian 12,Debian 12,Debian 12,Debian 12,,,,,,,,,,,
,,,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,,,,,,,,,,,,
,Rocky Linux 9,Rocky Linux 9,Rocky Linux 9,Rocky Linux 9,,,,,,,,,,,,,,,,,,
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA4,CDNA4,CDNA4,CDNA4,,,,,,,,,,,,,,,,,,
,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
,RDNA4,RDNA4,RDNA4,RDNA4,RDNA4,RDNA4,RDNA4,,,,,,,,,,,,,,,
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>` [#gpu-compatibility-past-60]_,gfx950,gfx950,gfx950,gfx950,,,,,,,,,,,,,,,,,,
,gfx1201,gfx1201,gfx1201,gfx1201,gfx1201,gfx1201,gfx1201,,,,,,,,,,,,,,,
,gfx1200,gfx1200,gfx1200,gfx1200,gfx1200,gfx1200,gfx1200,,,,,,,,,,,,,,,
,gfx1101,gfx1101,gfx1101,gfx1101,gfx1101,gfx1101,gfx1101,,,,,,,,,,,,,,,
,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942, gfx942, gfx942, gfx942, gfx942, gfx942, gfx942
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
,,,,,,,,,,,,,,,,,,,,,,
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.9, 2.8, 2.7","2.8, 2.7, 2.6","2.8, 2.7, 2.6","2.7, 2.6, 2.5","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.20.0, 2.19.1, 2.18.1","2.20.0, 2.19.1, 2.18.1","2.19.1, 2.18.1, 2.17.1 [#tf-mi350-past-60]_","2.19.1, 2.18.1, 2.17.1 [#tf-mi350-past-60]_","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.7.1,0.7.1,0.6.0,0.6.0,0.4.35,0.4.35,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
:doc:`verl <../compatibility/ml-compatibility/verl-compatibility>` [#verl_compat-past-60]_,N/A,N/A,N/A,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.3.0.post0,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>` [#stanford-megatron-lm_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,85f95ae,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat-past-60]_,N/A,N/A,N/A,2.4.0,2.4.0,N/A,N/A,2.4.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>` [#megablocks_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.7.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`Ray <../compatibility/ml-compatibility/ray-compatibility>` [#ray_compat-past-60]_,N/A,N/A,N/A,2.51.1,N/A,N/A,2.48.0.post0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat-past-60]_,N/A,N/A,N/A,b6652,b6356,b6356,b6356,b5997,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`FlashInfer <../compatibility/ml-compatibility/flashinfer-compatibility>` [#flashinfer_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,v0.2.5,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.23.1,1.22.0,1.22.0,1.22.0,1.20.0,1.20.0,1.20.0,1.20.0,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
`UCC <https://github.com/ROCm/ucc>`_,>=1.4.0,>=1.4.0,>=1.4.0,>=1.4.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
`UCX <https://github.com/ROCm/ucx>`_,>=1.17.0,>=1.17.0,>=1.17.0,>=1.17.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
,,,,,,,,,,,,,,,,,,,,,,
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
Thrust,2.8.5,2.8.5,2.6.0,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
CUB,2.8.5,2.8.5,2.6.0,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
,,,,,,,,,,,,,,,,,,,,,,
DRIVER & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.20.1, 30.20.0 [#mi325x_KVM-past-60]_, 30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x","30.20.0 [#mi325x_KVM-past-60]_, 30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x","30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x","30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x, 6.2.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
,,,,,,,,,,,,,,,,,,,,,,
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
:doc:`MIGraphX <amdmigraphx:index>`,2.14.0,2.14.0,2.13.0,2.13.0,2.12.0,2.12.0,2.12.0,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
:doc:`MIOpen <miopen:index>`,3.5.1,3.5.1,3.5.0,3.5.0,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
:doc:`MIVisionX <mivisionx:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
:doc:`rocAL <rocal:index>`,2.4.0,2.4.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
:doc:`rocDecode <rocdecode:index>`,1.4.0,1.4.0,1.0.0,1.0.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
:doc:`rocJPEG <rocjpeg:index>`,1.2.0,1.2.0,1.1.0,1.1.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`rocPyDecode <rocpydecode:index>`,0.7.0,0.7.0,0.6.0,0.6.0,0.3.1,0.3.1,0.3.1,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`RPP <rpp:index>`,2.1.0,2.1.0,2.0.0,2.0.0,1.9.10,1.9.10,1.9.10,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
,,,,,,,,,,,,,,,,,,,,,,
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`RCCL <rccl:index>`,2.27.7,2.27.7,2.26.6,2.26.6,2.22.3,2.22.3,2.22.3,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
:doc:`rocSHMEM <rocshmem:index>`,3.1.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.0,2.0.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
,,,,,,,,,,,,,,,,,,,,,,
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
:doc:`hipBLAS <hipblas:index>`,3.1.0,3.1.0,3.0.2,3.0.0,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
:doc:`hipBLASLt <hipblaslt:index>`,1.1.0,1.1.0,1.0.0,1.0.0,0.12.1,0.12.1,0.12.1,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
:doc:`hipFFT <hipfft:index>`,1.0.21,1.0.21,1.0.20,1.0.20,1.0.18,1.0.18,1.0.18,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
:doc:`hipfort <hipfort:index>`,0.7.1,0.7.1,0.7.0,0.7.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
:doc:`hipRAND <hiprand:index>`,3.1.0,3.1.0,3.0.0,3.0.0,2.12.0,2.12.0,2.12.0,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
:doc:`hipSOLVER <hipsolver:index>`,3.1.0,3.1.0,3.0.0,3.0.0,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
:doc:`hipSPARSE <hipsparse:index>`,4.1.0,4.1.0,4.0.1,4.0.1,3.2.0,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.5,0.2.5,0.2.4,0.2.4,0.2.3,0.2.3,0.2.3,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
:doc:`rocALUTION <rocalution:index>`,4.0.1,4.0.1,4.0.0,4.0.0,3.2.3,3.2.3,3.2.3,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
:doc:`rocBLAS <rocblas:index>`,5.1.1,5.1.0,5.0.2,5.0.0,4.4.1,4.4.1,4.4.0,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
:doc:`rocFFT <rocfft:index>`,1.0.35,1.0.35,1.0.34,1.0.34,1.0.32,1.0.32,1.0.32,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
:doc:`rocRAND <rocrand:index>`,4.1.0,4.1.0,4.0.0,4.0.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
:doc:`rocSOLVER <rocsolver:index>`,3.31.0,3.31.0,3.30.1,3.30.0,3.28.2,3.28.2,3.28.0,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
:doc:`rocSPARSE <rocsparse:index>`,4.1.0,4.1.0,4.0.2,4.0.2,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
:doc:`rocWMMA <rocwmma:index>`,2.1.0,2.0.0,2.0.0,2.0.0,1.7.0,1.7.0,1.7.0,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
:doc:`Tensile <tensile:src/index>`,4.44.0,4.44.0,4.44.0,4.44.0,4.43.0,4.43.0,4.43.0,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
,,,,,,,,,,,,,,,,,,,,,,
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`hipCUB <hipcub:index>`,4.1.0,4.1.0,4.0.0,4.0.0,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
:doc:`hipTensor <hiptensor:index>`,2.0.0,2.0.0,2.0.0,2.0.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
:doc:`rocPRIM <rocprim:index>`,4.1.0,4.1.0,4.0.1,4.0.0,3.4.1,3.4.1,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
:doc:`rocThrust <rocthrust:index>`,4.1.0,4.1.0,4.0.0,4.0.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
,,,,,,,,,,,,,,,,,,,,,,
SUPPORT LIBS,,,,,,,,,,,,,,,,,,,,,,
`hipother <https://github.com/ROCm/hipother>`_,7.1.52802,7.1.25424,7.0.51831,7.0.51830,6.4.43483,6.4.43483,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
`rocm-core <https://github.com/ROCm/rocm-core>`_,7.1.1,7.1.0,7.0.2,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
,,,,,,,,,,,,,,,,,,,,,,
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`AMD SMI <amdsmi:index>`,26.2.0,26.1.0,26.0.2,26.0.0,25.5.1,25.5.1,25.4.2,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
:doc:`ROCm Data Center Tool <rdc:index>`,1.2.0,1.2.0,1.1.0,1.1.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.8.0,7.8.0,7.8.0,7.8.0,7.7.0,7.5.0,7.5.0,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.3.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
,,,,,,,,,,,,,,,,,,,,,,
PERFORMANCE TOOLS,,,,,,,,,,,,,,,,,,,,,,
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,2.6.0,2.6.0,2.6.0,2.6.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.3.1,3.3.0,3.2.3,3.2.3,3.1.1,3.1.1,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.2.1,1.2.0,1.1.1,1.1.0,1.0.2,1.0.2,1.0.1,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`ROCProfiler <rocprofiler:index>`,2.0.70101,2.0.70100,2.0.70002,2.0.70000,2.0.60403,2.0.60402,2.0.60401,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,1.0.0,1.0.0,1.0.0,1.0.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`ROCTracer <roctracer:index>`,4.1.70101,4.1.70100,4.1.70002,4.1.70000,4.1.60403,4.1.60402,4.1.60401,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
,,,,,,,,,,,,,,,,,,,,,,
DEVELOPMENT TOOLS,,,,,,,,,,,,,,,,,,,,,,
:doc:`HIPIFY <hipify:index>`,20.0.0,20.0.0,20.0.0,20.0.0,19.0.0,19.0.0,19.0.0,19.0.0,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.4,0.77.4,0.77.4,0.77.3,0.77.2,0.77.2,0.77.2,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,16.3.0,16.3.0,16.3.0,16.3.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.1.0,2.1.0,2.1.0,2.1.0,2.0.4,2.0.4,2.0.4,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
,,,,,,,,,,,,,,,,,,,,,,
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
`Flang <https://github.com/ROCm/flang>`_,20.0.025444,20.0.025425,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
:doc:`llvm-project <llvm-project:index>`,20.0.025444,20.0.025425,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,20.0.025444,20.0.025425,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
,,,,,,,,,,,,,,,,,,,,,,
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
:doc:`AMD CLR <hip:understand/amd_clr>`,7.1.52802,7.1.25424,7.0.51831,7.0.51830,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
:doc:`HIP <hip:index>`,7.1.52802,7.1.25424,7.0.51831,7.0.51830,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
:doc:`ROCr Runtime <rocr-runtime:index>`,1.18.0,1.18.0,1.18.0,1.18.0,1.15.0,1.15.0,1.15.0,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0

View File

@@ -10,9 +10,10 @@ Use this matrix to view the ROCm compatibility and system requirements across su
You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-rocm-compatibility-matrix>`. You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-rocm-compatibility-matrix>`.
GPUs listed in the following table support compute workloads (no display Accelerators and GPUs listed in the following table support compute workloads (no display
information or graphics). If you're using ROCm with AMD Radeon GPUs or Ryzen APUs for graphics information or graphics). If you're using ROCm with AMD Radeon or Radeon Pro GPUs for graphics
workloads, see the :doc:`Use ROCm on Radeon and Ryzen <radeon:index>` to verify workloads, see the `Use ROCm on Radeon GPU documentation
<https://rocm.docs.amd.com/projects/radeon/en/latest/docs/compatibility.html>`_ to verify
compatibility and system requirements. compatibility and system requirements.
.. |br| raw:: html .. |br| raw:: html
@@ -22,31 +23,28 @@ compatibility and system requirements.
.. container:: format-big-table .. container:: format-big-table
.. csv-table:: .. csv-table::
:header: "ROCm Version", "7.1.1", "7.1.0", "6.4.0" :header: "ROCm Version", "6.4.3", "6.4.2", "6.3.0"
:stub-columns: 1 :stub-columns: 1
:ref:`Operating systems & kernels <OS-kernel-versions>` [#os-compatibility]_,Ubuntu 24.04.3,Ubuntu 24.04.3,Ubuntu 24.04.2 :ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5 ,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5
,"RHEL 10.1, 10.0, 9.7, |br| 9.6, 9.4","RHEL 10.0, 9.6, 9.4","RHEL 9.5, 9.4" ,"RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.5, 9.4"
,RHEL 8.10,RHEL 8.10,RHEL 8.10 ,RHEL 8.10,RHEL 8.10,RHEL 8.10
,SLES 15 SP7,SLES 15 SP7,SLES 15 SP6 ,"SLES 15 SP7, SP6","SLES 15 SP7, SP6","SLES 15 SP6, SP5"
,"Oracle Linux 10, 9, 8","Oracle Linux 10, 9, 8","Oracle Linux 9, 8" ,"Oracle Linux 9, 8 [#mi300x]_","Oracle Linux 9, 8 [#mi300x]_",Oracle Linux 8.10 [#mi300x]_
,"Debian 13, 12","Debian 13, 12",Debian 12 ,Debian 12 [#single-node]_,Debian 12 [#single-node]_,
,,,Azure Linux 3.0 ,Azure Linux 3.0 [#mi300x]_,Azure Linux 3.0 [#mi300x]_,
,Rocky Linux 9,Rocky Linux 9,
,.. _architecture-support-compatibility-matrix:,, ,.. _architecture-support-compatibility-matrix:,,
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA4,CDNA4, :doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3
,CDNA3,CDNA3,CDNA3
,CDNA2,CDNA2,CDNA2 ,CDNA2,CDNA2,CDNA2
,CDNA,CDNA,CDNA ,CDNA,CDNA,CDNA
,RDNA4,RDNA4, ,RDNA4,RDNA4,
,RDNA3,RDNA3,RDNA3 ,RDNA3,RDNA3,RDNA3
,RDNA2,RDNA2,RDNA2 ,RDNA2,RDNA2,RDNA2
,.. _gpu-support-compatibility-matrix:,, ,.. _gpu-support-compatibility-matrix:,,
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>` [#gpu-compatibility]_,gfx950,gfx950, :doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1201 [#RDNA-OS]_,gfx1201 [#RDNA-OS]_,
,gfx1201,gfx1201, ,gfx1200 [#RDNA-OS]_,gfx1200 [#RDNA-OS]_,
,gfx1200,gfx1200, ,gfx1101 [#RDNA-OS]_ [#7700XT-OS]_,gfx1101 [#RDNA-OS]_ [#7700XT-OS]_,
,gfx1101,gfx1101,
,gfx1100,gfx1100,gfx1100 ,gfx1100,gfx1100,gfx1100
,gfx1030,gfx1030,gfx1030 ,gfx1030,gfx1030,gfx1030
,gfx942,gfx942,gfx942 ,gfx942,gfx942,gfx942
@@ -54,122 +52,151 @@ compatibility and system requirements.
,gfx908,gfx908,gfx908 ,gfx908,gfx908,gfx908
,,, ,,,
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix:,, FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix:,,
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.9, 2.8, 2.7","2.8, 2.7, 2.6","2.6, 2.5, 2.4, 2.3" :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 2.1, 2.0, 1.13"
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.20.0, 2.19.1, 2.18.1","2.20.0, 2.19.1, 2.18.1","2.18.1, 2.17.1, 2.16.2" :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1"
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.7.1,0.7.1,0.4.35 :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.31
:doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat]_,N/A,N/A,2.4.0 :doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>`,N/A,N/A,85f95ae
:doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat]_,N/A,N/A,b5997 :doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>`,N/A,N/A,0.7.0
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.23.1,1.22.0,1.20.0 `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.17.3
,,, ,,,
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix:,, THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix:,,
`UCC <https://github.com/ROCm/ucc>`_,>=1.4.0,>=1.4.0,>=1.3.0 `UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0
`UCX <https://github.com/ROCm/ucx>`_,>=1.17.0,>=1.17.0,>=1.15.0 `UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0
,,, ,,,
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix:,, THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix:,,
Thrust,2.8.5,2.8.5,2.5.0 Thrust,2.5.0,2.5.0,2.3.2
CUB,2.8.5,2.8.5,2.5.0 CUB,2.5.0,2.5.0,2.3.2
,,, ,,,
DRIVER & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,, KMD & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,,
:doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.20.1, 30.20.0 [#mi325x_KVM]_, |br| 30.10.2, 30.10.1 [#driver_patch]_, |br| 30.10, 6.4.x","30.20.0 [#mi325x_KVM]_, 30.10.2, |br| 30.10.1 [#driver_patch]_, 30.10, 6.4.x","6.4.x, 6.3.x, 6.2.x, 6.1.x" :doc:`KMD versions <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
,,, ,,,
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,, ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,,
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0 :doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0
:doc:`MIGraphX <amdmigraphx:index>`,2.14.0,2.14.0,2.12.0 :doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.11.0
:doc:`MIOpen <miopen:index>`,3.5.1,3.5.1,3.4.0 :doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.3.0
:doc:`MIVisionX <mivisionx:index>`,3.4.0,3.4.0,3.2.0 :doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.1.0
:doc:`rocAL <rocal:index>`,2.4.0,2.4.0,2.2.0 :doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.1.0
:doc:`rocDecode <rocdecode:index>`,1.4.0,1.4.0,0.10.0 :doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.8.0
:doc:`rocJPEG <rocjpeg:index>`,1.2.0,1.2.0,0.8.0 :doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.6.0
:doc:`rocPyDecode <rocpydecode:index>`,0.7.0,0.7.0,0.3.1 :doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.2.0
:doc:`RPP <rpp:index>`,2.1.0,2.1.0,1.9.10 :doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.1
,,, ,,,
COMMUNICATION,.. _commlibs-support-compatibility-matrix:,, COMMUNICATION,.. _commlibs-support-compatibility-matrix:,,
:doc:`RCCL <rccl:index>`,2.27.7,2.27.7,2.22.3 :doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.21.5
:doc:`rocSHMEM <rocshmem:index>`,3.1.0,3.0.0,2.0.0 :doc:`rocSHMEM <rocshmem:index>`,2.0.1,2.0.1,N/A
,,, ,,,
MATH LIBS,.. _mathlibs-support-compatibility-matrix:,, MATH LIBS,.. _mathlibs-support-compatibility-matrix:,,
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0 `half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0
:doc:`hipBLAS <hipblas:index>`,3.1.0,3.1.0,2.4.0 :doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.3.0
:doc:`hipBLASLt <hipblaslt:index>`,1.1.0,1.1.0,0.12.0 :doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.1,0.10.0
:doc:`hipFFT <hipfft:index>`,1.0.21,1.0.21,1.0.18 :doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.17
:doc:`hipfort <hipfort:index>`,0.7.1,0.7.1,0.6.0 :doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.5.0
:doc:`hipRAND <hiprand:index>`,3.1.0,3.1.0,2.12.0 :doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.11.0
:doc:`hipSOLVER <hipsolver:index>`,3.1.0,3.1.0,2.4.0 :doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.3.0
:doc:`hipSPARSE <hipsparse:index>`,4.1.0,4.1.0,3.2.0 :doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.1.2
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.5,0.2.5,0.2.3 :doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.2
:doc:`rocALUTION <rocalution:index>`,4.0.1,4.0.1,3.2.2 :doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.3,3.2.1
:doc:`rocBLAS <rocblas:index>`,5.1.1,5.1.0,4.4.0 :doc:`rocBLAS <rocblas:index>`,4.4.1,4.4.1,4.3.0
:doc:`rocFFT <rocfft:index>`,1.0.35,1.0.35,1.0.32 :doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.31
:doc:`rocRAND <rocrand:index>`,4.1.0,4.1.0,3.3.0 :doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.2.0
:doc:`rocSOLVER <rocsolver:index>`,3.31.0,3.31.0,3.28.0 :doc:`rocSOLVER <rocsolver:index>`,3.28.2,3.28.2,3.27.0
:doc:`rocSPARSE <rocsparse:index>`,4.1.0,4.1.0,3.4.0 :doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.3.0
:doc:`rocWMMA <rocwmma:index>`,2.1.0,2.0.0,1.7.0 :doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.6.0
:doc:`Tensile <tensile:src/index>`,4.44.0,4.44.0,4.43.0 :doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.42.0
,,, ,,,
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix:,, PRIMITIVES,.. _primitivelibs-support-compatibility-matrix:,,
:doc:`hipCUB <hipcub:index>`,4.1.0,4.1.0,3.4.0 :doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.3.0
:doc:`hipTensor <hiptensor:index>`,2.0.0,2.0.0,1.5.0 :doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.4.0
:doc:`rocPRIM <rocprim:index>`,4.1.0,4.1.0,3.4.0 :doc:`rocPRIM <rocprim:index>`,3.4.1,3.4.1,3.3.0
:doc:`rocThrust <rocthrust:index>`,4.1.0,4.1.0,3.3.0 :doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0
,,, ,,,
SUPPORT LIBS,,, SUPPORT LIBS,,,
`hipother <https://github.com/ROCm/hipother>`_,7.1.52802,7.1.25424,6.4.43482 `hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43483,6.3.42131
`rocm-core <https://github.com/ROCm/rocm-core>`_,7.1.1,7.1.0,6.4.0 `rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.3,6.4.2,6.3.0
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_ `ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_
,,, ,,,
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix:,, SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix:,,
:doc:`AMD SMI <amdsmi:index>`,26.2.0,26.1.0,25.3.0 :doc:`AMD SMI <amdsmi:index>`,25.5.1,25.5.1,24.7.1
:doc:`ROCm Data Center Tool <rdc:index>`,1.2.0,1.2.0,0.3.0 :doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0 :doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.8.0,7.8.0,7.5.0 :doc:`ROCm SMI <rocm_smi_lib:index>`,7.7.0,7.5.0,7.4.0
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.3.0,1.2.0,1.1.0 :doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0
,,, ,,,
PERFORMANCE TOOLS,,, PERFORMANCE TOOLS,,,
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,2.6.0,2.6.0,1.4.0 :doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.3.1,3.3.0,3.1.0 :doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.1,3.1.1,3.0.0
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.2.1,1.2.0,1.0.0 :doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.2,1.0.2,0.1.0
:doc:`ROCProfiler <rocprofiler:index>`,2.0.70101,2.0.70100,2.0.60400 :doc:`ROCProfiler <rocprofiler:index>`,2.0.60403,2.0.60402,2.0.60300
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,1.0.0,1.0.0,0.6.0 :doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.5.0
:doc:`ROCTracer <roctracer:index>`,4.1.70101,4.1.70100,4.1.60400 :doc:`ROCTracer <roctracer:index>`,4.1.60403,4.1.60402,4.1.60300
,,, ,,,
DEVELOPMENT TOOLS,,, DEVELOPMENT TOOLS,,,
:doc:`HIPIFY <hipify:index>`,20.0.0,20.0.0,19.0.0 :doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,18.0.0.24455
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0 :doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.4,0.77.4,0.77.2 :doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.0
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,16.3.0,16.3.0,15.2.0 :doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.5.0,0.5.0,0.4.0 `rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.1.0,2.1.0,2.0.4 :doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.3
,,, ,,,
COMPILERS,.. _compilers-support-compatibility-matrix:,, COMPILERS,.. _compilers-support-compatibility-matrix:,,
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A `clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1 :doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1
`Flang <https://github.com/ROCm/flang>`_,20.0.025444,20.0.025425,19.0.0.25133 `Flang <https://github.com/ROCm/flang>`_,19.0.0.25224,19.0.0.25224,18.0.0.24455
:doc:`llvm-project <llvm-project:index>`,20.0.025444,20.0.025425,19.0.0.25133 :doc:`llvm-project <llvm-project:index>`,19.0.0.25224,19.0.0.25224,18.0.0.24491
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,20.0.025444,20.0.025425,19.0.0.25133 `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25224,19.0.0.25224,18.0.0.24491
,,, ,,,
RUNTIMES,.. _runtime-support-compatibility-matrix:,, RUNTIMES,.. _runtime-support-compatibility-matrix:,,
:doc:`AMD CLR <hip:understand/amd_clr>`,7.1.52802,7.1.25424,6.4.43482 :doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43484,6.4.43484,6.3.42131
:doc:`HIP <hip:index>`,7.1.52802,7.1.25424,6.4.43482 :doc:`HIP <hip:index>`,6.4.43484,6.4.43484,6.3.42131
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0 `OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0
:doc:`ROCr Runtime <rocr-runtime:index>`,1.18.0,1.18.0,1.15.0 :doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.14.0
.. rubric:: Footnotes .. rubric:: Footnotes
.. [#os-compatibility] Some operating systems are supported on limited GPUs. For detailed information, see the latest :ref:`supported_distributions`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-operating-systems>`__, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-operating-systems>`__, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-operating-systems>`__. .. [#mi300x] Oracle Linux and Azure Linux are supported only on AMD Instinct MI300X.
.. [#gpu-compatibility] Some GPUs have limited operating system support. For detailed information, see the latest :ref:`supported_GPUs`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-gpus>`__, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-gpus>`__, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-gpus>`__. .. [#single-node] Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
.. [#dgl_compat] DGL is only supported on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0. .. [#RDNA-OS] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
.. [#llama-cpp_compat] llama.cpp is only supported on ROCm 7.0.0 and ROCm 6.4.x. .. [#7700XT-OS] Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
.. [#mi325x_KVM] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0. .. [#kfd_support] As of ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The tested user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
.. [#driver_patch] AMD GPU Driver (amdgpu) 30.10.1 is a quality release that resolves an issue identified in the 30.10 release. There are no other significant changes or feature additions in ROCm 7.0.1 from ROCm 7.0.0. AMD GPU Driver (amdgpu) 30.10.1 is compatible with ROCm 7.0.1 and ROCm 7.0.0.
.. [#kfd_support] As of ROCm 6.4.0, forward and backward compatibility between the AMD GPU Driver (amdgpu) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The supported user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and AMD GPU Driver support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
.. [#ROCT-rocr] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package. .. [#ROCT-rocr] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
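To see which GPU / LLVM target an installed accelerator reports, for comparison against the "GPU / LLVM target" rows and the GPU-specific footnotes above, the following is a minimal Python sketch. It assumes ROCm's ``rocminfo`` package is installed, which ships the ``rocm_agent_enumerator`` helper; treat it as an illustration rather than the documented way to query targets.
.. code-block:: python
   # Minimal sketch: list the gfx targets visible on this host so they can be
   # checked against the "GPU / LLVM target" rows in the matrix above.
   # Assumes ROCm's rocminfo package (which provides rocm_agent_enumerator) is installed.
   import shutil
   import subprocess
   def detected_gfx_targets() -> list[str]:
       tool = shutil.which("rocm_agent_enumerator")
       if tool is None:
           raise RuntimeError("rocm_agent_enumerator not found; is ROCm installed?")
       out = subprocess.run([tool], capture_output=True, text=True, check=True)
       # One target per line; gfx000 denotes the host CPU agent and is skipped.
       return [t.strip() for t in out.stdout.splitlines()
               if t.strip() and t.strip() != "gfx000"]
   if __name__ == "__main__":
       for target in detected_gfx_targets():
           print(target)  # e.g. gfx942, gfx1100, gfx1030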
.. _OS-kernel-versions: .. _OS-kernel-versions:
Operating systems, kernel and Glibc versions Operating systems, kernel and Glibc versions
********************************************* *********************************************
For detailed information on the operating systems supported on ROCm 7.1.1 and the associated kernel and Glibc versions, see the latest :ref:`supported_distributions`. For version specific information, see `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-operating-systems>`__ and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-operating-systems>`__. Use this lookup table to confirm which operating system and kernel versions are supported with ROCm.
.. csv-table::
:header: "OS", "Version", "Kernel", "Glibc"
:widths: 40, 20, 30, 20
:stub-columns: 1
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 24.04.2, "6.8 GA, 6.11 HWE", 2.39
,,
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 22.04.5, "5.15 GA, 6.8 HWE", 2.35
,,
`Red Hat Enterprise Linux (RHEL 9) <https://access.redhat.com/articles/3078#RHEL9>`_, 9.6, 5.14+, 2.34
,9.5, 5.14+, 2.34
,9.4, 5.14+, 2.34
,9.3, 5.14+, 2.34
,,
`Red Hat Enterprise Linux (RHEL 8) <https://access.redhat.com/articles/3078#RHEL8>`_, 8.10, 4.18.0+, 2.28
,8.9, 4.18.0, 2.28
,,
`SUSE Linux Enterprise Server (SLES) <https://www.suse.com/support/kb/doc/?id=000019587#SLE15SP4>`_, 15 SP7, 6.11.0+, 2.38
,15 SP6, "6.5.0+, 6.4.0", 2.38
,15 SP5, 5.14.21, 2.31
,,
`Oracle Linux <https://blogs.oracle.com/scoter/post/oracle-linux-and-unbreakable-enterprise-kernel-uek-releases>`_, 9, 5.15.0 (UEK), 2.35
,8, 5.15.0 (UEK), 2.28
,,
`Debian <https://www.debian.org/download>`_,12, 6.1, 2.36
,,
`Azure Linux <https://techcommunity.microsoft.com/blog/linuxandopensourceblog/azure-linux-3-0-now-in-preview-on-azure-kubernetes-service-v1-31/4287229>`_,3.0, 6.6.60, 2.38
,,
.. note::
@@ -201,18 +228,24 @@ Expand for full historical view of:
.. rubric:: Footnotes
.. [#os-compatibility-past-60] Some operating systems are supported on limited GPUs. For detailed information, see the latest :ref:`supported_distributions`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-operating-systems>`__, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-operating-systems>`__, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-operating-systems>`__.
.. [#mi300x-past-60] Oracle Linux and Azure Linux are supported only on AMD Instinct MI300X.
.. [#gpu-compatibility-past-60] Some GPUs have limited operating system support. For detailed information, see the latest :ref:`supported_GPUs`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-gpus>`__, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-gpus>`__, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-gpus>`__.
.. [#single-node-past-60] Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
.. [#tf-mi350-past-60] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead.
.. [#RDNA-OS-past-60] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
.. [#verl_compat-past-60] verl is only supported on ROCm 7.0.0 and 6.2.0.
.. [#7700XT-OS-past-60] Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
.. [#stanford-megatron-lm_compat-past-60] Stanford Megatron-LM is only supported on ROCm 6.3.0.
.. [#mi300_624-past-60] **For ROCm 6.2.4** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
.. [#dgl_compat-past-60] DGL is only supported on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0.
.. [#mi300_622-past-60] **For ROCm 6.2.2** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
.. [#megablocks_compat-past-60] Megablocks is only supported on ROCm 6.3.0.
.. [#mi300_621-past-60] **For ROCm 6.2.1** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
.. [#ray_compat-past-60] Ray is only supported on ROCm 7.0.0 and 6.4.1.
.. [#mi300_620-past-60] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
.. [#llama-cpp_compat-past-60] llama.cpp is only supported on ROCm 7.0.0 and 6.4.x.
.. [#mi300_612-past-60] **For ROCm 6.1.2** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
.. [#flashinfer_compat-past-60] FlashInfer is only supported on ROCm 6.4.1.
.. [#mi300_611-past-60] **For ROCm 6.1.1** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
.. [#mi325x_KVM-past-60] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0.
.. [#mi300_610-past-60] **For ROCm 6.1.0** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
.. [#driver_patch-past-60] AMD GPU Driver (amdgpu) 30.10.1 is a quality release that resolves an issue identified in the 30.10 release. There are no other significant changes or feature additions in ROCm 7.0.1 from ROCm 7.0.0. AMD GPU Driver (amdgpu) 30.10.1 is compatible with ROCm 7.0.1 and ROCm 7.0.0.
.. [#mi300_602-past-60] **For ROCm 6.0.2** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
.. [#kfd_support-past-60] As of ROCm 6.4.0, forward and backward compatibility between the AMD GPU Driver (amdgpu) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The supported user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and AMD GPU Driver support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
.. [#mi300_600-past-60] **For ROCm 6.0.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
.. [#verl_compat] verl is only supported on ROCm 6.2.0.
.. [#stanford-megatron-lm_compat] Stanford Megatron-LM is only supported on ROCm 6.3.0.
.. [#dgl_compat] DGL is only supported on ROCm 6.4.0.
.. [#megablocks_compat] Megablocks is only supported on ROCm 6.3.0.
.. [#taichi_compat] Taichi is only supported on ROCm 6.3.2.
.. [#ROCT-rocr-past-60] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.

View File

@@ -2,7 +2,7 @@
.. meta::
:description: Deep Graph Library (DGL) compatibility
:keywords: GPU, CPU, deep graph library, DGL, deep learning, framework compatibility
.. version-set:: rocm_version latest
@@ -10,274 +10,215 @@
DGL compatibility
********************************************************************************
Deep Graph Library (`DGL <https://www.dgl.ai/>`__) is an easy-to-use, high-performance, and scalable
Python package for deep learning on graphs. DGL is framework agnostic, meaning
that if a deep graph model is a component in an end-to-end application, the rest of
the logic is implemented using PyTorch.
DGL provides a high-performance graph object that can reside on either CPUs or GPUs.
It bundles structural data features for better control and provides a variety of functions
for computing with graph objects, including efficient and customizable message passing
primitives for Graph Neural Networks.
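As a minimal illustration of these message passing primitives, the following sketch builds a
small graph and runs a single ``GraphConv`` layer through the PyTorch backend. It is only a
sketch and assumes a working DGL + PyTorch installation (for example, one of the ``rocm/dgl``
Docker images listed below); on ROCm builds of PyTorch the GPU is still addressed as ``"cuda"``.

.. code-block:: python

   import dgl
   import torch
   from dgl.nn import GraphConv

   # Build a small directed ring graph with 4 nodes and 4 edges.
   src = torch.tensor([0, 1, 2, 3])
   dst = torch.tensor([1, 2, 3, 0])
   g = dgl.graph((src, dst))

   # Attach an 8-dimensional feature vector to every node.
   feats = torch.randn(g.num_nodes(), 8)

   # One graph-convolution layer: message passing plus aggregation.
   conv = GraphConv(in_feats=8, out_feats=4)

   # Move graph, features, and layer to the GPU if one is visible.
   device = "cuda" if torch.cuda.is_available() else "cpu"
   g, feats, conv = g.to(device), feats.to(device), conv.to(device)

   out = conv(g, feats)   # shape: (4, 4)
   print(out.shape)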
Support overview
================================================================================
- The ROCm-supported version of DGL is maintained in the official `https://github.com/ROCm/dgl
<https://github.com/ROCm/dgl>`__ repository, which differs from the
`https://github.com/dmlc/dgl <https://github.com/dmlc/dgl>`__ upstream repository.
- To get started and install DGL on ROCm, use the prebuilt :ref:`Docker images <dgl-docker-compat>`,
which include ROCm, DGL, and all required dependencies.
- See the :doc:`ROCm DGL installation guide <rocm-install-on-linux:install/3rd-party/dgl-install>`
for installation and setup instructions.
Use cases and recommendations
================================================================================
DGL can be used for Graph Learning, and building popular graph models like
GAT, GCN and GraphSage. Using these we can support a variety of use-cases such as:
- Recommender systems
- Network Optimization and Analysis
- 1D (Temporal) and 2D (Image) Classification
- Drug Discovery
Multiple use cases of DGL have been tested and verified.
However, a recommended example follows a drug discovery pipeline using the ``SE3Transformer``.
Refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`_,
where you can search for DGL examples and best practices to optimize your training workflows on AMD GPUs.
Coverage includes:
- Single-GPU training/inference
- Multi-GPU training
- You can also consult the upstream `Installation guide <https://www.dgl.ai/pages/start.html>`__
for additional context.
.. _dgl-docker-compat:
Compatibility matrix
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `DGL images <https://hub.docker.com/r/rocm/dgl/tags>`__
with ROCm backends on Docker Hub. The following Docker image tags and associated
inventories represent the latest available DGL version from the official Docker Hub.
Click the |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- ROCm
- DGL
- PyTorch
- Ubuntu
- Python
- GPU
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu24.04_py3.12_pytorch_2.8.0/images/sha256-943698ddf54c22a7bcad2e5b4ff467752e29e4ba6d0c926789ae7b242cbd92dd"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.8.0 <https://github.com/pytorch/pytorch/releases/tag/v2.8.0>`__
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
- MI300X, MI250X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu24.04_py3.12_pytorch_2.6.0/images/sha256-b2ec286a035eb7d0a6aab069561914d21a3cac462281e9c024501ba5ccedfbf7"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
- MI300X, MI250X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu22.04_py3.10_pytorch_2.7.1/images/sha256-d27aee16df922ccf0bcd9107bfcb6d20d34235445d456c637e33ca6f19d11a51"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.7.1 <https://github.com/pytorch/pytorch/releases/tag/v2.7.1>`__
- 22.04
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
- MI300X, MI250X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm6.4.3_ubuntu24.04_py3.12_pytorch_2.6.0/images/sha256-f3ba6a3c9ec9f6c1cde28449dc9780e0c4c16c4140f4b23f158565fbfd422d6b"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
- MI300X, MI250X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-8ce2c3bcfaa137ab94a75f9e2ea711894748980f57417739138402a542dd5564"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
- MI300X, MI250X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-cf1683283b8eeda867b690229c8091c5bbf1edb9f52e8fb3da437c49a612ebe4"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.4.1 <https://github.com/pytorch/pytorch/releases/tag/v2.4.1>`__
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
- MI300X, MI250X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-4834f178c3614e2d09e89e32041db8984c456d45dfd20286e377ca8635686554"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.4.1 <https://github.com/pytorch/pytorch/releases/tag/v2.4.1>`__
- 22.04
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
- MI300X, MI250X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-88740a2c8ab4084b42b10c3c6ba984cab33dd3a044f479c6d7618e2b2cb05e69"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
- `2.3.0 <https://github.com/pytorch/pytorch/releases/tag/v2.3.0>`__
- 22.04
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
- MI300X, MI250X
.. _dgl-key-rocm-libraries:
Key ROCm libraries for DGL
================================================================================
DGL on ROCm depends on specific libraries that affect its features and performance.
Using the DGL Docker container or building it with the provided Dockerfile or a ROCm base image is recommended.
If you prefer to build it yourself, ensure the following dependencies are installed (a quick sanity check follows the table):
.. list-table::
:header-rows: 1
* - ROCm library
- ROCm 7.0.0 Version
- ROCm 6.4.x Version
- Purpose
* - `Composable Kernel <https://github.com/ROCm/composable_kernel>`_
- 1.1.0
- 1.1.0
- Enables faster execution of core operations like matrix multiplication
(GEMM), convolutions and transformations.
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`_
- 3.0.0
- 2.4.0
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
matrix and vector operations.
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
- 1.0.0
- 0.12.0
- hipBLASLt is an extension of the hipBLAS library, providing additional
features like epilogues fused into the matrix multiplication kernel or
use of integer tensor cores.
* - `hipCUB <https://github.com/ROCm/hipCUB>`_
- 4.0.0
- 3.4.0
- Provides a C++ template library for parallel algorithms for reduction,
scan, sort and select.
* - `hipFFT <https://github.com/ROCm/hipFFT>`_
- 1.0.20
- 1.0.18
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
* - `hipRAND <https://github.com/ROCm/hipRAND>`_
- 3.0.0
- 2.12.0
- Provides fast random number generation for GPUs.
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
- 3.0.0
- 2.4.0
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and
singular value decompositions (SVD).
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_
- 4.0.1
- 3.2.0
- Accelerates operations on sparse matrices, such as sparse matrix-vector
or matrix-matrix products.
* - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`_
- 0.2.4
- 0.2.3
- Accelerates operations on sparse matrices, such as sparse matrix-vector
or matrix-matrix products.
* - `hipTensor <https://github.com/ROCm/hipTensor>`_
- 2.0.0
- 1.5.0
- Optimizes for high-performance tensor operations, such as contractions.
* - `MIOpen <https://github.com/ROCm/MIOpen>`_
- 3.5.0
- 3.4.0
- Optimizes deep learning primitives such as convolutions, pooling,
normalization, and activation functions.
* - `MIGraphX <https://github.com/ROCm/AMDMIGraphX>`_
- 2.13.0
- 2.12.0
- Adds graph-level optimizations, support for ONNX models and mixed precision,
and enables Ahead-of-Time (AOT) compilation.
* - `MIVisionX <https://github.com/ROCm/MIVisionX>`_
- 3.3.0
- 3.2.0
- Accelerates computer vision and AI workloads like
preprocessing, augmentation, and inferencing.
* - `rocAL <https://github.com/ROCm/rocAL>`_
- 3.3.0
- 2.2.0
- Accelerates the data pipeline by offloading intensive preprocessing and
augmentation tasks. rocAL is part of MIVisionX.
* - `RCCL <https://github.com/ROCm/rccl>`_
- 2.26.6
- 2.22.3
- Optimizes multi-GPU communication for operations like AllReduce and
Broadcast.
* - `rocDecode <https://github.com/ROCm/rocDecode>`_
- 1.0.0
- 0.10.0
- Provides hardware-accelerated data decoding capabilities, particularly
for image, video, and other dataset formats.
* - `rocJPEG <https://github.com/ROCm/rocJPEG>`_
- 1.1.0
- 0.8.0
- Provides hardware-accelerated JPEG image decoding and encoding.
* - `RPP <https://github.com/ROCm/RPP>`_
- 2.0.0
- 1.9.10
- Speeds up data augmentation, transformation, and other preprocessing steps.
* - `rocThrust <https://github.com/ROCm/rocThrust>`_
- 4.0.0
- 3.3.0
- Provides a C++ template library for parallel algorithms like sorting,
reduction, and scanning.
* - `rocWMMA <https://github.com/ROCm/rocWMMA>`_
- 2.0.0
- 1.7.0
- Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
multiplication (GEMM) and accumulation operations with mixed precision
support.
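After the stack is installed, the short check below confirms that the ROCm-enabled PyTorch and DGL
builds in your environment actually see the GPU. This is only a hypothetical sanity check, not part
of the DGL build process, and it assumes DGL and a ROCm build of PyTorch are already installed.

.. code-block:: python

   import dgl
   import torch

   # Report the versions and whether a ROCm device is visible.
   print("PyTorch:", torch.__version__)
   print("HIP runtime:", torch.version.hip)        # None on non-ROCm builds
   print("GPU visible:", torch.cuda.is_available())  # ROCm devices surface through the CUDA API
   print("DGL:", dgl.__version__)

   if torch.cuda.is_available():
       print("Device:", torch.cuda.get_device_name(0))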
.. _dgl-supported-features-latest:
Supported features with ROCm 7.0.0
================================================================================
Many functions and methods available upstream are also supported in DGL on ROCm.
Instead of listing them all, support is grouped into the following categories to provide a general overview.
* DGL Base
* DGL Backend
* DGL Data
* DGL Dataloading
* DGL Graph
* DGL Function
* DGL Ops
* DGL Sampling
@@ -289,76 +230,26 @@ Instead of listing them all, support is grouped into the following categories to
* DGL NN
* DGL Optim
* DGL Sparse
* GraphBolt
.. _dgl-unsupported-features-latest:
Unsupported features with ROCm 7.0.0
================================================================================
* TF32 Support (only supported for PyTorch 2.7 and above; see the sketch below)
* Kineto/ROCTracer integration
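For reference, the sketch below shows how TF32 is typically opted into from PyTorch. This is the
standard PyTorch switch rather than a DGL-specific API, and it assumes a ROCm wheel built against
PyTorch 2.7 or newer; older wheels simply keep using plain FP32 math.

.. code-block:: python

   import torch

   if torch.cuda.is_available():
       # TF32 is opt-in; on ROCm these flags are routed to the hipBLASLt/MIOpen backends.
       torch.backends.cuda.matmul.allow_tf32 = True   # matrix multiplications
       torch.backends.cudnn.allow_tf32 = True         # convolution kernels

       a = torch.randn(1024, 1024, device="cuda")
       b = torch.randn(1024, 1024, device="cuda")
       c = a @ b
       print(c.dtype)  # still torch.float32; TF32 only changes the internal math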
.. _dgl-unsupported-functions:
Unsupported functions with ROCm 7.0.0
================================================================================
* ``bfs``
* ``format``
* ``multiprocess_sparse_adam_state_dict``
* ``record_stream_ndarray``
* ``half_spmm``
* ``segment_mm``
* ``gather_mm_idx_b``
* ``pgexplainer``
* ``sample_labors_prob``
* ``sample_labors_noprob``
* ``sparse_admin``
.. _dgl-recommendations:
Use cases and recommendations
================================================================================
DGL can be used for Graph Learning, and building popular graph models like
GAT, GCN, and GraphSage. Using these models, a variety of use cases are supported:
- Recommender systems
- Network Optimization and Analysis
- 1D (Temporal) and 2D (Image) Classification
- Drug Discovery
For use cases and recommendations, refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__,
where you can search for DGL examples and best practices to optimize your workloads on AMD GPUs.
* Although multiple use cases of DGL have been tested and verified, a few have been
outlined in the `DGL in the Real World: Running GNNs on Real Use Cases
<https://rocm.blogs.amd.com/artificial-intelligence/dgl_blog2/README.html>`__ blog
post, which walks through four real-world graph neural network (GNN) workloads
implemented with the Deep Graph Library on ROCm. It covers tasks ranging from
heterogeneous e-commerce graphs and multiplex networks (GATNE) to molecular graph
regression (GNN-FiLM) and EEG-based neurological diagnosis (EEG-GCNN). For each use
case, the authors detail: the dataset and task, how DGL is used, and their experience
porting to ROCm. It is shown that DGL codebases often run without modification, with
seamless integration of graph operations, message passing, sampling, and convolution.
* The `Graph Neural Networks (GNNs) at Scale: DGL with ROCm on AMD Hardware
<https://rocm.blogs.amd.com/artificial-intelligence/why-graph-neural/README.html>`__
blog post introduces the Deep Graph Library (DGL) and its enablement on the AMD ROCm platform,
bringing high-performance graph neural network (GNN) training to AMD GPUs. DGL bridges
the gap between dense tensor frameworks and the irregular nature of graph data through a
graph-first, message-passing abstraction. Its design ensures scalability, flexibility, and
interoperability across frameworks like PyTorch and TensorFlow. AMD's ROCm integration
enables DGL to run efficiently on HIP-based GPUs, supported by prebuilt Docker containers
and open-source repositories. This marks a major step in AMD's mission to advance open,
scalable AI ecosystems beyond traditional architectures.
You can pre-process datasets and begin training on AMD GPUs through the following (a minimal single-GPU sketch is shown after the list):
* Single-GPU training/inference
* Multi-GPU training
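As a minimal illustration of the single-GPU path, the sketch below trains a two-layer GCN on a tiny
synthetic graph. It is not tied to any particular dataset or blog example and assumes the same
DGL + ROCm PyTorch environment as above.

.. code-block:: python

   import dgl
   import torch
   import torch.nn.functional as F
   from dgl.nn import GraphConv

   device = "cuda" if torch.cuda.is_available() else "cpu"

   # Tiny synthetic graph: a ring of 100 nodes with random features and binary labels.
   n = 100
   src = torch.arange(n)
   dst = (src + 1) % n
   g = dgl.graph((src, dst)).to(device)
   feats = torch.randn(n, 16, device=device)
   labels = torch.randint(0, 2, (n,), device=device)

   class GCN(torch.nn.Module):
       def __init__(self):
           super().__init__()
           self.conv1 = GraphConv(16, 32)
           self.conv2 = GraphConv(32, 2)

       def forward(self, graph, x):
           h = F.relu(self.conv1(graph, x))
           return self.conv2(graph, h)

   model = GCN().to(device)
   opt = torch.optim.Adam(model.parameters(), lr=1e-2)

   for epoch in range(5):
       logits = model(g, feats)
       loss = F.cross_entropy(logits, labels)
       opt.zero_grad()
       loss.backward()
       opt.step()
       print(f"epoch {epoch}: loss={loss.item():.4f}")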
Previous versions
===============================================================================
See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/dgl-history` to find documentation for previous releases
of the ``ROCm/dgl`` Docker image.

View File

@@ -1,98 +0,0 @@
:orphan:
.. meta::
:description: FlashInfer compatibility
:keywords: GPU, LLM, FlashInfer, deep learning, framework compatibility
.. version-set:: rocm_version latest
********************************************************************************
FlashInfer compatibility
********************************************************************************
`FlashInfer <https://docs.flashinfer.ai/index.html>`__ is a library and kernel generator
for Large Language Models (LLMs) that provides high-performance graphics
processing unit (GPU) kernels. FlashInfer focuses on LLM serving and inference, as well
as on strong performance across diverse serving scenarios.
FlashInfer features highly efficient attention kernels, load-balanced scheduling, and memory-optimized
techniques, while supporting customized attention variants. It's compatible with ``torch.compile``, and
offers high-performance LLM-specific operators, with easy integration through PyTorch and C++ APIs.
.. note::
The ROCm port of FlashInfer is under active development, and some features are not yet available.
For the latest feature compatibility matrix, refer to the ``README`` of the
`https://github.com/ROCm/flashinfer <https://github.com/ROCm/flashinfer>`__ repository.
Support overview
================================================================================
- The ROCm-supported version of FlashInfer is maintained in the official `https://github.com/ROCm/flashinfer
<https://github.com/ROCm/flashinfer>`__ repository, which differs from the
`https://github.com/flashinfer-ai/flashinfer <https://github.com/flashinfer-ai/flashinfer>`__
upstream repository.
- To get started and install FlashInfer on ROCm, use the prebuilt :ref:`Docker images <flashinfer-docker-compat>`,
which include ROCm, FlashInfer, and all required dependencies.
- See the :doc:`ROCm FlashInfer installation guide <rocm-install-on-linux:install/3rd-party/flashinfer-install>`
for installation and setup instructions.
- You can also consult the upstream `Installation guide <https://docs.flashinfer.ai/installation.html>`__
for additional context.
.. _flashinfer-docker-compat:
Compatibility matrix
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `FlashInfer images <https://hub.docker.com/r/rocm/flashinfer/tags>`__
with ROCm backends on Docker Hub. The following Docker image tag and associated
inventories represent the latest available FlashInfer version from the official Docker Hub.
Click |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- ROCm
- FlashInfer
- PyTorch
- Ubuntu
- Python
- GPU
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/flashinfer/flashinfer-0.2.5_rocm6.4_ubuntu24.04_py3.12_pytorch2.7/images/sha256-558914838821c88c557fb6d42cfbc1bdb67d79d19759f37c764a9ee801f93313"><i class="fab fa-docker fa-lg"></i> rocm/flashinfer</a>
- `6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`__
- `v0.2.5 <https://github.com/flashinfer-ai/flashinfer/releases/tag/v0.2.5>`__
- `2.7.1 <https://github.com/ROCm/pytorch/releases/tag/v2.7.1>`__
- 24.04
- `3.12 <https://www.python.org/downloads/release/python-3129/>`__
- MI300X
.. _flashinfer-recommendations:
Use cases and recommendations
================================================================================
The release of FlashInfer on ROCm provides the decode functionality for LLM inferencing.
In the decode phase, tokens are generated sequentially, with the model predicting each new
token based on the previously generated tokens and the input context.
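To make the decode phase concrete, the toy loop below generates tokens one at a time from a growing
context. It deliberately uses plain PyTorch modules rather than the FlashInfer API, so it only
illustrates the access pattern that FlashInfer's decode kernels accelerate; all names here are
illustrative placeholders.

.. code-block:: python

   import torch

   # Toy stand-in for an LLM decoder: an embedding plus a linear "head".
   vocab_size, hidden = 100, 32
   embed = torch.nn.Embedding(vocab_size, hidden)
   head = torch.nn.Linear(hidden, vocab_size)

   tokens = [1, 5, 7]  # prompt token ids
   with torch.no_grad():
       for _ in range(8):  # decode 8 new tokens, one per step
           ctx = torch.tensor(tokens)
           state = embed(ctx).mean(dim=0)   # crude stand-in for attention over the KV cache
           next_id = int(head(state).argmax())
           tokens.append(next_id)

   print(tokens)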
FlashInfer on ROCm brings over upstream features such as load balancing, sparse and dense
attention optimizations, and batching support, enabling efficient execution on AMD Instinct™ MI300X GPUs.
Because large LLMs often require substantial KV caches or long context windows, FlashInfer on ROCm
also implements cascade attention from upstream to reduce memory usage.
For currently supported use cases and recommendations, refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__,
where you can search for examples and best practices to optimize your workloads on AMD GPUs.

View File

@@ -2,7 +2,7 @@
.. meta::
:description: JAX compatibility
:keywords: GPU, JAX, deep learning, framework compatibility
.. version-set:: rocm_version latest
@@ -10,58 +10,42 @@
JAX compatibility
*******************************************************************************
`JAX <https://docs.jax.dev/en/latest/notebooks/thinking_in_jax.html>`__ is a library
for array-oriented numerical computation (similar to NumPy), with automatic differentiation
and just-in-time (JIT) compilation to enable high-performance machine learning research.
JAX provides an API that combines automatic differentiation and the
Accelerated Linear Algebra (XLA) compiler to achieve high-performance machine
learning at scale. JAX uses composable transformations of Python and NumPy through
JIT compilation, automatic vectorization, and parallelization.
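As a quick illustration of these composable transformations, the sketch below combines ``jit``,
``grad``, and ``vmap`` on a toy loss. It only assumes a working ``jax``/``jaxlib`` installation
(for example, from one of the ROCm wheels or Docker images referenced below) and runs the same way
on CPU or GPU.

.. code-block:: python

   import jax
   import jax.numpy as jnp

   # A scalar loss; grad differentiates it, jit compiles it through XLA.
   def loss(w, x, y):
       pred = jnp.dot(x, w)
       return jnp.mean((pred - y) ** 2)

   grad_fn = jax.jit(jax.grad(loss))  # compiled gradient with respect to w

   # Automatic vectorization over the batch dimension of x.
   batched = jax.vmap(lambda row, w: jnp.dot(row, w), in_axes=(0, None))

   key = jax.random.PRNGKey(0)
   x = jax.random.normal(key, (32, 8))
   y = jnp.ones((32,))
   w = jnp.zeros((8,))

   print(grad_fn(w, x, y).shape)  # (8,)
   print(batched(x, w).shape)     # (32,)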
Support overview
================================================================================
- The ROCm-supported version of JAX is maintained in the official `https://github.com/ROCm/rocm-jax
<https://github.com/ROCm/rocm-jax>`__ repository, which differs from the
`https://github.com/jax-ml/jax <https://github.com/jax-ml/jax>`__ upstream repository.
- To get started and install JAX on ROCm, use the prebuilt :ref:`Docker images <jax-docker-compat>`,
which include ROCm, JAX, and all required dependencies.
- See the :doc:`ROCm JAX installation guide <rocm-install-on-linux:install/3rd-party/jax-install>`
for installation and setup instructions.
- You can also consult the upstream `Installation guide <https://jax.readthedocs.io/en/latest/installation.html#amd-gpu-linux>`__
for additional context.
Version support
--------------------------------------------------------------------------------
AMD releases official `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax/tags>`_
quarterly alongside new ROCm releases. These images undergo full AMD testing.
`Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community/tags>`_
follow upstream JAX releases and use the latest available ROCm version.
JAX Plugin-PJRT with JAX/JAXLIB compatibility
================================================================================
Portable JIT Runtime (PJRT) is an open, stable interface for device runtime and
compiler. The following table details the ROCm version compatibility matrix
between JAX Plugin-PJRT and JAX/JAXLIB.
.. list-table::
:header-rows: 1
* - JAX Plugin-PJRT
- JAX/JAXLIB
- ROCm
* - 0.7.1
- 0.7.1
- 7.1.1, 7.1.0
* - 0.6.0
- 0.6.2, 0.6.0
- 7.0.2, 7.0.1, 7.0.0
Use cases and recommendations
================================================================================
@@ -87,7 +71,7 @@ Use cases and recommendations
* The `Distributed fine-tuning with JAX on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/distributed-sft-jax/README.html>`_
outlines the process of fine-tuning a Bidirectional Encoder Representations
from Transformers (BERT)-based large language model (LLM) using JAX for a text
classification task. The blog post discusses techniques for parallelizing the
fine-tuning across multiple AMD GPUs and assesses the model's performance on a
holdout dataset. During the fine-tuning, a BERT-base-cased transformer model
and the General Language Understanding Evaluation (GLUE) benchmark dataset was
@@ -95,7 +79,7 @@ Use cases and recommendations
* The `MI300X workload optimization guide <https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/workload.html>`_
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
GPU using ROCm. The page is aimed at helping users achieve optimal
performance for deep learning and other high-performance computing tasks on
the MI300X GPU.
@@ -106,15 +90,75 @@ For more use cases and recommendations, see `ROCm JAX blog posts <https://rocm.b
Docker image compatibility
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes ready-made `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax>`_
with ROCm backends on Docker Hub. The following Docker image tags and
associated inventories represent the latest JAX version from the official Docker Hub and are validated for
`ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`_. Click the |docker-icon|
icon to view the image on Docker Hub.
.. list-table:: JAX Docker image components
:header-rows: 1
* - Docker image
- JAX
- Linux
- Python
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.4.2-jax0.4.35-py3.12/images/sha256-8918fa806a172c1a10eb2f57131eb31b5d7c8fa1656b8729fe7d3d736112de83"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
- Ubuntu 24.04
- `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.4.2-jax0.4.35-py3.10/images/sha256-a394be13c67b7fc602216abee51233afd4b6cb7adaa57ca97e688fba82f9ad79"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
- Ubuntu 22.04
- `3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
AMD publishes `Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
with ROCm backends on Docker Hub. The following Docker image tags and
associated inventories are tested for `ROCm 6.3.2 <https://repo.radeon.com/rocm/apt/6.3.2/>`_.
.. list-table:: JAX community Docker image components
:header-rows: 1
* - Docker image
- JAX
- Linux
- Python
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.12.8/images/sha256-25dfaa0183e274bd0a3554a309af3249c6f16a1793226cb5373f418e39d3146a"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
- Ubuntu 22.04
- `3.12.8 <https://www.python.org/downloads/release/python-3128/>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.11.11/images/sha256-ff9baeca9067d13e6c279c911e5a9e5beed0817d24fafd424367cc3d5bd381d7"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
- Ubuntu 22.04
- `3.11.11 <https://www.python.org/downloads/release/python-31111/>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.10.16/images/sha256-8bab484be1713655f74da51a191ed824bb9d03db1104fd63530a1ac3c37cf7b1"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
- Ubuntu 22.04
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
.. _key_rocm_libraries:
@@ -250,7 +294,7 @@ The ROCm supported data types in JAX are collected in the following table.
.. note::
JAX data type support is affected by the :ref:`key_rocm_libraries` and is
collected on the :doc:`ROCm data types and precision support <rocm:reference/precision-support>`
page.
@@ -266,54 +310,5 @@ For a complete and up-to-date list of JAX public modules (for example, ``jax.num
Since version 0.1.56, JAX has full support for ROCm, and the
:ref:`Known issues and important notes <jax_comp_known_issues>` section
contains details about limitations specific to the ROCm backend. The list of
JAX API modules is maintained by the JAX project and is subject to change.
Refer to the official JAX documentation for the most up-to-date information.
Key features and enhancements for ROCm 7.0
===============================================================================
- Upgraded XLA backend: Integrates a newer XLA version, enabling better
optimizations, broader operator support, and potential performance gains.
- RNN support: Native RNN support (including LSTMs via ``jax.experimental.rnn``)
now available on ROCm, aiding sequence model development.
- Comprehensive linear algebra capabilities: Offers robust ``jax.linalg``
operations, essential for scientific and machine learning tasks.
- Expanded AMD GPU architecture support: Provides ongoing support for gfx1101
GPUs and introduces support for gfx950 and gfx12xx GPUs.
- Mixed FP8 precision support: Enables ``lax.dot_general`` operations with mixed FP8
types, offering pathways for memory and compute efficiency.
- Streamlined PyPI packaging: Provides reliable PyPI wheels for JAX on ROCm,
simplifying the installation process.
- Pallas experimental kernel development: Continued Pallas framework
enhancements for custom GPU kernels, including new intrinsics (specific
kernel behaviors under review).
- Improved build system and CI: Enhanced ROCm build system and CI for greater
reliability and maintainability.
- Enhanced distributed computing setup: Improved JAX setup in multi-GPU
distributed environments.
.. _jax_comp_known_issues:
Known issues and notes for ROCm 7.0
===============================================================================
- ``nn.dot_product_attention``: Certain configurations of ``jax.nn.dot_product_attention``
may cause segmentation faults, though the majority of use cases work correctly.
- SVD with dynamic shapes: SVD on inputs with dynamic/symbolic shapes might result in an error.
SVD with static shapes is unaffected.
- QR decomposition with symbolic shapes: QR decomposition operations may fail when using
symbolic/dynamic shapes in shape polymorphic contexts.
- Pallas kernels: Specific advanced Pallas kernels may exhibit variations in
numerical output or resource usage. These are actively reviewed as part of
Pallas's experimental development.

View File

@@ -1,275 +0,0 @@
:orphan:
.. meta::
:description: llama.cpp compatibility
:keywords: GPU, GGML, llama.cpp, deep learning, framework compatibility
.. version-set:: rocm_version latest
********************************************************************************
llama.cpp compatibility
********************************************************************************
`llama.cpp <https://github.com/ggml-org/llama.cpp>`__ is an open-source framework
for Large Language Model (LLM) inference that runs on both central processing units
(CPUs) and graphics processing units (GPUs). It is written in plain C/C++, providing
a simple, dependency-free setup.
The framework supports multiple quantization options, from 1.5-bit to 8-bit integers,
to accelerate inference and reduce memory usage. Originally built as a CPU-first library,
llama.cpp is easy to integrate with other programming environments and is widely
adopted across diverse platforms, including consumer devices.
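As an illustration of how easily llama.cpp integrates with other programming environments, the
sketch below queries a locally running ``llama-server`` through its OpenAI-compatible HTTP endpoint.
It assumes one of the ``*_server`` Docker images listed below (or a local ``llama-server`` build) is
already running with a model loaded and listening on the default port 8080; the port and endpoint
path are the server defaults, not something configured by this snippet.

.. code-block:: python

   import json
   import urllib.request

   payload = {
       "messages": [{"role": "user", "content": "Say hello in one sentence."}],
       "max_tokens": 32,
   }
   req = urllib.request.Request(
       "http://localhost:8080/v1/chat/completions",
       data=json.dumps(payload).encode(),
       headers={"Content-Type": "application/json"},
   )
   with urllib.request.urlopen(req) as resp:
       reply = json.load(resp)

   print(reply["choices"][0]["message"]["content"])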
Support overview
================================================================================
- The ROCm-supported version of llama.cpp is maintained in the official `https://github.com/ROCm/llama.cpp
<https://github.com/ROCm/llama.cpp>`__ repository, which differs from the
`https://github.com/ggml-org/llama.cpp <https://github.com/ggml-org/llama.cpp>`__ upstream repository.
- To get started and install llama.cpp on ROCm, use the prebuilt :ref:`Docker images <llama-cpp-docker-compat>`,
which include ROCm, llama.cpp, and all required dependencies.
- See the :doc:`ROCm llama.cpp installation guide <rocm-install-on-linux:install/3rd-party/llama-cpp-install>`
for installation and setup instructions.
- You can also consult the upstream `Installation guide <https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md>`__
for additional context.
.. _llama-cpp-docker-compat:
Compatibility matrix
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `llama.cpp images <https://hub.docker.com/r/rocm/llama.cpp/tags>`__
with ROCm backends on Docker Hub. The following Docker image tags and associated
inventories represent the latest available llama.cpp versions from the official Docker Hub.
Click |docker-icon| to view the image on Docker Hub.
.. important::
Tag endings of ``_full``, ``_server``, and ``_light`` serve different purposes for entrypoints as follows:
- Full: This image includes both the main executable file and the tools to convert ``LLaMA`` models into ``ggml`` format and quantize them to 4-bit.
- Server: This image only includes the server executable file.
- Light: This image only includes the main executable file.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Full Docker
- Server Docker
- Light Docker
- llama.cpp
- ROCm
- Ubuntu
- GPU
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_full/images/sha256-a94f0c7a598cc6504ff9e8371c016d7a2f93e69bf54a36c870f9522567201f10g"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_server/images/sha256-be175932c3c96e882dfbc7e20e0e834f58c89c2925f48b222837ee929dfc47ee"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_light/images/sha256-d8ba0c70603da502c879b1f8010b439c8e7fa9f6cbdac8bbbbbba97cb41ebc9e"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6652 <https://github.com/ROCm/llama.cpp/tree/release/b6652>`__
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- 24.04
- MI325X, MI300X, MI210
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_full/images/sha256-37582168984f25dce636cc7288298e06d94472ea35f65346b3541e6422b678ee"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_server/images/sha256-7e70578e6c3530c6591cc2c26da24a9ee68a20d318e12241de93c83224f83720"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_light/images/sha256-9a5231acf88b4a229677bc2c636ea3fe78a7a80f558bd80910b919855de93ad5"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6652 <https://github.com/ROCm/llama.cpp/tree/release/b6652>`__
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- 22.04
- MI325X, MI300X, MI210
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu24.04_full/images/sha256-5960fc850024a8a76451f9eaadd89b7e59981ae9f393b407310c1ddf18892577"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu24.04_server/images/sha256-1b79775d9f546065a6aaf9ca426e1dd4ed4de0b8f6ee83687758cc05af6538e6"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu24.04_light/images/sha256-8f863c4c2857ae42bebd64e4f1a0a1e7cc3ec4503f243e32b4a4dcad070ec361"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
- `6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__
- 24.04
- MI325X, MI300X, MI210
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu22.04_full/images/sha256-888879b3ee208f9247076d7984524b8d1701ac72611689e89854a1588bec9867"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu22.04_server/images/sha256-90e4ff99a66743e33fd00728cd71a768588e5f5ef355aaa196669fe65ac70672"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu22.04_light/images/sha256-bd447a049939cb99054f8fbf3f2352870fe906a75e2dc3339c845c08b9c53f9b"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
- `6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__
- 22.04
- MI325X, MI300X, MI210
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu24.04_full/images/sha256-5b3a1bc4889c1fcade434b937fbf9cc1c22ff7dc0317c130339b0c9238bc88c4"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu24.04_server/images/sha256-5228ff99d0f627a9032d668f4381b2e80dc1e301adc3e0821f26d8354b175271"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu24.04_light/images/sha256-b12723b332a826a89b7252dddf868cbe4d1a869562fc4aa4032f59e1a683b968"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
- `6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__
- 24.04
- MI325X, MI300X, MI210
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu22.04_full/images/sha256-cd6e21a6a73f59b35dd5309b09dd77654a94d783bf13a55c14eb8dbf8e9c2615"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu22.04_server/images/sha256-c2b4689ab2c47e6626e8fea22d7a63eb03d47c0fde9f5ef8c9f158d15c423e58"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu22.04_light/images/sha256-1acc28f29ed87db9cbda629cb29e1989b8219884afe05f9105522be929e94da4"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
- `6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__
- 22.04
- MI325X, MI300X, MI210
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu24.04_full/images/sha256-2f8ae8a44510d96d52dea6cb398b224f7edeb7802df7ec488c6f63d206b3cdc9"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu24.04_server/images/sha256-fece497ff9f4a28b12f645de52766941da8ead8471aa1ea84b61d4b4568e51f2"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu24.04_light/images/sha256-3e14352fa6f8c6128b23cf9342531c20dbfb522550b626e09d83b260a1947022"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
- `6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`__
- 24.04
- MI325X, MI300X, MI210
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu22.04_full/images/sha256-80763062ef0bec15038c35fd01267f1fc99a5dd171d4b48583cc668b15efad69"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu22.04_server/images/sha256-db2a6c957555ed83b819bbc54aea884a93192da0fb512dae63d32e0dc4e8ab8f"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu22.04_light/images/sha256-c6dbb07cc655fb079d5216e4b77451cb64a9daa0585d23b6fb8b32cb22021197"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
- `6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`__
- 22.04
- MI325X, MI300X, MI210
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b5997_rocm6.4.0_ubuntu24.04_full/images/sha256-f78f6c81ab2f8e957469415fe2370a1334fe969c381d1fe46050c85effaee9d5"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b5997_rocm6.4.0_ubuntu24.04_server/images/sha256-275ad9e18f292c26a00a2de840c37917e98737a88a3520bdc35fd3fc5c9a6a9b"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- .. raw:: html
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b5997_rocm6.4.0_ubuntu24.04_light/images/sha256-cc324e6faeedf0e400011f07b49d2dc41a16bae257b2b7befa0f4e2e97231320"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
- `b5997 <https://github.com/ROCm/llama.cpp/tree/release/b5997>`__
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
- 24.04
- MI300X, MI210
.. _llama-cpp-key-rocm-libraries:
Key ROCm libraries for llama.cpp
================================================================================
llama.cpp functionality on ROCm is determined by its underlying library
dependencies. These ROCm components affect the capabilities, performance, and
feature set available to developers. Ensure you have the required libraries for
your corresponding ROCm version.
.. list-table::
:header-rows: 1
* - ROCm library
- ROCm 7.0.0 version
- ROCm 6.4.x version
- Purpose
- Usage
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`__
- 3.0.0
- 2.4.0
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
matrix and vector operations.
- Supports operations such as matrix multiplication, matrix-vector
products, and tensor contractions. Utilized in both dense and batched
linear algebra operations.
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`__
- 1.0.0
- 0.12.0
- hipBLASLt is an extension of the hipBLAS library, providing additional
features like epilogues fused into the matrix multiplication kernel or
use of integer tensor cores.
- By setting the ``ROCBLAS_USE_HIPBLASLT`` environment variable, you can dispatch
hipBLASLt kernels where possible (see the sketch after this table).
* - `rocWMMA <https://github.com/ROCm/rocWMMA>`__
- 2.0.0
- 1.7.0
- Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
multiplication (GEMM) and accumulation operations with mixed precision
support.
- Can be used to enhance flash attention performance on AMD GPUs by enabling
the corresponding rocWMMA option at compile time.
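As a minimal runtime sketch of the hipBLASLt dispatch mentioned above (assuming a Python launcher script and a ROCm build of llama.cpp; the binary name, flags, and model path are placeholders), the environment variable is set before the process starts:

.. code-block:: python

   import os
   import subprocess

   # Opt in to hipBLASLt dispatch for rocBLAS-backed binaries by setting the
   # environment variable in the process environment before launch.
   env = dict(os.environ, ROCBLAS_USE_HIPBLASLT="1")

   # "llama-server" and its arguments are placeholders; point them at your own
   # ROCm llama.cpp build and GGUF model file.
   subprocess.run(["llama-server", "-m", "model.gguf"], env=env, check=True)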
.. _llama-cpp-uses-recommendations:
Use cases and recommendations
================================================================================
llama.cpp can be applied in a variety of scenarios, particularly when you need to meet one or more of the following requirements:
- Plain C/C++ implementation with no external dependencies
- Support for 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory usage
- Custom HIP (Heterogeneous-compute Interface for Portability) kernels for running large language models (LLMs) on AMD GPUs (graphics processing units)
- CPU (central processing unit) + GPU (graphics processing unit) hybrid inference for partially accelerating models larger than the total available VRAM (video random-access memory)
llama.cpp is also used in a range of real-world applications, including:
- Games such as `Lucy's Labyrinth <https://github.com/MorganRO8/Lucys_Labyrinth>`__:
A simple maze game where AI-controlled agents attempt to trick the player.
- Tools such as `Styled Lines <https://marketplace.unity.com/packages/tools/ai-ml-integration/style-text-webgl-ios-stand-alone-llm-llama-cpp-wrapper-292902>`__:
A proprietary, asynchronous inference wrapper for Unity3D game development, including pre-built mobile and web platform wrappers and a model example.
- Various other AI applications use llama.cpp as their inference engine;
for a detailed list, see the `user interfaces (UIs) section <https://github.com/ggml-org/llama.cpp?tab=readme-ov-file#description>`__.
For more use cases and recommendations, refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__,
where you can search for llama.cpp examples and best practices to optimize your workloads on AMD GPUs.
- The `Llama.cpp Meets Instinct: A New Era of Open-Source AI Acceleration <https://rocm.blogs.amd.com/ecosystems-and-partners/llama-cpp/README.html>`__
blog post outlines how the open-source llama.cpp framework enables efficient LLM inference—including interactive inference with ``llama-cli``,
server deployment with ``llama-server``, GGUF model preparation and quantization, performance benchmarking, and optimizations tailored for
AMD Instinct GPUs within the ROCm ecosystem.
Previous versions
===============================================================================
See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/llama-cpp-history` to find documentation for previous releases
of the ``ROCm/llama.cpp`` Docker image.

View File

@@ -2,7 +2,7 @@
.. meta:: .. meta::
:description: Megablocks compatibility :description: Megablocks compatibility
:keywords: GPU, megablocks, deep learning, framework compatibility :keywords: GPU, megablocks, compatibility
.. version-set:: rocm_version latest .. version-set:: rocm_version latest
@@ -10,41 +10,64 @@
Megablocks compatibility Megablocks compatibility
******************************************************************************** ********************************************************************************
`Megablocks <https://github.com/databricks/megablocks>`__ is a lightweight library Megablocks is a light-weight library for mixture-of-experts (MoE) training.
for mixture-of-experts `(MoE) <https://huggingface.co/blog/moe>`__ training.
The core of the system is efficient "dropless-MoE" and standard MoE layers. The core of the system is efficient "dropless-MoE" and standard MoE layers.
Megablocks is integrated with `https://github.com/stanford-futuredata/Megatron-LM Megablocks is integrated with `https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`_,
<https://github.com/stanford-futuredata/Megatron-LM>`__,
where data and pipeline parallel training of MoEs is supported. where data and pipeline parallel training of MoEs is supported.
Support overview * ROCm support for Megablocks is hosted in the official `https://github.com/ROCm/megablocks <https://github.com/ROCm/megablocks>`_ repository.
* Due to independent compatibility considerations, this location differs from the `https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`_ upstream repository.
* Use the prebuilt :ref:`Docker image <megablocks-docker-compat>` with ROCm, PyTorch, and Megablocks preinstalled.
* See the :doc:`ROCm Megablocks installation guide <rocm-install-on-linux:install/3rd-party/megablocks-install>` to install and get started.
.. note::
Megablocks is supported on ROCm 6.3.0.
Supported devices
================================================================================ ================================================================================
- The ROCm-supported version of Megablocks is maintained in the official `https://github.com/ROCm/megablocks - **Officially Supported**: AMD Instinct MI300X
<https://github.com/ROCm/megablocks>`__ repository, which differs from the - **Partially Supported** (functionality or performance limitations): AMD Instinct MI250X, MI210X
`https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`__ upstream repository.
- To get started and install Megablocks on ROCm, use the prebuilt :ref:`Docker image <megablocks-docker-compat>`, Supported models and features
which includes ROCm, Megablocks, and all required dependencies. ================================================================================
- See the :doc:`ROCm Megablocks installation guide <rocm-install-on-linux:install/3rd-party/megablocks-install>` This section summarizes the Megablocks features supported by ROCm.
for installation and setup instructions.
* Distributed Pre-training
* Activation Checkpointing and Recomputation
* Distributed Optimizer
* Mixture-of-Experts
* dropless-Mixture-of-Experts
.. _megablocks-recommendations:
Use cases and recommendations
================================================================================
The `ROCm Megablocks blog post <https://rocm.blogs.amd.com/artificial-intelligence/megablocks/README.html>`_
guides you through leveraging the ROCm platform for pre-training with the Megablocks framework.
It shows how to pre-process datasets and how to begin pre-training on AMD GPUs through:
* Single-GPU pre-training
* Multi-GPU pre-training
- You can also consult the upstream `Installation guide <https://github.com/databricks/megablocks>`__
for additional context.
.. _megablocks-docker-compat: .. _megablocks-docker-compat:
Compatibility matrix Docker image compatibility
================================================================================ ================================================================================
.. |docker-icon| raw:: html .. |docker-icon| raw:: html
<i class="fab fa-docker"></i> <i class="fab fa-docker"></i>
AMD validates and publishes `Megablocks images <https://hub.docker.com/r/rocm/megablocks/tags>`__ AMD validates and publishes `ROCm Megablocks images <https://hub.docker.com/r/rocm/megablocks/tags>`_
with ROCm backends on Docker Hub. The following Docker image tag and associated with ROCm and Pytorch backends on Docker Hub. The following Docker image tags and associated
inventories represent the latest available Megablocks version from the official Docker Hub. inventories represent the latest Megatron-LM version from the official Docker Hub.
The Docker images have been validated for `ROCm 6.3.0 <https://repo.radeon.com/rocm/apt/6.3/>`_.
Click |docker-icon| to view the image on Docker Hub. Click |docker-icon| to view the image on Docker Hub.
.. list-table:: .. list-table::
@@ -57,7 +80,6 @@ Click |docker-icon| to view the image on Docker Hub.
- PyTorch - PyTorch
- Ubuntu - Ubuntu
- Python - Python
- GPU
* - .. raw:: html * - .. raw:: html
@@ -67,38 +89,5 @@ Click |docker-icon| to view the image on Docker Hub.
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_ - `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
- 24.04 - 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_ - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
- MI300X
Supported models and features with ROCm 6.3.0
================================================================================
This section summarizes the Megablocks features supported by ROCm.
* Distributed Pre-training
* Activation Checkpointing and Recomputation
* Distributed Optimizer
* Mixture-of-Experts
* dropless-Mixture-of-Experts
.. _megablocks-recommendations:
Use cases and recommendations
================================================================================
* The `Efficient MoE training on AMD ROCm: How-to use Megablocks on AMD GPUs
<https://rocm.blogs.amd.com/artificial-intelligence/megablocks/README.html>`__
blog post guides how to leverage the ROCm platform for pre-training using the
Megablocks framework. It introduces a streamlined approach for training Mixture-of-Experts
(MoE) models using the Megablocks library on AMD hardware. Focusing on GPT-2, it
demonstrates how block-sparse computations can enhance scalability and efficiency in MoE
training. The guide provides step-by-step instructions for setting up the environment,
including cloning the repository, building the Docker image, and running the training container.
Additionally, it offers insights into utilizing the ``oscar-1GB.json`` dataset for pre-training
language models. By leveraging Megablocks and the ROCm platform, you can optimize your MoE
training workflows for large-scale transformer models.
It features how to pre-process datasets and how to begin pre-training on AMD GPUs through:
* Single-GPU pre-training
* Multi-GPU pre-training

View File

@@ -2,7 +2,7 @@
.. meta:: .. meta::
:description: PyTorch compatibility :description: PyTorch compatibility
:keywords: GPU, PyTorch, deep learning, framework compatibility :keywords: GPU, PyTorch compatibility
.. version-set:: rocm_version latest .. version-set:: rocm_version latest
@@ -15,42 +15,40 @@ deep learning. PyTorch on ROCm provides mixed-precision and large-scale training
using `MIOpen <https://github.com/ROCm/MIOpen>`__ and using `MIOpen <https://github.com/ROCm/MIOpen>`__ and
`RCCL <https://github.com/ROCm/rccl>`__ libraries. `RCCL <https://github.com/ROCm/rccl>`__ libraries.
PyTorch provides two high-level features: ROCm support for PyTorch is upstreamed into the official PyTorch repository. Due
to independent compatibility considerations, this results in two distinct
release cycles for PyTorch on ROCm:
- Tensor computation (like NumPy) with strong GPU acceleration - ROCm PyTorch release:
- Deep neural networks built on a tape-based autograd system (rapid computation - Provides the latest version of ROCm but might not necessarily support the
of multiple partial derivatives or gradients) latest stable PyTorch version.
Support overview - Offers :ref:`Docker images <pytorch-docker-compat>` with ROCm and PyTorch
================================================================================ preinstalled.
ROCm support for PyTorch is upstreamed into the official PyTorch repository. - ROCm PyTorch repository: `<https://github.com/ROCm/pytorch>`__
ROCm development is aligned with the stable release of PyTorch, while upstream
PyTorch testing uses the stable release of ROCm to maintain consistency:
- The ROCm-supported version of PyTorch is maintained in the official `https://github.com/ROCm/pytorch - See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>`
<https://github.com/ROCm/pytorch>`__ repository, which differs from the to get started.
`https://github.com/pytorch/pytorch <https://github.com/pytorch/pytorch>`__ upstream repository.
- To get started and install PyTorch on ROCm, use the prebuilt :ref:`Docker images <pytorch-docker-compat>`, - Official PyTorch release:
which include ROCm, PyTorch, and all required dependencies.
- See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>` - Provides the latest stable version of PyTorch but might not necessarily
for installation and setup instructions. support the latest ROCm version.
- You can also consult the upstream `Installation guide <https://pytorch.org/get-started/locally/>`__ or - Official PyTorch repository: `<https://github.com/pytorch/pytorch>`__
`Previous versions <https://pytorch.org/get-started/previous-versions/>`__ for additional context.
- See the `Nightly and latest stable version installation guide <https://pytorch.org/get-started/locally/>`__
or `Previous versions <https://pytorch.org/get-started/previous-versions/>`__
to get started.
PyTorch includes tooling that generates HIP source code from the CUDA backend. PyTorch includes tooling that generates HIP source code from the CUDA backend.
This approach allows PyTorch to support ROCm without requiring manual code This approach allows PyTorch to support ROCm without requiring manual code
modifications. For more information, see :doc:`HIPIFY <hipify:index>`. modifications. For more information, see :doc:`HIPIFY <hipify:index>`.
Version support ROCm development is aligned with the stable release of PyTorch, while upstream
-------------------------------------------------------------------------------- PyTorch testing uses the stable release of ROCm to maintain consistency.
AMD releases official `ROCm PyTorch Docker images <https://hub.docker.com/r/rocm/pytorch/tags>`_
quarterly alongside new ROCm releases. These images undergo full AMD testing.
.. _pytorch-recommendations: .. _pytorch-recommendations:
@@ -75,12 +73,12 @@ Use cases and recommendations
* The :doc:`Instinct MI300X workload optimization guide </how-to/rocm-for-ai/inference-optimization/workload>` * The :doc:`Instinct MI300X workload optimization guide </how-to/rocm-for-ai/inference-optimization/workload>`
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
GPU using ROCm. This guide helps users achieve optimal performance for accelerator using ROCm. This guide helps users achieve optimal performance for
deep learning and other high-performance computing tasks on the MI300X deep learning and other high-performance computing tasks on the MI300X
GPU. accelerator.
* The :doc:`Inception with PyTorch documentation </conceptual/ai-pytorch-inception>` * The :doc:`Inception with PyTorch documentation </conceptual/ai-pytorch-inception>`
describes how PyTorch integrates with ROCm for AI workloads. It outlines the describes how PyTorch integrates with ROCm for AI workloads It outlines the
use of PyTorch on the ROCm platform and focuses on efficiently leveraging AMD use of PyTorch on the ROCm platform and focuses on efficiently leveraging AMD
GPU hardware for training and inference tasks in AI applications. GPU hardware for training and inference tasks in AI applications.
@@ -91,12 +89,141 @@ For more use cases and recommendations, see `ROCm PyTorch blog posts <https://ro
Docker image compatibility Docker image compatibility
================================================================================ ================================================================================
AMD validates and publishes `PyTorch images <https://hub.docker.com/r/rocm/pytorch/tags>`__ .. |docker-icon| raw:: html
with ROCm backends on Docker Hub.
To find the right image tag, see the :ref:`PyTorch on ROCm installation <i class="fab fa-docker"></i>
documentation <rocm-install-on-linux:pytorch-docker-support>` for a list of
available ``rocm/pytorch`` images. AMD validates and publishes `PyTorch images <https://hub.docker.com/r/rocm/pytorch>`__
with ROCm backends on Docker Hub. The following Docker image tags and associated
inventories were tested on `ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__.
Click |docker-icon| to view the image on Docker Hub.
.. list-table:: PyTorch Docker image components
:header-rows: 1
:class: docker-image-compatibility
* - Docker
- PyTorch
- Ubuntu
- Python
- Apex
- torchvision
- TensorBoard
- MAGMA
- UCX
- OMPI
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-6a287591500b4048a9556c1ecc92bc411fd3d552f6c8233bc399f18eb803e8d6"><i class="fab fa-docker fa-lg"></i></a>
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`__
- 24.04
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`__
- `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.6.0/images/sha256-06b967629ba6657709f04169832cd769a11e6b491e8b1394c361d42d7a0c8b43"><i class="fab fa-docker fa-lg"></i></a>
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`__
- 22.04
- `3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`__
- `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.5.1/images/sha256-62022414217ef6de33ac5b1341e57db8a48e8573fa2ace12d48aa5edd4b99ef0"><i class="fab fa-docker fa-lg"></i></a>
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
- 24.04
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.11_pytorch_release_2.5.1/images/sha256-469a7f74fc149aff31797e011ee41978f6a190adc69fa423b3c6a718a77bd985"><i class="fab fa-docker fa-lg"></i></a>
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
- 22.04
- `3.11 <https://www.python.org/downloads/release/python-31113/>`__
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.5.1/images/sha256-37f41a1cd94019688669a1b20d33ea74156e0c129ef6b8270076ef214a6a1a2c"><i class="fab fa-docker fa-lg"></i></a>
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
- 22.04
- `3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-60824ba83dc1b9d94164925af1f81c0235c105dd555091ec04c57e05177ead1b"><i class="fab fa-docker fa-lg"></i></a>
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
- 24.04
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`__
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-fe944fe083312f901be6891ab4d3ffebf2eaf2cf4f5f0f435ef0b76ec714fabd"><i class="fab fa-docker fa-lg"></i></a>
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
- 22.04
- `3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`__
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.3.0/images/sha256-1d59251c47170c5b8960d1172a4dbe52f5793d8966edd778f168eaf32d56661a"><i class="fab fa-docker fa-lg"></i></a>
- `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`__
- 24.04
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`__
- `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`__
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
Key ROCm libraries for PyTorch Key ROCm libraries for PyTorch
================================================================================ ================================================================================
@@ -239,8 +366,7 @@ feature set available to developers.
Supported modules and data types Supported modules and data types
================================================================================ ================================================================================
The following section outlines the supported data types, modules, and domain The following section outlines the supported data types, modules, and domain libraries available in PyTorch on ROCm.
libraries available in PyTorch on ROCm.
Supported data types Supported data types
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
@@ -339,7 +465,7 @@ with ROCm.
* - Library * - Library
- Description - Description
* - `torchaudio <https://docs.pytorch.org/audio/stable/index.html>`_ * - `torchaudio <https://docs.pytorch.org/audio/stable/index.html>`_
- Audio and signal processing library for PyTorch. Provides utilities for - Audio and signal processing library for PyTorch. Provides utilities for
audio I/O, signal and data processing functions, datasets, model audio I/O, signal and data processing functions, datasets, model
implementations, and application components for audio and speech implementations, and application components for audio and speech
@@ -349,7 +475,7 @@ with ROCm.
you need to explicitly move audio data (waveform tensor) to GPU using you need to explicitly move audio data (waveform tensor) to GPU using
``.to('cuda')``. ``.to('cuda')``.
* - `torchtune <https://meta-pytorch.org/torchtune/stable/index.html>`_ * - `torchtune <https://docs.pytorch.org/torchtune/stable/index.html>`_
- PyTorch-native library designed for fine-tuning large language models - PyTorch-native library designed for fine-tuning large language models
(LLMs). Provides supports the full fine-tuning workflow and offers (LLMs). Provides supports the full fine-tuning workflow and offers
compatibility with popular production inference systems. compatibility with popular production inference systems.
@@ -361,12 +487,21 @@ with ROCm.
popular datasets, model architectures, and common image transformations popular datasets, model architectures, and common image transformations
for computer vision applications. for computer vision applications.
* - `torchdata <https://meta-pytorch.org/data/beta/index.html#torchdata>`_ * - `torchtext <https://docs.pytorch.org/text/stable/index.html>`_
- Text processing library for PyTorch. Provides data processing utilities
and popular datasets for natural language processing, including
tokenization, vocabulary management, and text embeddings.
**Note:** ``torchtext`` does not implement ROCm-specific kernels.
ROCm acceleration is provided through the underlying PyTorch framework
and ROCm library integration. Only official release exists.
* - `torchdata <https://docs.pytorch.org/data/beta/index.html>`_
- Beta library of common modular data loading primitives for easily - Beta library of common modular data loading primitives for easily
constructing flexible and performant data pipelines, with features still constructing flexible and performant data pipelines, with features still
in prototype stage. in prototype stage.
* - `torchrec <https://meta-pytorch.org/torchrec/>`_ * - `torchrec <https://docs.pytorch.org/torchrec/>`_
- PyTorch domain library for common sparsity and parallelism primitives - PyTorch domain library for common sparsity and parallelism primitives
needed for large-scale recommender systems, enabling authors to train needed for large-scale recommender systems, enabling authors to train
models with large embedding tables shared across many GPUs. models with large embedding tables shared across many GPUs.
@@ -398,101 +533,3 @@ with ROCm.
dispatching. dispatching.
**Note:** Only official release exists. **Note:** Only official release exists.
Key features and enhancements for PyTorch 2.9 with ROCm 7.1.1
================================================================================
- Scaled Dot Product Attention (SDPA) upgraded to use AOTriton version 0.11b.
- Default hipBLASLt support enabled for gfx908 architecture on ROCm 6.3 and later.
- MIOpen now supports channels last memory format for 3D convolutions and batch normalization.
- NHWC convolution operations in MIOpen optimized by eliminating unnecessary transpose operations.
- Improved tensor.item() performance by removing redundant synchronization.
- Enhanced performance for element-wise operations and reduction kernels.
- Added support for grouped GEMM operations through fbgemm_gpu generative AI components.
- Resolved device error in Inductor when using CUDA graph trees with HIP.
- Corrected logsumexp scaling in AOTriton-based SDPA implementation.
- Added stream graph capture status validation in memory copy synchronization functions.
Key features and enhancements for PyTorch 2.8 with ROCm 7.1
================================================================================
- MIOpen deep learning optimizations: Further optimized NHWC BatchNorm feature.
- Added float8 support for the DeepSpeed extension, allowing for decreased
memory footprint and increased throughput in training and inference workloads.
- ``torch.nn.functional.scaled_dot_product_attention`` now calls the optimized
flash attention kernel automatically, as shown in the sketch below.
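The following minimal sketch shows that call path; the tensor shapes and dtypes are illustrative only.

.. code-block:: python

   import torch
   import torch.nn.functional as F

   # Query, key, and value tensors in (batch, heads, sequence, head_dim) layout.
   q = torch.randn(1, 8, 128, 64, device="cuda", dtype=torch.float16)
   k = torch.randn(1, 8, 128, 64, device="cuda", dtype=torch.float16)
   v = torch.randn(1, 8, 128, 64, device="cuda", dtype=torch.float16)

   # Eligible inputs are routed to the optimized flash attention kernel
   # without any code changes.
   out = F.scaled_dot_product_attention(q, k, v, is_causal=True)
   print(out.shape)  # torch.Size([1, 8, 128, 64])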
Key features and enhancements for PyTorch 2.7/2.8 with ROCm 7.0
================================================================================
- Enhanced TunableOp framework: Introduces ``tensorfloat32`` support for
TunableOp operations, improved offline tuning for ScaledGEMM operations,
submatrix offline tuning capabilities, and better logging for BLAS operations
without bias vectors.
- Expanded GPU architecture support: Provides optimized support for newer GPU
architectures, including gfx1200 and gfx1201 with preferred hipBLASLt backend
selection, along with improvements for gfx950 and gfx1100 Series GPUs.
- Advanced Triton Integration: AOTriton 0.10b introduces official support for
gfx950 and gfx1201, along with experimental support for gfx1101, gfx1151,
gfx1150, and gfx1200.
- Improved element-wise kernel performance: Delivers enhanced vectorized
element-wise kernels with better support for heterogeneous tensor types and
optimized input vectorization for tensors with mixed data types.
- MIOpen deep learning optimizations: Enables NHWC BatchNorm by default on
ROCm 7.0+, provides ``maxpool`` forward and backward performance improvements
targeting ResNet scenarios, and includes updated launch configurations for
better performance.
- Enhanced memory and tensor operations: Features fixes for in-place ``aten``
sum operations with specialized templated kernels, improved 3D tensor
performance with NHWC format, and better handling of memory-bound matrix
multiplication operations.
- Robust testing and quality improvements: Includes comprehensive test suite
updates with improved tolerance handling for Navi3x architectures, generalized
ROCm-specific test conditions, and enhanced unit test coverage for Flash
Attention and Memory Efficient operations.
- Composable Kernel (CK) updates: Features updated CK submodule integration with
the latest optimizations and performance improvements for core mathematical
operations.
- Development and debugging enhancements: Includes improved source handling for
dynamic compilation, better error handling for atomic operations, and enhanced
state checking for trace operations.
- Integrate APEX fused layer normalization, which can have a positive impact on
text-to-video models.
- Integrate APEX distributed fused LAMB and distributed fused ADAM, which can
have a positive impact on BERT-L and Llama2-SFT.
- FlashAttention v3 has been integrated for AMD GPUs.
- `PyTorch C++ extensions <https://pytorch.org/tutorials/advanced/cpp_extension.html>`_
provide a mechanism for compiling custom operations that can be used during
network training or inference. For AMD platforms, ``amdclang++`` has been
validated as the supported compiler for building these extensions.
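As a minimal sketch of that C++ extension workflow, the snippet below compiles a trivial host-side operator in-process with ``torch.utils.cpp_extension.load_inline``; the module and function names are illustrative. Extensions that contain device code follow the same pattern through the ``cuda_sources`` argument, which the ROCm toolchain hipifies before compilation.

.. code-block:: python

   import torch
   from torch.utils.cpp_extension import load_inline

   # A trivial host-side operator; load_inline compiles and imports it on the fly.
   cpp_source = """
   #include <torch/extension.h>

   torch::Tensor add_one(torch::Tensor x) {
       return x + 1;
   }
   """

   ext = load_inline(
       name="add_one_ext",        # illustrative module name
       cpp_sources=cpp_source,
       functions=["add_one"],
       verbose=True,
   )

   x = torch.arange(4, device="cuda")
   print(ext.add_one(x))  # tensor([1, 2, 3, 4], device='cuda:0')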
Known issues and notes for PyTorch 2.7/2.8 with ROCm 7.0 and ROCm 7.1
================================================================================
- The ``matmul.allow_fp16_reduced_precision_reduction`` and
``matmul.allow_bf16_reduced_precision_reduction`` options under
``torch.backends.cuda`` are not supported. As a result,
reduced-precision reductions using FP16 or BF16 accumulation types are not
available.
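For portable code, one option is to leave these switches at their defaults or pin them explicitly, since the reduced-precision reduction paths they control are unavailable on ROCm either way. A minimal sketch:

.. code-block:: python

   import torch

   # These attributes exist in the PyTorch API, but on ROCm the reduced-precision
   # FP16/BF16 accumulation paths they control are not available, so pinning them
   # to False keeps behavior consistent across backends.
   torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
   torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False

   print(torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction)  # False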

View File

@@ -1,114 +0,0 @@
:orphan:
.. meta::
:description: Ray compatibility
:keywords: GPU, Ray, deep learning, framework compatibility
.. version-set:: rocm_version latest
*******************************************************************************
Ray compatibility
*******************************************************************************
Ray is a unified framework for scaling AI and Python applications from your laptop
to a full cluster, without changing your code. Ray consists of `a core distributed
runtime <https://docs.ray.io/en/latest/ray-core/walkthrough.html>`__ and a set of
`AI libraries <https://docs.ray.io/en/latest/ray-air/getting-started.html>`__ for
simplifying machine learning computations.
Ray is a general-purpose framework that runs many types of workloads efficiently.
Any Python application can be scaled with Ray, without extra infrastructure.
Support overview
================================================================================
- The ROCm-supported version of Ray is maintained in the official `https://github.com/ROCm/ray
<https://github.com/ROCm/ray>`__ repository, which differs from the
`https://github.com/ray-project/ray <https://github.com/ray-project/ray>`__ upstream repository.
- To get started and install Ray on ROCm, use the prebuilt :ref:`Docker image <ray-docker-compat>`,
which includes ROCm, Ray, and all required dependencies.
- See the :doc:`ROCm Ray installation guide <rocm-install-on-linux:install/3rd-party/ray-install>`
for installation and setup instructions.
- You can also consult the upstream `Installation guide <https://docs.ray.io/en/latest/ray-overview/installation.html>`__
for additional context.
.. _ray-docker-compat:
Compatibility matrix
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `ROCm Ray Docker images <https://hub.docker.com/r/rocm/ray/tags>`__
with ROCm backends on Docker Hub. The following Docker image tags and
associated inventories represent the latest Ray version from the official Docker Hub.
Click |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- ROCm
- Ray
- Pytorch
- Ubuntu
- Python
- GPU
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/ray/ray-2.51.1_rocm7.0.0_ubuntu22.04_py3.12_pytorch2.9.0/images/sha256-a02f6766b4ba406f88fd7e85707ec86c04b569834d869a08043ec9bcbd672168"><i class="fab fa-docker fa-lg"></i> rocm/ray</a>
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- `2.51.1 <https://github.com/ROCm/ray/tree/release/2.51.1>`__
- 2.9.0a0+git1c57644
- 22.04
- `3.12.12 <https://www.python.org/downloads/release/python-31212/>`__
- MI300X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/ray/ray-2.48.0.post0_rocm6.4.1_ubuntu24.04_py3.12_pytorch2.6.0/images/sha256-0d166fe6bdced38338c78eedfb96eff92655fb797da3478a62dd636365133cc0"><i class="fab fa-docker fa-lg"></i> rocm/ray</a>
- `6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`__
- `2.48.0.post0 <https://github.com/ROCm/ray/tree/release/2.48.0.post0>`__
- 2.6.0+git684f6f2
- 24.04
- `3.12.10 <https://www.python.org/downloads/release/python-31210/>`__
- MI300X, MI210
Use cases and recommendations
================================================================================
* The `Reinforcement Learning from Human Feedback on AMD GPUs with verl and ROCm
Integration <https://rocm.blogs.amd.com/artificial-intelligence/verl-large-scale/README.html>`__
blog provides an overview of Volcano Engine Reinforcement Learning (verl)
for large language models (LLMs) and discusses its benefits in large-scale
reinforcement learning from human feedback (RLHF). It uses Ray as part of a
hybrid orchestration engine to schedule and coordinate training and inference
tasks in parallel, enabling optimized resource utilization and potential overlap
between these phases. This dynamic resource allocation strategy significantly
improves overall system efficiency. The blog presents verl's performance results,
focusing on throughput and convergence accuracy achieved on AMD Instinct™ MI300X
GPUs. Follow this guide to get started with verl on AMD Instinct GPUs and
accelerate your RLHF training with ROCm-optimized performance.
* The `Exploring Use Cases for Scalable AI: Implementing Ray with ROCm Support for Efficient ML Workflows
<https://rocm.blogs.amd.com/artificial-intelligence/rocm-ray/README.html>`__
blog post describes key use cases such as training and inference for large language models (LLMs),
model serving, hyperparameter tuning, reinforcement learning, and the orchestration of large-scale
workloads using Ray in the ROCm environment.
For more use cases and recommendations, see the AMD GPU tabs in the `Accelerator Support
topic <https://docs.ray.io/en/latest/ray-core/scheduling/accelerators.html#accelerator-support>`__
of the Ray core documentation and refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__,
where you can search for Ray examples and best practices to optimize your workloads on AMD GPUs.
Previous versions
===============================================================================
See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/ray-history` to find documentation for previous releases
of the ``ROCm/ray`` Docker image.

View File

@@ -2,7 +2,7 @@
.. meta:: .. meta::
:description: Stanford Megatron-LM compatibility :description: Stanford Megatron-LM compatibility
:keywords: Stanford, Megatron-LM, deep learning, framework compatibility :keywords: Stanford, Megatron-LM, compatibility
.. version-set:: rocm_version latest .. version-set:: rocm_version latest
@@ -10,76 +10,34 @@
Stanford Megatron-LM compatibility Stanford Megatron-LM compatibility
******************************************************************************** ********************************************************************************
Stanford Megatron-LM is a large-scale language model training framework developed Stanford Megatron-LM is a large-scale language model training framework developed by NVIDIA `https://github.com/NVIDIA/Megatron-LM <https://github.com/NVIDIA/Megatron-LM>`_. It is
by NVIDIA at `https://github.com/NVIDIA/Megatron-LM <https://github.com/NVIDIA/Megatron-LM>`_. designed to train massive transformer-based language models efficiently by model and data parallelism.
It is designed to train massive transformer-based language models efficiently by model
and data parallelism.
It provides efficient tensor, pipeline, and sequence-based model parallelism for * ROCm support for Stanford Megatron-LM is hosted in the official `https://github.com/ROCm/Stanford-Megatron-LM <https://github.com/ROCm/Stanford-Megatron-LM>`_ repository.
pre-training transformer-based language models such as GPT (Decoder Only), BERT * Due to independent compatibility considerations, this location differs from the `https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`_ upstream repository.
(Encoder Only), and T5 (Encoder-Decoder). * Use the prebuilt :ref:`Docker image <megatron-lm-docker-compat>` with ROCm, PyTorch, and Megatron-LM preinstalled.
* See the :doc:`ROCm Stanford Megatron-LM installation guide <rocm-install-on-linux:install/3rd-party/stanford-megatron-lm-install>` to install and get started.
Support overview .. note::
Stanford Megatron-LM is supported on ROCm 6.3.0.
Supported Devices
================================================================================ ================================================================================
- The ROCm-supported version of Stanford Megatron-LM is maintained in the official `https://github.com/ROCm/Stanford-Megatron-LM - **Officially Supported**: AMD Instinct MI300X
<https://github.com/ROCm/Stanford-Megatron-LM>`__ repository, which differs from the - **Partially Supported** (functionality or performance limitations): AMD Instinct MI250X, MI210X
`https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`__ upstream repository.
- To get started and install Stanford Megatron-LM on ROCm, use the prebuilt :ref:`Docker image <megatron-lm-docker-compat>`,
which includes ROCm, Stanford Megatron-LM, and all required dependencies.
- See the :doc:`ROCm Stanford Megatron-LM installation guide <rocm-install-on-linux:install/3rd-party/stanford-megatron-lm-install>` Supported models and features
for installation and setup instructions.
- You can also consult the upstream `Installation guide <https://github.com/NVIDIA/Megatron-LM>`__
for additional context.
.. _megatron-lm-docker-compat:
Compatibility matrix
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `Stanford Megatron-LM images <https://hub.docker.com/r/rocm/stanford-megatron-lm/tags>`_
with ROCm and Pytorch backends on Docker Hub. The following Docker image tags and associated
inventories represent the latest Stanford Megatron-LM version from the official Docker Hub.
Click |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- ROCm
- Stanford Megatron-LM
- PyTorch
- Ubuntu
- Python
- GPU
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/stanford-megatron-lm/stanford-megatron-lm85f95ae_rocm6.3.0_ubuntu24.04_py3.12_pytorch2.4.0/images/sha256-070556f078be10888a1421a2cb4f48c29f28b02bfeddae02588d1f7fc02a96a6"><i class="fab fa-docker fa-lg"></i> rocm/stanford-megatron-lm</a>
- `6.3.0 <https://repo.radeon.com/rocm/apt/6.3/>`_
- `85f95ae <https://github.com/stanford-futuredata/Megatron-LM/commit/85f95aef3b648075fe6f291c86714fdcbd9cd1f5>`_
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
- MI300X
Supported models and features with ROCm 6.3.0
================================================================================ ================================================================================
This section details models & features that are supported by the ROCm version on Stanford Megatron-LM. This section details models & features that are supported by the ROCm version on Stanford Megatron-LM.
Models: Models:
* BERT * Bert
* GPT * GPT
* T5 * T5
* ICT * ICT
@@ -96,21 +54,47 @@ Features:
Use cases and recommendations Use cases and recommendations
================================================================================ ================================================================================
The following blog post mentions Megablocks, but you can run Stanford Megatron-LM with the same steps to pre-process datasets on AMD GPUs: See the `Efficient MoE training on AMD ROCm: How-to use Megablocks on AMD GPUs blog <https://rocm.blogs.amd.com/artificial-intelligence/megablocks/README.html>`_ post
to leverage the ROCm platform for pre-training by using the Stanford Megatron-LM framework of pre-processing datasets on AMD GPUs.
Coverage includes:
* The `Efficient MoE training on AMD ROCm: How-to use Megablocks on AMD GPUs * Single-GPU pre-training
<https://rocm.blogs.amd.com/artificial-intelligence/megablocks/README.html>`__ * Multi-GPU pre-training
blog post guides how to leverage the ROCm platform for pre-training using the
Megablocks framework. It introduces a streamlined approach for training Mixture-of-Experts
(MoE) models using the Megablocks library on AMD hardware. Focusing on GPT-2, it
demonstrates how block-sparse computations can enhance scalability and efficiency in MoE
training. The guide provides step-by-step instructions for setting up the environment,
including cloning the repository, building the Docker image, and running the training container.
Additionally, it offers insights into utilizing the ``oscar-1GB.json`` dataset for pre-training
language models. By leveraging Megablocks and the ROCm platform, you can optimize your MoE
training workflows for large-scale transformer models.
It features how to pre-process datasets and how to begin pre-training on AMD GPUs through:
* Single-GPU pre-training .. _megatron-lm-docker-compat:
* Multi-GPU pre-training
Docker image compatibility
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `Stanford Megatron-LM images <https://hub.docker.com/r/rocm/megatron-lm>`_
with ROCm and Pytorch backends on Docker Hub. The following Docker image tags and associated
inventories represent the latest Megatron-LM version from the official Docker Hub.
The Docker images have been validated for `ROCm 6.3.0 <https://repo.radeon.com/rocm/apt/6.3/>`_.
Click |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- Stanford Megatron-LM
- PyTorch
- Ubuntu
- Python
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/stanford-megatron-lm/stanford-megatron-lm85f95ae_rocm6.3.0_ubuntu24.04_py3.12_pytorch2.4.0/images/sha256-070556f078be10888a1421a2cb4f48c29f28b02bfeddae02588d1f7fc02a96a6"><i class="fab fa-docker fa-lg"></i></a>
- `85f95ae <https://github.com/stanford-futuredata/Megatron-LM/commit/85f95aef3b648075fe6f291c86714fdcbd9cd1f5>`_
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_

View File

@@ -0,0 +1,76 @@
:orphan:
.. meta::
:description: Taichi compatibility
:keywords: GPU, Taichi compatibility
.. version-set:: rocm_version latest
*******************************************************************************
Taichi compatibility
*******************************************************************************
`Taichi <https://www.taichi-lang.org/>`_ is an open-source, imperative, and parallel
programming language designed for high-performance numerical computation.
Embedded in Python, it leverages just-in-time (JIT) compilation frameworks such as LLVM to accelerate
compute-intensive Python code by compiling it to native GPU or CPU instructions.
Taichi is widely used across various domains, including real-time physical simulation,
numerical computing, augmented reality, artificial intelligence, computer vision, robotics,
visual effects in film and gaming, and general-purpose computing.
* ROCm support for Taichi is hosted in the official `https://github.com/ROCm/taichi <https://github.com/ROCm/taichi>`_ repository.
* Due to independent compatibility considerations, this location differs from the `https://github.com/taichi-dev <https://github.com/taichi-dev>`_ upstream repository.
* Use the prebuilt :ref:`Docker image <taichi-docker-compat>` with ROCm, PyTorch, and Taichi preinstalled.
* See the :doc:`ROCm Taichi installation guide <rocm-install-on-linux:install/3rd-party/taichi-install>` to install and get started.
.. note::
Taichi is supported on ROCm 6.3.2.
Supported devices and features
===============================================================================
There is support through the ROCm software stack for all Taichi GPU features on AMD Instinct MI250X and MI210X series GPUs, with the exception of Taichi's GPU rendering system, CGUI.
AMD Instinct MI300X series GPUs will be supported by November.
.. _taichi-recommendations:
Use cases and recommendations
================================================================================
To fully leverage Taichi's performance capabilities in compute-intensive tasks, it is best to adhere to specific coding patterns and utilize Taichi decorators.
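The following is a minimal sketch of that pattern, assuming a working Taichi installation with a GPU backend available; the field size and kernel body are illustrative.

.. code-block:: python

   import taichi as ti

   # Select a GPU backend when one is available; Taichi falls back to CPU otherwise.
   ti.init(arch=ti.gpu)

   n = 1_000_000
   x = ti.field(dtype=ti.f32, shape=n)

   @ti.kernel
   def fill_squares():
       # The outermost loop in a @ti.kernel is parallelized across the device.
       for i in x:
           x[i] = i * i

   fill_squares()
   print(x[10])  # 100.0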
A collection of example use cases is available in the `https://github.com/ROCm/taichi_examples <https://github.com/ROCm/taichi_examples>`_ repository,
providing practical insights and foundational knowledge for working with the Taichi programming language.
You can also refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`_ to search for Taichi examples and best practices to optimize your workflows on AMD GPUs.
.. _taichi-docker-compat:
Docker image compatibility
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes ready-made `ROCm Taichi Docker images <https://hub.docker.com/r/rocm/taichi/tags>`_
with ROCm backends on Docker Hub. The following Docker image tags and associated inventories
represent the latest Taichi version from the official Docker Hub.
The Docker images have been validated for `ROCm 6.3.2 <https://rocm.docs.amd.com/en/docs-6.3.2/about/release-notes.html>`_.
Click |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- ROCm
- Taichi
- Ubuntu
- Python
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/taichi/taichi-1.8.0b1_rocm6.3.2_ubuntu22.04_py3.10.12/images/sha256-e016964a751e6a92199032d23e70fa3a564fff8555afe85cd718f8aa63f11fc6"><i class="fab fa-docker fa-lg"></i> rocm/taichi</a>
- `6.3.2 <https://repo.radeon.com/rocm/apt/6.3.2/>`_
- `1.8.0b1 <https://github.com/taichi-dev/taichi>`_
- 22.04
- `3.10.12 <https://www.python.org/downloads/release/python-31012/>`_

View File

@@ -2,7 +2,7 @@
.. meta:: .. meta::
:description: TensorFlow compatibility :description: TensorFlow compatibility
:keywords: GPU, TensorFlow, deep learning, framework compatibility :keywords: GPU, TensorFlow compatibility
.. version-set:: rocm_version latest .. version-set:: rocm_version latest
@@ -12,46 +12,115 @@ TensorFlow compatibility
`TensorFlow <https://www.tensorflow.org/>`__ is an open-source library for `TensorFlow <https://www.tensorflow.org/>`__ is an open-source library for
solving machine learning, deep learning, and AI problems. It can solve many solving machine learning, deep learning, and AI problems. It can solve many
problems across different sectors and industries, but primarily focuses on problems across different sectors and industries but primarily focuses on
neural network training and inference. It is one of the most popular deep neural network training and inference. It is one of the most popular and
learning frameworks and is very active in open-source development. in-demand frameworks and is very active in open-source contribution and
development.
Support overview
================================================================================
- The ROCm-supported version of TensorFlow is maintained in the official `https://github.com/ROCm/tensorflow-upstream
<https://github.com/ROCm/tensorflow-upstream>`__ repository, which differs from the
`https://github.com/tensorflow/tensorflow <https://github.com/tensorflow/tensorflow>`__ upstream repository.
- To get started and install TensorFlow on ROCm, use the prebuilt :ref:`Docker images <tensorflow-docker-compat>`,
which include ROCm, TensorFlow, and all required dependencies.
- See the :doc:`ROCm TensorFlow installation guide <rocm-install-on-linux:install/3rd-party/tensorflow-install>`
for installation and setup instructions.
- You can also consult the `TensorFlow API versions <https://www.tensorflow.org/versions>`__ list
for additional context.
Version support
--------------------------------------------------------------------------------
The `official TensorFlow repository <http://github.com/tensorflow/tensorflow>`__ The `official TensorFlow repository <http://github.com/tensorflow/tensorflow>`__
includes full ROCm support. AMD maintains a TensorFlow `ROCm repository includes full ROCm support. AMD maintains a TensorFlow `ROCm repository
<http://github.com/rocm/tensorflow-upstream>`__ in order to quickly add bug <http://github.com/rocm/tensorflow-upstream>`__ in order to quickly add bug
fixes, updates, and support for the latest ROCm versions. fixes, updates, and support for the latest ROCM versions.
- ROCm TensorFlow release:
- Offers :ref:`Docker images <tensorflow-docker-compat>` with
ROCm and TensorFlow pre-installed.
- ROCm TensorFlow repository: `<https://github.com/ROCm/tensorflow-upstream>`__
- See the :doc:`ROCm TensorFlow installation guide <rocm-install-on-linux:install/3rd-party/tensorflow-install>`
to get started.
- Official TensorFlow release:
- Official TensorFlow repository: `<https://github.com/tensorflow/tensorflow>`__
- See the `TensorFlow API versions <https://www.tensorflow.org/versions>`__ list.
.. note::
The official TensorFlow documentation does not cover ROCm support. Use the
ROCm documentation for installation instructions for Tensorflow on ROCm.
See :doc:`rocm-install-on-linux:install/3rd-party/tensorflow-install`.
.. _tensorflow-docker-compat: .. _tensorflow-docker-compat:
Docker image compatibility Docker image compatibility
================================================================================ ===============================================================================
AMD provides preconfigured Docker images with TensorFlow and the ROCm backend. .. |docker-icon| raw:: html
These images are published on `Docker Hub <https://hub.docker.com/r/rocm/tensorflow>`__ and are the
recommended way to get started with deep learning with TensorFlow on ROCm.
To find the right image tag, see the :ref:`TensorFlow on ROCm installation <i class="fab fa-docker"></i>
documentation <rocm-install-on-linux:tensorflow-docker-support>` for a list of
available ``rocm/tensorflow`` images. AMD validates and publishes ready-made `TensorFlow images
<https://hub.docker.com/r/rocm/tensorflow>`__ with ROCm backends on
Docker Hub. The following Docker image tags and associated inventories are
validated for `ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__. Click
the |docker-icon| icon to view the image on Docker Hub.
.. list-table:: TensorFlow Docker image components
:header-rows: 1
* - Docker image
- TensorFlow
- Ubuntu
- Python
- TensorBoard
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.18-dev/images/sha256-96754ce2d30f729e19b497279915b5212ba33d5e408e7e5dd3f2304d87e3441e"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- 24.04
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.18-dev/images/sha256-fa741508d383858e86985a9efac85174529127408102558ae2e3a4ac894eea1e"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
- 22.04
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.17-dev/images/sha256-3a0aef09f2a8833c2b64b85874dd9449ffc2ad257351857338ff5b706c03a418"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- 24.04
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.17-dev/images/sha256-bc7341a41ebe7ab261aa100732874507c452421ef733e408ac4f05ed453b0bc5"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
- 22.04
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.16-dev/images/sha256-4841a8df7c340dab79bf9362dad687797649a00d594e0832eb83ea6880a40d3b"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.16.2-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- 24.04
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.16-dev/images/sha256-883fa95aba960c58a3e46fceaa18f03ede2c7df89b8e9fd603ab2d47e0852897"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.16.2-cp310-cp310-manylinux_2_28_x86_64.whl>`__
- 22.04
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`__
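After pulling one of the images above, a quick way to confirm that the TensorFlow build inside the container actually sees the ROCm-enabled GPUs is the short Python check below (a minimal sketch; the reported device count depends on the host and on any device-visibility environment variables that are set).

```python
# Minimal sketch: confirm that the TensorFlow build in a rocm/tensorflow
# container can enumerate the ROCm-enabled GPUs on the host.
import tensorflow as tf

gpus = tf.config.list_physical_devices("GPU")
print(f"TensorFlow {tf.__version__} sees {len(gpus)} GPU(s)")
for gpu in gpus:
    print(gpu)
```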
Critical ROCm libraries for TensorFlow Critical ROCm libraries for TensorFlow
@@ -136,7 +205,7 @@ The following section maps supported data types and GPU-accelerated TensorFlow
features to their minimum supported ROCm and TensorFlow versions. features to their minimum supported ROCm and TensorFlow versions.
Data types Data types
--------------- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The data type of a tensor is specified using the ``dtype`` attribute or The data type of a tensor is specified using the ``dtype`` attribute or
argument, and TensorFlow supports a wide range of data types for different use argument, and TensorFlow supports a wide range of data types for different use
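As a small illustration of the ``dtype`` argument discussed above, the sketch below creates a tensor in the default single precision and casts it to two reduced-precision types; which types are GPU-accelerated depends on the ROCm and TensorFlow versions listed in the tables that follow.

```python
# Minimal sketch: specifying and converting tensor data types in TensorFlow.
import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0], dtype=tf.float32)  # explicit single precision
h = tf.cast(x, tf.float16)                           # half precision
b = tf.cast(x, tf.bfloat16)                          # bfloat16
print(x.dtype, h.dtype, b.dtype)
```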
@@ -254,7 +323,7 @@ are as follows:
- 1.7 - 1.7
Features Features
--------------- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This table provides an overview of key features in TensorFlow and their This table provides an overview of key features in TensorFlow and their
availability in ROCm. availability in ROCm.
@@ -346,7 +415,7 @@ availability in ROCm.
- 1.9.2 - 1.9.2
Distributed library features Distributed library features
----------------------------------- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Enables developers to scale computations across multiple devices on a single machine or Enables developers to scale computations across multiple devices on a single machine or
across multiple machines. across multiple machines.

View File

@@ -2,7 +2,7 @@
.. meta:: .. meta::
:description: verl compatibility :description: verl compatibility
:keywords: GPU, verl, deep learning, framework compatibility :keywords: GPU, verl compatibility
.. version-set:: rocm_version latest .. version-set:: rocm_version latest
@@ -10,109 +10,77 @@
verl compatibility verl compatibility
******************************************************************************* *******************************************************************************
Volcano Engine Reinforcement Learning for LLMs (`verl <https://verl.readthedocs.io/en/latest/>`__) Volcano Engine Reinforcement Learning for LLMs (verl) is a reinforcement learning framework designed for large language models (LLMs).
is a reinforcement learning framework designed for large language models (LLMs). verl offers a scalable, open-source fine-tuning solution optimized for AMD Instinct GPUs with full ROCm support.
verl offers a scalable, open-source fine-tuning solution by using a hybrid programming model
that makes it easy to define and run complex post-training dataflows efficiently.
Its modular APIs separate computation from data, allowing smooth integration with other frameworks. * See the `verl documentation <https://verl.readthedocs.io/en/latest/>`_ for more information about verl.
It also supports flexible model placement across GPUs for efficient scaling on different cluster sizes. * The official verl GitHub repository is `https://github.com/volcengine/verl <https://github.com/volcengine/verl>`_.
verl achieves high training and generation throughput by building on existing LLM frameworks. * Use the AMD-validated :ref:`Docker images <verl-docker-compat>` with ROCm and verl preinstalled.
Its 3D-HybridEngine reduces memory use and communication overhead when switching between training * See the :doc:`ROCm verl installation guide <rocm-install-on-linux:install/3rd-party/verl-install>` to install and get started.
and inference, improving overall performance.
Support overview .. note::
================================================================================
- The ROCm-supported version of verl is maintained in the official `https://github.com/ROCm/verl verl is supported on ROCm 6.2.0.
<https://github.com/ROCm/verl>`__ repository, which differs from the
`https://github.com/volcengine/verl <https://github.com/volcengine/verl>`__ upstream repository.
- To get started and install verl on ROCm, use the prebuilt :ref:`Docker image <verl-docker-compat>`,
which includes ROCm, verl, and all required dependencies.
- See the :doc:`ROCm verl installation guide <rocm-install-on-linux:install/3rd-party/verl-install>`
for installation and setup instructions.
- You can also consult the upstream `verl documentation <https://verl.readthedocs.io/en/latest/>`__
for additional context.
.. _verl-docker-compat:
Compatibility matrix
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `verl Docker images <https://hub.docker.com/r/rocm/verl/tags>`_
with ROCm backends on Docker Hub. The following Docker image tags and associated inventories
represent the available verl versions from the official Docker Hub.
Click |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- ROCm
- verl
- Ubuntu
- PyTorch
- Python
- vllm
- GPU
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/verl/verl-0.6.0.amd0_rocm7.0_vllm0.11.0.dev/images/sha256-f70a3ebc94c1f66de42a2fcc3f8a6a8d6d0881eb0e65b6958d7d6d24b3eecb0d"><i class="fab fa-docker fa-lg"></i> rocm/verl</a>
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
- `0.6.0 <https://github.com/volcengine/verl/releases/tag/v0.6.0>`__
- 22.04
- `2.9.0 <https://github.com/ROCm/pytorch/tree/release/2.9-rocm7.x-gfx115x>`__
- `3.12.11 <https://www.python.org/downloads/release/python-31211/>`__
- `0.11.0 <https://github.com/vllm-project/vllm/releases/tag/v0.11.0>`__
- MI300X
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/verl/verl-0.3.0.post0_rocm6.2_vllm0.6.3/images/sha256-cbe423803fd7850448b22444176bee06f4dcf22cd3c94c27732752d3a39b04b2"><i class="fab fa-docker fa-lg"></i> rocm/verl</a>
- `6.2.0 <https://repo.radeon.com/rocm/apt/6.2/>`__
- `0.3.0.post0 <https://github.com/volcengine/verl/releases/tag/v0.3.0.post0>`__
- 20.04
- `2.5.0 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
- `3.9.19 <https://www.python.org/downloads/release/python-3919/>`__
- `0.6.3 <https://github.com/vllm-project/vllm/releases/tag/v0.6.3>`__
- MI300X
.. _verl-supported_features:
Supported modules with verl on ROCm
===============================================================================
The following GPU-accelerated modules are supported with verl on ROCm:
- ``FSDP``: Training engine
- ``vllm``: Inference engine
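As a rough sketch of how these two modules come together in practice, a PPO run is typically launched through verl's Hydra-style entry point with the training strategy set to FSDP and the rollout engine set to vLLM. The module path and override names below are taken from upstream verl examples and may differ between verl versions, so treat them as assumptions and verify them against the version shipped in the Docker image.

```python
# Minimal sketch (assumed entry point and override names; verify against your
# verl version). Launches verl's PPO trainer with FSDP as the training engine
# and vLLM as the inference (rollout) engine.
import subprocess

overrides = [
    "actor_rollout_ref.actor.strategy=fsdp",  # training engine
    "actor_rollout_ref.rollout.name=vllm",    # inference (rollout) engine
    "trainer.n_gpus_per_node=8",              # for example, one 8-GPU MI300X node
]

subprocess.run(["python3", "-m", "verl.trainer.main_ppo", *overrides], check=True)
```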
.. _verl-recommendations: .. _verl-recommendations:
Use cases and recommendations Use cases and recommendations
================================================================================ ================================================================================
* The benefits of verl in large-scale reinforcement learning from human feedback The benefits of verl in large-scale reinforcement learning from human feedback (RLHF) are discussed in the `Reinforcement Learning from Human Feedback on AMD GPUs with verl and ROCm Integration <https://rocm.blogs.amd.com/artificial-intelligence/verl-large-scale/README.html>`_ blog.
(RLHF) are discussed in the `Reinforcement Learning from Human Feedback on AMD
GPUs with verl and ROCm Integration <https://rocm.blogs.amd.com/artificial-intelligence/verl-large-scale/README.html>`__
blog. The blog post outlines how the Volcano Engine Reinforcement Learning
(verl) framework integrates with the AMD ROCm platform to optimize training on
AMD Instinct™ GPUs. The guide details the process of building a Docker image,
setting up single-node and multi-node training environments, and highlights
performance benchmarks demonstrating improved throughput and convergence accuracy.
This resource serves as a comprehensive starting point for deploying verl on AMD GPUs,
facilitating efficient RLHF training workflows.
Previous versions .. _verl-supported_features:
Supported features
=============================================================================== ===============================================================================
See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/verl-history` to find documentation for previous releases
of the ``ROCm/verl`` Docker image. The following table shows verl on ROCm support for GPU-accelerated modules.
.. list-table::
:header-rows: 1
* - Module
- Description
- verl version
- ROCm version
* - ``FSDP``
- Training engine
- 0.3.0.post0
- 6.2.0
* - ``vllm``
- Inference engine
- 0.3.0.post0
- 6.2.0
.. _verl-docker-compat:
Docker image compatibility
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes ready-made `ROCm verl Docker images <https://hub.docker.com/r/rocm/verl/tags>`_
with ROCm backends on Docker Hub. The following Docker image tags and associated inventories represent the available verl versions from the official Docker Hub.
.. list-table::
:header-rows: 1
* - Docker image
- ROCm
- verl
- Ubuntu
- PyTorch
- Python
- vllm
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/verl/verl-0.3.0.post0_rocm6.2_vllm0.6.3/images/sha256-cbe423803fd7850448b22444176bee06f4dcf22cd3c94c27732752d3a39b04b2"><i class="fab fa-docker fa-lg"></i> rocm/verl</a>
- `6.2.0 <https://repo.radeon.com/rocm/apt/6.2/>`_
- `0.3.0.post0 <https://github.com/volcengine/verl/releases/tag/v0.3.0.post0>`_
- 20.04
- `2.5.0 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
- `3.9.19 <https://www.python.org/downloads/release/python-3919/>`_
- `0.6.3 <https://github.com/vllm-project/vllm/releases/tag/v0.6.3>`_

View File

@@ -13,22 +13,21 @@
:gutter: 1 :gutter: 1
:::{grid-item-card} :::{grid-item-card}
**AMD Instinct MI300 Series** **AMD Instinct MI300 series**
Review hardware aspects of the AMD Instinct™ MI300 Series GPUs and the CDNA™ 3 Review hardware aspects of the AMD Instinct™ MI300 series of GPU accelerators and the CDNA™ 3
architecture. architecture.
* [AMD Instinct™ MI300 microarchitecture](./gpu-arch/mi300.md) * [AMD Instinct™ MI300 microarchitecture](./gpu-arch/mi300.md)
* [AMD Instinct MI300/CDNA3 ISA](https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf) * [AMD Instinct MI300/CDNA3 ISA](https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf)
* [White paper](https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf) * [White paper](https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf)
* [MI300 performance counters](./gpu-arch/mi300-mi200-performance-counters.rst) * [Performance counters](./gpu-arch/mi300-mi200-performance-counters.rst)
* [MI350 Series performance counters](./gpu-arch/mi350-performance-counters.rst)
::: :::
:::{grid-item-card} :::{grid-item-card}
**AMD Instinct MI200 Series** **AMD Instinct MI200 series**
Review hardware aspects of the AMD Instinct™ MI200 Series GPUs and the CDNA™ 2 Review hardware aspects of the AMD Instinct™ MI200 series of GPU accelerators and the CDNA™ 2
architecture. architecture.
* [AMD Instinct™ MI250 microarchitecture](./gpu-arch/mi250.md) * [AMD Instinct™ MI250 microarchitecture](./gpu-arch/mi250.md)
@@ -41,7 +40,7 @@ architecture.
:::{grid-item-card} :::{grid-item-card}
**AMD Instinct MI100** **AMD Instinct MI100**
Review hardware aspects of the AMD Instinct™ MI100 Series GPUs and the CDNA™ 1 Review hardware aspects of the AMD Instinct™ MI100 series of GPU accelerators and the CDNA™ 1
architecture. architecture.
* [AMD Instinct™ MI100 microarchitecture](./gpu-arch/mi100.md) * [AMD Instinct™ MI100 microarchitecture](./gpu-arch/mi100.md)

View File

@@ -1,14 +1,14 @@
--- ---
myst: myst:
html_meta: html_meta:
"description lang=en": "Learn about the AMD Instinct MI100 Series architecture." "description lang=en": "Learn about the AMD Instinct MI100 series architecture."
"keywords": "Instinct, MI100, microarchitecture, AMD, ROCm" "keywords": "Instinct, MI100, microarchitecture, AMD, ROCm"
--- ---
# AMD Instinct™ MI100 microarchitecture # AMD Instinct™ MI100 microarchitecture
The following image shows the node-level architecture of a system that The following image shows the node-level architecture of a system that
comprises two AMD EPYC™ processors and (up to) eight AMD Instinct™ GPUs. comprises two AMD EPYC™ processors and (up to) eight AMD Instinct™ accelerators.
The two EPYC processors are connected to each other with the AMD Infinity™ The two EPYC processors are connected to each other with the AMD Infinity™
fabric, which provides high-bandwidth (up to 18 GT/sec), coherent links such fabric, which provides high-bandwidth (up to 18 GT/sec), coherent links such
that each processor can access the available node memory as a single that each processor can access the available node memory as a single
@@ -18,29 +18,29 @@ available to connect the processors plus one PCIe Gen 4 x16 link per processor
can attach additional I/O devices such as the host adapters for the network can attach additional I/O devices such as the host adapters for the network
fabric. fabric.
![Structure of a single GCD in the AMD Instinct MI100 GPU](../../data/conceptual/gpu-arch/image004.png "Node-level system architecture with two AMD EPYC™ processors and eight AMD Instinct™ GPUs.") ![Structure of a single GCD in the AMD Instinct MI100 accelerator](../../data/conceptual/gpu-arch/image004.png "Node-level system architecture with two AMD EPYC™ processors and eight AMD Instinct™ accelerators.")
In a typical node configuration, each processor can host up to four AMD In a typical node configuration, each processor can host up to four AMD
Instinct™ GPUs that are attached using PCIe Gen 4 links at 16 GT/sec, Instinct™ accelerators that are attached using PCIe Gen 4 links at 16 GT/sec,
which corresponds to a peak bidirectional link bandwidth of 32 GB/sec. Each hive which corresponds to a peak bidirectional link bandwidth of 32 GB/sec. Each hive
of four GPUs can participate in a fully connected, coherent AMD of four accelerators can participate in a fully connected, coherent AMD
Instinct™ fabric that connects the four GPUs using 23 GT/sec AMD Instinct™ fabric that connects the four accelerators using 23 GT/sec AMD
Infinity fabric links that run at a higher frequency than the inter-processor Infinity fabric links that run at a higher frequency than the inter-processor
links. This inter-GPU link can be established in certified server systems if the links. This inter-GPU link can be established in certified server systems if the
GPUs are mounted in neighboring PCIe slots by installing the AMD Infinity GPUs are mounted in neighboring PCIe slots by installing the AMD Infinity
Fabric™ bridge for the AMD Instinct™ GPUs. Fabric™ bridge for the AMD Instinct™ accelerators.
## Microarchitecture ## Microarchitecture
The microarchitecture of the AMD Instinct GPUs is based on the AMD CDNA The microarchitecture of the AMD Instinct accelerators is based on the AMD CDNA
architecture, which targets compute applications such as high-performance architecture, which targets compute applications such as high-performance
computing (HPC) and AI & machine learning (ML) that run on everything from computing (HPC) and AI & machine learning (ML) that run on everything from
individual servers to the world's largest exascale supercomputers. The overall individual servers to the world's largest exascale supercomputers. The overall
system architecture is designed for extreme scalability and compute performance. system architecture is designed for extreme scalability and compute performance.
![Structure of the AMD Instinct GPU (MI100 generation)](../../data/conceptual/gpu-arch/image005.png "Structure of the AMD Instinct GPU (MI100 generation)") ![Structure of the AMD Instinct accelerator (MI100 generation)](../../data/conceptual/gpu-arch/image005.png "Structure of the AMD Instinct accelerator (MI100 generation)")
The above image shows the AMD Instinct GPU with its PCIe Gen 4 x16 The above image shows the AMD Instinct accelerator with its PCIe Gen 4 x16
link (16 GT/sec, at the bottom) that connects the GPU to (one of) the host link (16 GT/sec, at the bottom) that connects the GPU to (one of) the host
processor(s). It also shows the three AMD Infinity Fabric ports that provide processor(s). It also shows the three AMD Infinity Fabric ports that provide
high-speed links (23 GT/sec, also at the bottom) to the other GPUs of the local high-speed links (23 GT/sec, also at the bottom) to the other GPUs of the local
@@ -48,7 +48,7 @@ hive.
On the left and right of the floor plan, the High Bandwidth Memory (HBM) On the left and right of the floor plan, the High Bandwidth Memory (HBM)
attaches via the GPU memory controller. The MI100 generation of the AMD attaches via the GPU memory controller. The MI100 generation of the AMD
Instinct GPU offers four stacks of HBM generation 2 (HBM2) for a total Instinct accelerator offers four stacks of HBM generation 2 (HBM2) for a total
of 32GB with a 4,096-bit-wide memory interface. The peak memory bandwidth of the of 32GB with a 4,096-bit-wide memory interface. The peak memory bandwidth of the
attached HBM2 is 1.228 TB/sec at a memory clock frequency of 1.2 GHz. attached HBM2 is 1.228 TB/sec at a memory clock frequency of 1.2 GHz.
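That bandwidth figure follows directly from the interface width and memory clock, assuming HBM2's double data rate (two transfers per memory clock):

```{math}
4096\,\text{bit} \times 2\,\tfrac{\text{transfers}}{\text{clock}} \times 1.2\,\text{GHz} \div 8\,\tfrac{\text{bit}}{\text{byte}} = 1228.8\ \text{GB/s} \approx 1.228\ \text{TB/s}
```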
@@ -64,7 +64,7 @@ Therefore, the theoretical maximum FP64 peak performance is 11.5 TFLOPS
![Block diagram of an MI100 compute unit with detailed SIMD view of the AMD CDNA architecture](../../data/conceptual/gpu-arch/image006.png "An MI100 compute unit with detailed SIMD view of the AMD CDNA architecture") ![Block diagram of an MI100 compute unit with detailed SIMD view of the AMD CDNA architecture](../../data/conceptual/gpu-arch/image006.png "An MI100 compute unit with detailed SIMD view of the AMD CDNA architecture")
The preceding image shows the block diagram of a single CU of an AMD Instinct™ The preceding image shows the block diagram of a single CU of an AMD Instinct™
MI100 GPU and summarizes how instructions flow through the execution MI100 accelerator and summarizes how instructions flow through the execution
engines. The CU fetches the instructions via a 32KB instruction cache and moves engines. The CU fetches the instructions via a 32KB instruction cache and moves
them forward to execution via a dispatcher. The CU can handle up to ten them forward to execution via a dispatcher. The CU can handle up to ten
wavefronts at a time and feed their instructions into the execution unit. The wavefronts at a time and feed their instructions into the execution unit. The

View File

@@ -1,13 +1,13 @@
--- ---
myst: myst:
html_meta: html_meta:
"description lang=en": "Learn about the AMD Instinct MI250 Series architecture." "description lang=en": "Learn about the AMD Instinct MI250 series architecture."
"keywords": "Instinct, MI250, microarchitecture, AMD, ROCm" "keywords": "Instinct, MI250, microarchitecture, AMD, ROCm"
--- ---
# AMD Instinct™ MI250 microarchitecture # AMD Instinct™ MI250 microarchitecture
The microarchitecture of the AMD Instinct MI250 GPU is based on the The microarchitecture of the AMD Instinct MI250 accelerators is based on the
AMD CDNA 2 architecture that targets compute applications such as HPC, AMD CDNA 2 architecture that targets compute applications such as HPC,
artificial intelligence (AI), and machine learning (ML) and that run on artificial intelligence (AI), and machine learning (ML) and that run on
everything from individual servers to the world's largest exascale everything from individual servers to the world's largest exascale
@@ -40,7 +40,7 @@ execution units (also called matrix cores), which are geared toward executing
matrix operations like matrix-matrix multiplications. For FP64, the peak matrix operations like matrix-matrix multiplications. For FP64, the peak
performance of these units amounts to 90.5 TFLOPS. performance of these units amounts to 90.5 TFLOPS.
![Structure of a single GCD in the AMD Instinct MI250 GPU.](../../data/conceptual/gpu-arch/image001.png "Structure of a single GCD in the AMD Instinct MI250 GPU.") ![Structure of a single GCD in the AMD Instinct MI250 accelerator.](../../data/conceptual/gpu-arch/image001.png "Structure of a single GCD in the AMD Instinct MI250 accelerator.")
```{list-table} Peak-performance capabilities of the MI250 OAM for different data types. ```{list-table} Peak-performance capabilities of the MI250 OAM for different data types.
:header-rows: 1 :header-rows: 1
@@ -84,9 +84,16 @@ performance of these units amounts to 90.5 TFLOPS.
- 362.1 - 362.1
``` ```
The above table summarizes the aggregated peak performance of the AMD Instinct MI250 Open Compute Platform (OCP) Open Accelerator Modules (OAMs) and its two GCDs for different data types and execution units. The middle column lists the peak performance (number of data elements processed in a single instruction) of a single compute unit if a SIMD (or matrix) instruction is being retired in each clock cycle. The third column lists the theoretical peak performance of the OAM module. The theoretical aggregated peak memory bandwidth of the GPU is 3.2 TB/sec (1.6 TB/sec per GCD). The above table summarizes the aggregated peak performance of the AMD
Instinct MI250 OCP Open Accelerator Modules (OAM, OCP is short for Open Compute
Platform) and its two GCDs for different data types and execution units. The
middle column lists the peak performance (number of data elements processed in a
single instruction) of a single compute unit if a SIMD (or matrix) instruction
is being retired in each clock cycle. The third column lists the theoretical
peak performance of the OAM module. The theoretical aggregated peak memory
bandwidth of the GPU is 3.2 TB/sec (1.6 TB/sec per GCD).
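As a cross-check of the 90.5 TFLOPS FP64 matrix figure quoted above, the per-CU rate in the table scales up to the full OAM as sketched below, assuming 256 FLOP per clock per CU for FP64 matrix operations, 208 CUs across the two GCDs, and a 1.7 GHz peak engine clock (values not stated in this excerpt):

```{math}
256\,\tfrac{\text{FLOP}}{\text{CU}\cdot\text{clock}} \times 208\,\text{CU} \times 1.7\,\text{GHz} \approx 90.5\ \text{TFLOPS}
```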
![Dual-GCD architecture of the AMD Instinct MI250 GPUs](../../data/conceptual/gpu-arch/image002.png "Dual-GCD architecture of the AMD Instinct MI250 GPUs") ![Dual-GCD architecture of the AMD Instinct MI250 accelerators](../../data/conceptual/gpu-arch/image002.png "Dual-GCD architecture of the AMD Instinct MI250 accelerators")
The following image shows the block diagram of an OAM package that consists The following image shows the block diagram of an OAM package that consists
of two GCDs, each of which constitutes one GPU device in the system. The two of two GCDs, each of which constitutes one GPU device in the system. The two
@@ -98,18 +105,18 @@ between the two GCDs of an OAM, or a bidirectional peak transfer bandwidth of
## Node-level architecture ## Node-level architecture
The following image shows the node-level architecture of a system that is The following image shows the node-level architecture of a system that is
based on the AMD Instinct MI250 GPU. The MI250 OAMs attach to the host based on the AMD Instinct MI250 accelerator. The MI250 OAMs attach to the host
system via PCIe Gen 4 x16 links (yellow lines). Each GCD maintains its own PCIe system via PCIe Gen 4 x16 links (yellow lines). Each GCD maintains its own PCIe
x16 link to the host part of the system. Depending on the server platform, the x16 link to the host part of the system. Depending on the server platform, the
GCD can attach to the AMD EPYC processor directly or via an optional PCIe switch GCD can attach to the AMD EPYC processor directly or via an optional PCIe switch
. Note that some platforms may offer an x8 interface to the GCDs, which reduces . Note that some platforms may offer an x8 interface to the GCDs, which reduces
the available host-to-GPU bandwidth. the available host-to-GPU bandwidth.
![Block diagram of AMD Instinct MI250 GPUs with 3rd Generation AMD EPYC processor](../../data/conceptual/gpu-arch/image003.png "Block diagram of AMD Instinct MI250 GPUs with 3rd Generation AMD EPYC processor") ![Block diagram of AMD Instinct MI250 Accelerators with 3rd Generation AMD EPYC processor](../../data/conceptual/gpu-arch/image003.png "Block diagram of AMD Instinct MI250 Accelerators with 3rd Generation AMD EPYC processor")
The preceding image shows the node-level architecture of a system with AMD The preceding image shows the node-level architecture of a system with AMD
EPYC processors in a dual-socket configuration and four AMD Instinct MI250 EPYC processors in a dual-socket configuration and four AMD Instinct MI250
GPUs. The MI250 OAMs attach to the host processors via PCIe Gen 4 accelerators. The MI250 OAMs attach to the host processors via PCIe Gen 4
x16 links (yellow lines). Depending on the system design, a PCIe switch may x16 links (yellow lines). Depending on the system design, a PCIe switch may
exist to make more PCIe lanes available for additional components like network exist to make more PCIe lanes available for additional components like network
interfaces and/or storage devices. Each GCD maintains its own PCIe x16 link to interfaces and/or storage devices. Each GCD maintains its own PCIe x16 link to

View File

@@ -1,16 +1,16 @@
.. meta:: .. meta::
:description: MI300 and MI200 Series performance counters and metrics :description: MI300 and MI200 series performance counters and metrics
:keywords: MI300, MI200, performance counters, command processor counters :keywords: MI300, MI200, performance counters, command processor counters
*************************************************************************************************** ***************************************************************************************************
MI300 and MI200 Series performance counters and metrics MI300 and MI200 series performance counters and metrics
*************************************************************************************************** ***************************************************************************************************
This document lists and describes the hardware performance counters and derived metrics available This document lists and describes the hardware performance counters and derived metrics available
for the AMD Instinct™ MI300 and MI200 GPU. You can also access this information using the for the AMD Instinct™ MI300 and MI200 GPU. You can also access this information using the
:doc:`ROCprofiler-SDK <rocprofiler-sdk:how-to/using-rocprofv3>`. :doc:`ROCprofiler-SDK <rocprofiler-sdk:how-to/using-rocprofv3>`.
MI300 and MI200 Series performance counters MI300 and MI200 series performance counters
=============================================================== ===============================================================
Series performance counters include the following categories: Series performance counters include the following categories:
@@ -27,7 +27,7 @@ The following sections provide additional details for each category.
.. note:: .. note::
Preliminary validation of all MI300 and MI200 Series performance counters is in progress. Those with Preliminary validation of all MI300 and MI200 series performance counters is in progress. Those with
an asterisk (*) require further evaluation. an asterisk (*) require further evaluation.
.. _command-processor-counters: .. _command-processor-counters:
@@ -171,7 +171,7 @@ Instruction mix
"``SQ_INSTS_SMEM``", "Instr", "Number of scalar memory instructions issued" "``SQ_INSTS_SMEM``", "Instr", "Number of scalar memory instructions issued"
"``SQ_INSTS_SMEM_NORM``", "Instr", "Number of scalar memory instructions normalized to match ``smem_level`` issued" "``SQ_INSTS_SMEM_NORM``", "Instr", "Number of scalar memory instructions normalized to match ``smem_level`` issued"
"``SQ_INSTS_FLAT``", "Instr", "Number of flat instructions issued" "``SQ_INSTS_FLAT``", "Instr", "Number of flat instructions issued"
"``SQ_INSTS_FLAT_LDS_ONLY``", "Instr", "**MI200 Series only** Number of FLAT instructions that read/write only from/to LDS issued. Works only if ``EARLY_TA_DONE`` is enabled." "``SQ_INSTS_FLAT_LDS_ONLY``", "Instr", "**MI200 series only** Number of FLAT instructions that read/write only from/to LDS issued. Works only if ``EARLY_TA_DONE`` is enabled."
"``SQ_INSTS_LDS``", "Instr", "Number of LDS instructions issued **(MI200: includes flat; MI300: does not include flat)**" "``SQ_INSTS_LDS``", "Instr", "Number of LDS instructions issued **(MI200: includes flat; MI300: does not include flat)**"
"``SQ_INSTS_GDS``", "Instr", "Number of global data share instructions issued" "``SQ_INSTS_GDS``", "Instr", "Number of global data share instructions issued"
"``SQ_INSTS_EXP_GDS``", "Instr", "Number of EXP and global data share instructions excluding skipped export instructions issued" "``SQ_INSTS_EXP_GDS``", "Instr", "Number of EXP and global data share instructions excluding skipped export instructions issued"
@@ -396,9 +396,9 @@ Texture cache per pipe counters
"``TCP_UTCL1_TRANSLATION_MISS[n]``", "Req", "Number of unified translation cache (L1) translation misses", "0-15" "``TCP_UTCL1_TRANSLATION_MISS[n]``", "Req", "Number of unified translation cache (L1) translation misses", "0-15"
"``TCP_UTCL1_PERMISSION_MISS[n]``", "Req", "Number of unified translation cache (L1) permission misses", "0-15" "``TCP_UTCL1_PERMISSION_MISS[n]``", "Req", "Number of unified translation cache (L1) permission misses", "0-15"
"``TCP_TOTAL_CACHE_ACCESSES[n]``", "Req", "Number of vector L1d cache accesses including hits and misses", "0-15" "``TCP_TOTAL_CACHE_ACCESSES[n]``", "Req", "Number of vector L1d cache accesses including hits and misses", "0-15"
"``TCP_TCP_LATENCY[n]``", "Cycles", "**MI200 Series only** Accumulated wave access latency to vL1D over all wavefronts", "0-15" "``TCP_TCP_LATENCY[n]``", "Cycles", "**MI200 series only** Accumulated wave access latency to vL1D over all wavefronts", "0-15"
"``TCP_TCC_READ_REQ_LATENCY[n]``", "Cycles", "**MI200 Series only** Total vL1D to L2 request latency over all wavefronts for reads and atomics with return", "0-15" "``TCP_TCC_READ_REQ_LATENCY[n]``", "Cycles", "**MI200 series only** Total vL1D to L2 request latency over all wavefronts for reads and atomics with return", "0-15"
"``TCP_TCC_WRITE_REQ_LATENCY[n]``", "Cycles", "**MI200 Series only** Total vL1D to L2 request latency over all wavefronts for writes and atomics without return", "0-15" "``TCP_TCC_WRITE_REQ_LATENCY[n]``", "Cycles", "**MI200 series only** Total vL1D to L2 request latency over all wavefronts for writes and atomics without return", "0-15"
"``TCP_TCC_READ_REQ[n]``", "Req", "Number of read requests to L2 cache", "0-15" "``TCP_TCC_READ_REQ[n]``", "Req", "Number of read requests to L2 cache", "0-15"
"``TCP_TCC_WRITE_REQ[n]``", "Req", "Number of write requests to L2 cache", "0-15" "``TCP_TCC_WRITE_REQ[n]``", "Req", "Number of write requests to L2 cache", "0-15"
"``TCP_TCC_ATOMIC_WITH_RET_REQ[n]``", "Req", "Number of atomic requests to L2 cache with return", "0-15" "``TCP_TCC_ATOMIC_WITH_RET_REQ[n]``", "Req", "Number of atomic requests to L2 cache with return", "0-15"
@@ -560,7 +560,7 @@ Note the following:
``TCC_TAG_STALL[n]``, probes can stall the pipeline at a variety of places. There is no single point that ``TCC_TAG_STALL[n]``, probes can stall the pipeline at a variety of places. There is no single point that
can accurately measure the total stalls can accurately measure the total stalls
MI300 and MI200 Series derived metrics list MI300 and MI200 series derived metrics list
============================================================== ==============================================================
.. csv-table:: .. csv-table::

View File

@@ -1,21 +1,21 @@
--- ---
myst: myst:
html_meta: html_meta:
"description lang=en": "Learn about the AMD Instinct MI300 Series architecture." "description lang=en": "Learn about the AMD Instinct MI300 series architecture."
"keywords": "Instinct, MI300X, MI300A, microarchitecture, AMD, ROCm" "keywords": "Instinct, MI300X, MI300A, microarchitecture, AMD, ROCm"
--- ---
# AMD Instinct™ MI300 Series microarchitecture # AMD Instinct™ MI300 series microarchitecture
The AMD Instinct MI300 Series GPUs are based on the AMD CDNA 3 The AMD Instinct MI300 series accelerators are based on the AMD CDNA 3
architecture which was designed to deliver leadership performance for HPC, artificial intelligence (AI), and machine architecture which was designed to deliver leadership performance for HPC, artificial intelligence (AI), and machine
learning (ML) workloads. The AMD Instinct MI300 Series GPUs are well-suited for extreme scalability and compute performance, running learning (ML) workloads. The AMD Instinct MI300 series accelerators are well-suited for extreme scalability and compute performance, running
on everything from individual servers to the world's largest exascale supercomputers. on everything from individual servers to the world's largest exascale supercomputers.
With the MI300 Series, AMD is introducing the Accelerator Complex Die (XCD), which contains the With the MI300 series, AMD is introducing the Accelerator Complex Die (XCD), which contains the
GPU computational elements of the processor along with the lower levels of the cache hierarchy. GPU computational elements of the processor along with the lower levels of the cache hierarchy.
The following image depicts the structure of a single XCD in the AMD Instinct MI300 GPU Series. The following image depicts the structure of a single XCD in the AMD Instinct MI300 accelerator series.
```{figure} ../../data/shared/xcd-sys-arch.png ```{figure} ../../data/shared/xcd-sys-arch.png
--- ---
@@ -39,7 +39,7 @@ infrastructure) using the AMD Infinity Fabric™ technology as interconnect.
The Matrix Cores inside the CDNA 3 CUs have significant improvements, emphasizing AI and machine The Matrix Cores inside the CDNA 3 CUs have significant improvements, emphasizing AI and machine
learning, enhancing throughput of existing data types while adding support for new data types. learning, enhancing throughput of existing data types while adding support for new data types.
CDNA 2 Matrix Cores support FP16 and BF16, while offering INT8 for inference. Compared to MI250X CDNA 2 Matrix Cores support FP16 and BF16, while offering INT8 for inference. Compared to MI250X
GPUs, CDNA 3 Matrix Cores triple the performance for FP16 and BF16, while providing a accelerators, CDNA 3 Matrix Cores triple the performance for FP16 and BF16, while providing a
performance gain of 6.8 times for INT8. FP8 has a performance gain of 16 times compared to FP32, performance gain of 6.8 times for INT8. FP8 has a performance gain of 16 times compared to FP32,
while TF32 has a gain of 4 times compared to FP32. while TF32 has a gain of 4 times compared to FP32.
@@ -105,7 +105,7 @@ name: mi300-arch
alt: alt:
align: center align: center
--- ---
MI300 Series system architecture showing MI300A (left) with 6 XCDs and 3 CCDs, while the MI300X (right) has 8 XCDs. MI300 series system architecture showing MI300A (left) with 6 XCDs and 3 CCDs, while the MI300X (right) has 8 XCDs.
``` ```
## Node-level architecture ## Node-level architecture
@@ -116,11 +116,11 @@ name: mi300-node
align: center align: center
--- ---
MI300 Series node-level architecture showing 8 fully interconnected MI300X OAM modules connected to (optional) PCIe switches via retimers and HGX connectors. MI300 series node-level architecture showing 8 fully interconnected MI300X OAM modules connected to (optional) PCIe switches via retimers and HGX connectors.
``` ```
The image above shows the node-level architecture of a system with AMD EPYC processors in a The image above shows the node-level architecture of a system with AMD EPYC processors in a
dual-socket configuration and eight AMD Instinct MI300X GPUs. The MI300X OAMs attach to the dual-socket configuration and eight AMD Instinct MI300X accelerators. The MI300X OAMs attach to the
host system via PCIe Gen 5 x16 links (yellow lines). The GPUs are using seven high-bandwidth, host system via PCIe Gen 5 x16 links (yellow lines). The GPUs are using seven high-bandwidth,
low-latency AMD Infinity Fabric™ links (red lines) to form a fully connected 8-GPU system. low-latency AMD Infinity Fabric™ links (red lines) to form a fully connected 8-GPU system.

View File

@@ -1,530 +0,0 @@
.. meta::
:description: MI355 Series performance counters and metrics
:keywords: MI355, MI355X, MI3XX
***********************************
MI350 Series performance counters
***********************************
This topic lists and describes the hardware performance counters and derived metrics available on the AMD Instinct MI350 and MI355 GPUs. These counters are available for profiling using `ROCprofiler-SDK <https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/latest/index.html>`_ and `ROCm Compute Profiler <https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/>`_.
The following sections list the performance counters based on the IP blocks.
Command processor packet processor counters (CPC)
==================================================
.. list-table::
:header-rows: 1
* - Hardware counter
- Definition
* - CPC_ALWAYS_COUNT
- Always count.
* - CPC_ADC_VALID_CHUNK_NOT_AVAIL
- ADC valid chunk is not available when dispatch walking is in progress in the multi-xcc mode.
* - CPC_ADC_DISPATCH_ALLOC_DONE
- ADC dispatch allocation is done.
* - CPC_ADC_VALID_CHUNK_END
- ADC crawler's valid chunk end in the multi-xcc mode.
* - CPC_SYNC_FIFO_FULL_LEVEL
- SYNC FIFO full last cycles.
* - CPC_SYNC_FIFO_FULL
- SYNC FIFO full times.
* - CPC_GD_BUSY
- ADC busy.
* - CPC_TG_SEND
- ADC thread group send.
* - CPC_WALK_NEXT_CHUNK
- ADC walking next valid chunk in the multi-xcc mode.
* - CPC_STALLED_BY_SE0_SPI
- ADC CSDATA stalled by SE0SPI.
* - CPC_STALLED_BY_SE1_SPI
- ADC CSDATA stalled by SE1SPI.
* - CPC_STALLED_BY_SE2_SPI
- ADC CSDATA stalled by SE2SPI.
* - CPC_STALLED_BY_SE3_SPI
- ADC CSDATA stalled by SE3SPI.
* - CPC_LTE_ALL
- CPC sync counter LteAll. Only Master XCD manages LteAll.
* - CPC_SYNC_WRREQ_FIFO_BUSY
- CPC sync counter request FIFO is not empty.
* - CPC_CANE_BUSY
- CPC CANE bus is busy, which indicates the presence of inflight sync counter requests.
* - CPC_CANE_STALL
- CPC sync counter sending is stalled by CANE.
Shader pipe interpolators (SPI) counters
=========================================
.. list-table::
:header-rows: 1
* - Hardware counter
- Definition
* - SPI_CS0_WINDOW_VALID
- Clock count enabled by PIPE0 perfcounter_start event.
* - SPI_CS0_BUSY
- Number of clocks with outstanding waves for PIPE0 (SPI or SH).
* - SPI_CS0_NUM_THREADGROUPS
- Number of thread groups launched for PIPE0.
* - SPI_CS0_CRAWLER_STALL
- Number of clocks when PIPE0 event or wave order FIFO is full.
* - SPI_CS0_EVENT_WAVE
- Number of PIPE0 events and waves.
* - SPI_CS0_WAVE
- Number of PIPE0 waves.
* - SPI_CS1_WINDOW_VALID
- Clock count enabled by PIPE1 perfcounter_start event.
* - SPI_CS1_BUSY
- Number of clocks with outstanding waves for PIPE1 (SPI or SH).
* - SPI_CS1_NUM_THREADGROUPS
- Number of thread groups launched for PIPE1.
* - SPI_CS1_CRAWLER_STALL
- Number of clocks when PIPE1 event or wave order FIFO is full.
* - SPI_CS1_EVENT_WAVE
- Number of PIPE1 events and waves.
* - SPI_CS1_WAVE
- Number of PIPE1 waves.
* - SPI_CS2_WINDOW_VALID
- Clock count enabled by PIPE2 perfcounter_start event.
* - SPI_CS2_BUSY
- Number of clocks with outstanding waves for PIPE2 (SPI or SH).
* - SPI_CS2_NUM_THREADGROUPS
- Number of thread groups launched for PIPE2.
* - SPI_CS2_CRAWLER_STALL
- Number of clocks when PIPE2 event or wave order FIFO is full.
* - SPI_CS2_EVENT_WAVE
- Number of PIPE2 events and waves.
* - SPI_CS2_WAVE
- Number of PIPE2 waves.
* - SPI_CS3_WINDOW_VALID
- Clock count enabled by PIPE3 perfcounter_start event.
* - SPI_CS3_BUSY
- Number of clocks with outstanding waves for PIPE3 (SPI or SH).
* - SPI_CS3_NUM_THREADGROUPS
- Number of thread groups launched for PIPE3.
* - SPI_CS3_CRAWLER_STALL
- Number of clocks when PIPE3 event or wave order FIFO is full.
* - SPI_CS3_EVENT_WAVE
- Number of PIPE3 events and waves.
* - SPI_CS3_WAVE
- Number of PIPE3 waves.
* - SPI_CSQ_P0_Q0_OCCUPANCY
- Sum of occupancy info for PIPE0 Queue0.
* - SPI_CSQ_P0_Q1_OCCUPANCY
- Sum of occupancy info for PIPE0 Queue1.
* - SPI_CSQ_P0_Q2_OCCUPANCY
- Sum of occupancy info for PIPE0 Queue2.
* - SPI_CSQ_P0_Q3_OCCUPANCY
- Sum of occupancy info for PIPE0 Queue3.
* - SPI_CSQ_P0_Q4_OCCUPANCY
- Sum of occupancy info for PIPE0 Queue4.
* - SPI_CSQ_P0_Q5_OCCUPANCY
- Sum of occupancy info for PIPE0 Queue5.
* - SPI_CSQ_P0_Q6_OCCUPANCY
- Sum of occupancy info for PIPE0 Queue6.
* - SPI_CSQ_P0_Q7_OCCUPANCY
- Sum of occupancy info for PIPE0 Queue7.
* - SPI_CSQ_P1_Q0_OCCUPANCY
- Sum of occupancy info for PIPE1 Queue0.
* - SPI_CSQ_P1_Q1_OCCUPANCY
- Sum of occupancy info for PIPE1 Queue1.
* - SPI_CSQ_P1_Q2_OCCUPANCY
- Sum of occupancy info for PIPE1 Queue2.
* - SPI_CSQ_P1_Q3_OCCUPANCY
- Sum of occupancy info for PIPE1 Queue3.
* - SPI_CSQ_P1_Q4_OCCUPANCY
- Sum of occupancy info for PIPE1 Queue4.
* - SPI_CSQ_P1_Q5_OCCUPANCY
- Sum of occupancy info for PIPE1 Queue5.
* - SPI_CSQ_P1_Q6_OCCUPANCY
- Sum of occupancy info for PIPE1 Queue6.
* - SPI_CSQ_P1_Q7_OCCUPANCY
- Sum of occupancy info for PIPE1 Queue7.
* - SPI_CSQ_P2_Q0_OCCUPANCY
- Sum of occupancy info for PIPE2 Queue0.
* - SPI_CSQ_P2_Q1_OCCUPANCY
- Sum of occupancy info for PIPE2 Queue1.
* - SPI_CSQ_P2_Q2_OCCUPANCY
- Sum of occupancy info for PIPE2 Queue2.
* - SPI_CSQ_P2_Q3_OCCUPANCY
- Sum of occupancy info for PIPE2 Queue3.
* - SPI_CSQ_P2_Q4_OCCUPANCY
- Sum of occupancy info for PIPE2 Queue4.
* - SPI_CSQ_P2_Q5_OCCUPANCY
- Sum of occupancy info for PIPE2 Queue5.
* - SPI_CSQ_P2_Q6_OCCUPANCY
- Sum of occupancy info for PIPE2 Queue6.
* - SPI_CSQ_P2_Q7_OCCUPANCY
- Sum of occupancy info for PIPE2 Queue7.
* - SPI_CSQ_P3_Q0_OCCUPANCY
- Sum of occupancy info for PIPE3 Queue0.
* - SPI_CSQ_P3_Q1_OCCUPANCY
- Sum of occupancy info for PIPE3 Queue1.
* - SPI_CSQ_P3_Q2_OCCUPANCY
- Sum of occupancy info for PIPE3 Queue2.
* - SPI_CSQ_P3_Q3_OCCUPANCY
- Sum of occupancy info for PIPE3 Queue3.
* - SPI_CSQ_P3_Q4_OCCUPANCY
- Sum of occupancy info for PIPE3 Queue4.
* - SPI_CSQ_P3_Q5_OCCUPANCY
- Sum of occupancy info for PIPE3 Queue5.
* - SPI_CSQ_P3_Q6_OCCUPANCY
- Sum of occupancy info for PIPE3 Queue6.
* - SPI_CSQ_P3_Q7_OCCUPANCY
- Sum of occupancy info for PIPE3 Queue7.
* - SPI_CSQ_P0_OCCUPANCY
- Sum of occupancy info for all PIPE0 queues.
* - SPI_CSQ_P1_OCCUPANCY
- Sum of occupancy info for all PIPE1 queues.
* - SPI_CSQ_P2_OCCUPANCY
- Sum of occupancy info for all PIPE2 queues.
* - SPI_CSQ_P3_OCCUPANCY
- Sum of occupancy info for all PIPE3 queues.
* - SPI_VWC0_VDATA_VALID_WR
- Number of clocks VGPR bus_0 writes VGPRs.
* - SPI_VWC1_VDATA_VALID_WR
- Number of clocks VGPR bus_1 writes VGPRs.
* - SPI_CSC_WAVE_CNT_BUSY
- Number of cycles when there is any wave in the pipe.
Compute unit (SQ) counters
===========================
.. list-table::
:header-rows: 1
* - Hardware counter
- Definition
* - SQ_INSTS_VALU_MFMA_F6F4
- Number of VALU V_MFMA_*_F6F4 instructions.
* - SQ_INSTS_VALU_MFMA_MOPS_F6F4
- Number of VALU matrix with the performed math operations (add or mul) divided by 512, assuming a full EXEC mask of F6 or F4 data type.
* - SQ_ACTIVE_INST_VALU2
- Number of quad-cycles when two VALU instructions are issued (per-simd, nondeterministic).
* - SQ_INSTS_LDS_LOAD
- Number of LDS load instructions issued (per-simd, emulated).
* - SQ_INSTS_LDS_STORE
- Number of LDS store instructions issued (per-simd, emulated).
* - SQ_INSTS_LDS_ATOMIC
- Number of LDS atomic instructions issued (per-simd, emulated).
* - SQ_INSTS_LDS_LOAD_BANDWIDTH
- Total number of 64-bytes loaded (instrSize * CountOnes(EXEC))/64 (per-simd, emulated).
* - SQ_INSTS_LDS_STORE_BANDWIDTH
- Total number of 64-bytes written (instrSize * CountOnes(EXEC))/64 (per-simd, emulated).
* - SQ_INSTS_LDS_ATOMIC_BANDWIDTH
- Total number of 64-bytes atomic (instrSize * CountOnes(EXEC))/64 (per-simd, emulated).
* - SQ_INSTS_VALU_FLOPS_FP16
- Counts FLOPS per instruction on float 16 excluding MFMA/SMFMA.
* - SQ_INSTS_VALU_FLOPS_FP32
- Counts FLOPS per instruction on float 32 excluding MFMA/SMFMA.
* - SQ_INSTS_VALU_FLOPS_FP64
- Counts FLOPS per instruction on float 64 excluding MFMA/SMFMA.
* - SQ_INSTS_VALU_FLOPS_FP16_TRANS
- Counts FLOPS per instruction on float 16 trans excluding MFMA/SMFMA.
* - SQ_INSTS_VALU_FLOPS_FP32_TRANS
- Counts FLOPS per instruction on float 32 trans excluding MFMA/SMFMA.
* - SQ_INSTS_VALU_FLOPS_FP64_TRANS
- Counts FLOPS per instruction on float 64 trans excluding MFMA/SMFMA.
* - SQ_INSTS_VALU_IOPS
- Counts OPS per instruction on integer or unsigned or bit data (per-simd, emulated).
* - SQ_LDS_DATA_FIFO_FULL
- Number of cycles LDS data FIFO is full (nondeterministic, unwindowed).
* - SQ_LDS_CMD_FIFO_FULL
- Number of cycles LDS command FIFO is full (nondeterministic, unwindowed).
* - SQ_VMEM_TA_ADDR_FIFO_FULL
- Number of cycles texture requests are stalled due to full address FIFO in TA (nondeterministic, unwindowed).
* - SQ_VMEM_TA_CMD_FIFO_FULL
- Number of cycles texture requests are stalled due to full cmd FIFO in TA (nondeterministic, unwindowed).
* - SQ_VMEM_WR_TA_DATA_FIFO_FULL
- Number of cycles texture writes are stalled due to full data FIFO in TA (nondeterministic, unwindowed).
* - SQC_ICACHE_MISSES_DUPLICATE
- Number of duplicate misses (access to a non-resident, miss pending CL) (per-SQ, per-Bank, nondeterministic).
* - SQC_DCACHE_MISSES_DUPLICATE
- Number of duplicate misses (access to a non-resident, miss pending CL) (per-SQ, per-Bank, nondeterministic).
Texture addressing (TA) unit counters
======================================
.. list-table::
:header-rows: 1
* - Hardware counter
- Definition
* - TA_BUFFER_READ_LDS_WAVEFRONTS
- Number of buffer read wavefronts for LDS return processed by the TA.
* - TA_FLAT_READ_LDS_WAVEFRONTS
- Number of flat opcode reads for LDS return processed by the TA.
Texture data (TD) unit counters
================================
.. list-table::
:header-rows: 1
* - Hardware counter
- Definition
* - TD_WRITE_ACKT_WAVEFRONT
- Number of write acknowledgments, sent to SQ and not to SP.
* - TD_TD_SP_TRAFFIC
- Number of times this TD sends data to the SP.
Texture cache per pipe (TCP) counters
======================================
.. list-table::
:header-rows: 1
* - Hardware counter
- Definition
* - TCP_TCP_TA_ADDR_STALL_CYCLES
- TCP stalls TA addr interface.
* - TCP_TCP_TA_DATA_STALL_CYCLES
- TCP stalls TA data interface. Now windowed.
* - TCP_LFIFO_STALL_CYCLES
- Memory latency FIFOs full stall.
* - TCP_RFIFO_STALL_CYCLES
- Memory Request FIFOs full stall.
* - TCP_TCR_RDRET_STALL
- Write into cache stalled by read return from TCR.
* - TCP_PENDING_STALL_CYCLES
- Stall due to data pending from L2.
* - TCP_UTCL1_SERIALIZATION_STALL
- Total number of stalls caused due to serializing translation requests through the UTCL1.
* - TCP_UTCL1_THRASHING_STALL
- Stall caused by thrashing feature in any probe. Lacks accuracy when the stall signal overlaps between probe0 and probe1, which is worse with MECO of thrashing deadlock. Some probe0 events could miss being counted in with MECO on. This perf count provides a rough thrashing estimate.
* - TCP_UTCL1_TRANSLATION_MISS_UNDER_MISS
- Translation miss_under_miss.
* - TCP_UTCL1_STALL_INFLIGHT_MAX
- Total UTCL1 stalls due to inflight counter saturation.
* - TCP_UTCL1_STALL_LRU_INFLIGHT
- Total UTCL1 stalls due to LRU cache line with inflight traffic.
* - TCP_UTCL1_STALL_MULTI_MISS
- Total UTCL1 stalls due to arbitrated multiple misses.
* - TCP_UTCL1_LFIFO_FULL
- Total UTCL1 and UTCL2 latency, which hides FIFO full cycles.
* - TCP_UTCL1_STALL_LFIFO_NOT_RES
- Total UTCL1 stalls due to UTCL2 latency, which hides FIFO output (not resident).
* - TCP_UTCL1_STALL_UTCL2_REQ_OUT_OF_CREDITS
- Total UTCL1 stalls due to UTCL2_req being out of credits.
* - TCP_CLIENT_UTCL1_INFLIGHT
- The sum of inflight client to UTCL1 requests per cycle.
* - TCP_TAGRAM0_REQ
- Total L2 requests mapping to TagRAM 0 from this TCP to all TCCs.
* - TCP_TAGRAM1_REQ
- Total L2 requests mapping to TagRAM 1 from this TCP to all TCCs.
* - TCP_TAGRAM2_REQ
- Total L2 requests mapping to TagRAM 2 from this TCP to all TCCs.
* - TCP_TAGRAM3_REQ
- Total L2 requests mapping to TagRAM 3 from this TCP to all TCCs.
* - TCP_TCP_LATENCY
- Total TCP wave latency (from the first clock of wave entering to the first clock of wave leaving). Divide by TA_TCP_STATE_READ to find average wave latency.
* - TCP_TCC_READ_REQ_LATENCY
- Total TCP to TCC request latency for reads and atomics with return. Not Windowed.
* - TCP_TCC_WRITE_REQ_LATENCY
- Total TCP to TCC request latency for writes and atomics without return. Not Windowed.
* - TCP_TCC_WRITE_REQ_HOLE_LATENCY
- Total TCP req to TCC hole latency for writes and atomics. Not Windowed.
Texture cache per channel (TCC) counters
=========================================
.. list-table::
:header-rows: 1
* - Hardware counter
- Definition
* - TCC_READ_SECTORS
- Total number of 32B data sectors in read requests.
* - TCC_WRITE_SECTORS
- Total number of 32B data sectors in write requests.
* - TCC_ATOMIC_SECTORS
- Total number of 32B data sectors in atomic requests.
* - TCC_BYPASS_REQ
- Number of bypass requests. This is measured at the tag block.
* - TCC_LATENCY_FIFO_FULL
- Number of cycles when the latency FIFO is full.
* - TCC_SRC_FIFO_FULL
- Number of cycles when the SRC FIFO is assumed to be full as measured at the IB block.
* - TCC_EA0_RDREQ_64B
- Number of 64-byte TCC/EA read requests.
* - TCC_EA0_RDREQ_128B
- Number of 128-byte TCC/EA read requests.
* - TCC_IB_REQ
- Number of requests through the IB. This measures the number of raw requests from graphics clients to this TCC.
* - TCC_IB_STALL
- Number of cycles when the IB output is stalled.
* - TCC_EA0_WRREQ_WRITE_DRAM
- Number of TCC/EA write requests (32-byte or 64-byte) destined for DRAM (MC).
* - TCC_EA0_WRREQ_ATOMIC_DRAM
- Number of TCC/EA atomic requests (32-byte or 64-byte) destined for DRAM (MC).
* - TCC_EA0_RDREQ_DRAM_32B
- Number of 32-byte TCC/EA read requests due to DRAM traffic. One 64-byte request is counted as two and one 128-byte as four.
* - TCC_EA0_RDREQ_GMI_32B
- Number of 32-byte TCC/EA read requests due to GMI traffic. One 64-byte request is counted as two and one 128-byte as four.
* - TCC_EA0_RDREQ_IO_32B
- Number of 32-byte TCC/EA read requests due to IO traffic. One 64-byte request is counted as two and one 128-byte as four.
* - TCC_EA0_WRREQ_WRITE_DRAM_32B
- Number of 32-byte TCC/EA write requests due to DRAM traffic. One 64-byte request is counted as two.
* - TCC_EA0_WRREQ_ATOMIC_DRAM_32B
- Number of 32-byte TCC/EA atomic requests due to DRAM traffic. One 64-byte request is counted as two.
* - TCC_EA0_WRREQ_WRITE_GMI_32B
- Number of 32-byte TCC/EA write requests due to GMI traffic. One 64-byte request is counted as two.
* - TCC_EA0_WRREQ_ATOMIC_GMI_32B
- Number of 32-byte TCC/EA atomic requests due to GMI traffic. One 64-byte request is counted as two.
* - TCC_EA0_WRREQ_WRITE_IO_32B
- Number of 32-byte TCC/EA write requests due to IO traffic. One 64-byte request is counted as two.
* - TCC_EA0_WRREQ_ATOMIC_IO_32B
- Number of 32-byte TCC/EA atomic requests due to IO traffic. One 64-byte request is counted as two.

View File

@@ -34,7 +34,7 @@ Runtime
```{code-block} shell ```{code-block} shell
:caption: Example to expose the first device and a device based on UUID. :caption: Example to expose the first device and a device based on UUID.
export ROCR_VISIBLE_DEVICES="0,GPU-4b2c1a9f-8d3e-6f7a-b5c9-2e4d8a1f6c3b" export ROCR_VISIBLE_DEVICES="0,GPU-DEADBEEFDEADBEEF"
``` ```
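The same restriction can also be applied from Python, provided the variable is set before any ROCm-backed framework initializes the runtime in the process. The sketch below uses PyTorch purely as an example framework; any ROCm runtime initialized in this process (or in a child process inheriting the environment) sees only the listed devices.

```python
# Minimal sketch: restrict the visible devices before the ROCm runtime is
# initialized in this process.
import os

os.environ["ROCR_VISIBLE_DEVICES"] = "0"  # expose only the first device

import torch  # example framework; import only after the variable is set

print(torch.cuda.device_count())  # ROCm builds of PyTorch should report 1 here
```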
### `GPU_DEVICE_ORDINAL` ### `GPU_DEVICE_ORDINAL`

View File

@@ -8,7 +8,6 @@ import os
import shutil import shutil
import sys import sys
from pathlib import Path from pathlib import Path
from subprocess import run
gh_release_path = os.path.join("..", "RELEASE.md") gh_release_path = os.path.join("..", "RELEASE.md")
gh_changelog_path = os.path.join("..", "CHANGELOG.md") gh_changelog_path = os.path.join("..", "CHANGELOG.md")
@@ -81,27 +80,24 @@ latex_elements = {
} }
html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "rocm.docs.amd.com") html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "rocm.docs.amd.com")
html_context = {"docs_header_version": "7.1.1"} html_context = {}
if os.environ.get("READTHEDOCS", "") == "True": if os.environ.get("READTHEDOCS", "") == "True":
html_context["READTHEDOCS"] = True html_context["READTHEDOCS"] = True
# Check if the branch is a docs/ branch
official_branch = run(["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True).stdout.find("docs/")
# configurations for PDF output by Read the Docs # configurations for PDF output by Read the Docs
project = "ROCm Documentation" project = "ROCm Documentation"
project_path = os.path.abspath(".").replace("\\", "/") project_path = os.path.abspath(".").replace("\\", "/")
author = "Advanced Micro Devices, Inc." author = "Advanced Micro Devices, Inc."
copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved." copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved."
version = "7.1.1" version = "6.4.3"
release = "7.1.1" release = "6.4.3"
setting_all_article_info = True setting_all_article_info = True
all_article_info_os = ["linux", "windows"] all_article_info_os = ["linux", "windows"]
all_article_info_author = "" all_article_info_author = ""
# pages with specific settings # pages with specific settings
article_pages = [ article_pages = [
{"file": "about/release-notes", "os": ["linux"], "date": "2025-11-26"}, {"file": "about/release-notes", "os": ["linux"], "date": "2025-08-07"},
{"file": "release/changelog", "os": ["linux"],}, {"file": "release/changelog", "os": ["linux"],},
{"file": "compatibility/compatibility-matrix", "os": ["linux"]}, {"file": "compatibility/compatibility-matrix", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]}, {"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]},
@@ -111,17 +107,12 @@ article_pages = [
{"file": "compatibility/ml-compatibility/stanford-megatron-lm-compatibility", "os": ["linux"]}, {"file": "compatibility/ml-compatibility/stanford-megatron-lm-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/dgl-compatibility", "os": ["linux"]}, {"file": "compatibility/ml-compatibility/dgl-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/megablocks-compatibility", "os": ["linux"]}, {"file": "compatibility/ml-compatibility/megablocks-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/ray-compatibility", "os": ["linux"]}, {"file": "compatibility/ml-compatibility/taichi-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/llama-cpp-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/flashinfer-compatibility", "os": ["linux"]},
{"file": "how-to/deep-learning-rocm", "os": ["linux"]}, {"file": "how-to/deep-learning-rocm", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/index", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/install", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/install", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/system-setup/index", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/system-health-check", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/system-setup/multi-node-setup", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/system-setup/prerequisite-system-validation", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/system-setup/system-health-check", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/index", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/train-a-model", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/train-a-model", "os": ["linux"]},
@@ -133,37 +124,15 @@ article_pages = [
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.3", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.3", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.4", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.4", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.5", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.5", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.6", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.7", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.8", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.9", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.10", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-primus-migration-guide", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/primus-megatron", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-megatron-v25.7", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-megatron-v25.8", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-megatron-v25.9", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-megatron-v25.10", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/pytorch-training", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/pytorch-training", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-history", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-history", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.3", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.3", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.4", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.4", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.5", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.5", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.6", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.7", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.8", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.9", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.10", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/primus-pytorch", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-pytorch-v25.8", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-pytorch-v25.9", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-pytorch-v25.10", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-history", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-history", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-v25.4", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-v25.4", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-v25.5", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/xdit-diffusion-inference", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/fine-tuning/index", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/fine-tuning/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/fine-tuning/overview", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/fine-tuning/overview", "os": ["linux"]},
@@ -187,17 +156,7 @@ article_pages = [
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.0.1-20250702", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.0.1-20250702", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.1-20250702", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.1-20250702", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.1-20250715", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.1-20250715", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.0-20250812", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.1-20250909", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.2-20251006", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.11.1-20251103", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/sglang-history", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/xdit-diffusion-inference", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.10", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.11", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.12", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.13", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/index", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/inference-optimization/index", "os": ["linux"]},
@@ -225,7 +184,7 @@ external_toc_path = "./sphinx/_toc.yml"
# Add the _extensions directory to Python's search path # Add the _extensions directory to Python's search path
sys.path.append(str(Path(__file__).parent / 'extension')) sys.path.append(str(Path(__file__).parent / 'extension'))
extensions = ["rocm_docs", "sphinx_reredirects", "sphinx_sitemap", "sphinxcontrib.datatemplates", "remote-content", "version-ref", "csv-to-list-table"] extensions = ["rocm_docs", "sphinx_reredirects", "sphinx_sitemap", "sphinxcontrib.datatemplates", "version-ref", "csv-to-list-table"]
compatibility_matrix_file = str(Path(__file__).parent / 'compatibility/compatibility-matrix-historical-6.0.csv') compatibility_matrix_file = str(Path(__file__).parent / 'compatibility/compatibility-matrix-historical-6.0.csv')
@@ -235,14 +194,10 @@ external_projects_current_project = "rocm"
# external_projects_remote_repository = "" # external_projects_remote_repository = ""
html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "https://rocm-stg.amd.com/") html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "https://rocm-stg.amd.com/")
html_context = {"docs_header_version": "7.1.0"} html_context = {}
if os.environ.get("READTHEDOCS", "") == "True": if os.environ.get("READTHEDOCS", "") == "True":
html_context["READTHEDOCS"] = True html_context["READTHEDOCS"] = True
html_context["official_branch"] = official_branch
html_context["version"] = version
html_context["release"] = release
html_theme = "rocm_docs_theme" html_theme = "rocm_docs_theme"
html_theme_options = {"flavor": "rocm-docs-home"} html_theme_options = {"flavor": "rocm-docs-home"}
@@ -261,13 +216,10 @@ suppress_warnings = ["autosectionlabel.*"]
html_context = { html_context = {
"project_path" : {project_path}, "project_path" : {project_path},
"gpu_type" : [('AMD Instinct GPUs', 'intrinsic'), ('AMD gfx families', 'gfx'), ('NVIDIA families', 'nvidia') ], "gpu_type" : [('AMD Instinct accelerators', 'intrinsic'), ('AMD gfx families', 'gfx'), ('NVIDIA families', 'nvidia') ],
"atomics_type" : [('HW atomics', 'hw-atomics'), ('CAS emulation', 'cas-atomics')], "atomics_type" : [('HW atomics', 'hw-atomics'), ('CAS emulation', 'cas-atomics')],
"pcie_type" : [('No PCIe atomics', 'nopcie'), ('PCIe atomics', 'pcie')], "pcie_type" : [('No PCIe atomics', 'nopcie'), ('PCIe atomics', 'pcie')],
"memory_type" : [('Device DRAM', 'device-dram'), ('Migratable Host DRAM', 'migratable-host-dram'), ('Pinned Host DRAM', 'pinned-host-dram')], "memory_type" : [('Device DRAM', 'device-dram'), ('Migratable Host DRAM', 'migratable-host-dram'), ('Pinned Host DRAM', 'pinned-host-dram')],
"granularity_type" : [('Coarse-grained', 'coarse-grained'), ('Fine-grained', 'fine-grained')], "granularity_type" : [('Coarse-grained', 'coarse-grained'), ('Fine-grained', 'fine-grained')],
"scope_type" : [('Device', 'device'), ('System', 'system')] "scope_type" : [('Device', 'device'), ('System', 'system')]
} }
# Disable figure and table numbering
numfig = False

Binary file not shown.

Before: 114 KiB  |  After: 81 KiB

View File

@@ -1,91 +0,0 @@
vllm_benchmark:
unified_docker:
latest:
pull_tag: rocm/vllm:rocm6.4.1_vllm_0.10.0_20250812
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.10.0_20250812/images/sha256-4c277ad39af3a8c9feac9b30bf78d439c74d9b4728e788a419d3f1d0c30cacaa
rocm_version: 6.4.1
vllm_version: 0.10.0 (0.10.1.dev395+g340ea86df.rocm641)
pytorch_version: 2.7.0+gitf717b2a
hipblaslt_version: 0.15
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.1 8B
mad_tag: pyt_vllm_llama-3.1-8b
model_repo: meta-llama/Llama-3.1-8B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: float16
- model: Llama 3.1 70B
mad_tag: pyt_vllm_llama-3.1-70b
model_repo: meta-llama/Llama-3.1-70B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
precision: float16
- model: Llama 3.1 405B
mad_tag: pyt_vllm_llama-3.1-405b
model_repo: meta-llama/Llama-3.1-405B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
precision: float16
- model: Llama 2 70B
mad_tag: pyt_vllm_llama-2-70b
model_repo: meta-llama/Llama-2-70b-chat-hf
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
precision: float16
- model: Llama 3.1 8B FP8
mad_tag: pyt_vllm_llama-3.1-8b_fp8
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
precision: float8
- model: Llama 3.1 70B FP8
mad_tag: pyt_vllm_llama-3.1-70b_fp8
model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
precision: float8
- model: Llama 3.1 405B FP8
mad_tag: pyt_vllm_llama-3.1-405b_fp8
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
precision: float8
- group: Mistral AI
tag: mistral
models:
- model: Mixtral MoE 8x7B
mad_tag: pyt_vllm_mixtral-8x7b
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
precision: float16
- model: Mixtral MoE 8x22B
mad_tag: pyt_vllm_mixtral-8x22b
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
precision: float16
- model: Mixtral MoE 8x7B FP8
mad_tag: pyt_vllm_mixtral-8x7b_fp8
model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
precision: float8
- model: Mixtral MoE 8x22B FP8
mad_tag: pyt_vllm_mixtral-8x22b_fp8
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
precision: float8
- group: Qwen
tag: qwen
models:
- model: QwQ-32B
mad_tag: pyt_vllm_qwq-32b
model_repo: Qwen/QwQ-32B
url: https://huggingface.co/Qwen/QwQ-32B
precision: float16
- model: Qwen3 30B A3B
mad_tag: pyt_vllm_qwen3-30b-a3b
model_repo: Qwen/Qwen3-30B-A3B
url: https://huggingface.co/Qwen/Qwen3-30B-A3B
precision: float16
- group: Microsoft Phi
tag: phi
models:
- model: Phi-4
mad_tag: pyt_vllm_phi-4
model_repo: microsoft/phi-4
url: https://huggingface.co/microsoft/phi-4

View File

@@ -1,188 +0,0 @@
dockers:
- pull_tag: rocm/vllm:rocm6.4.1_vllm_0.10.1_20250909
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.10.1_20250909/images/sha256-1113268572e26d59b205792047bea0e61e018e79aeadceba118b7bf23cb3715c
components:
ROCm: 6.4.1
vLLM: 0.10.1 (0.10.1rc2.dev409+g0b6bf6691.rocm641)
PyTorch: 2.7.0+gitf717b2a
hipBLASLt: 0.15
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.1 8B
mad_tag: pyt_vllm_llama-3.1-8b
model_repo: meta-llama/Llama-3.1-8B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 131072
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 70B
mad_tag: pyt_vllm_llama-3.1-70b
model_repo: meta-llama/Llama-3.1-70B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 131072
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B
mad_tag: pyt_vllm_llama-3.1-405b
model_repo: meta-llama/Llama-3.1-405B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 131072
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 2 70B
mad_tag: pyt_vllm_llama-2-70b
model_repo: meta-llama/Llama-2-70b-chat-hf
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 4096
max_num_batched_tokens: 4096
max_model_len: 4096
- model: Llama 3.1 8B FP8
mad_tag: pyt_vllm_llama-3.1-8b_fp8
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
precision: float8
config:
tp: 1
dtype: auto
kv_cache_dtype: fp8
max_seq_len_to_capture: 131072
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 70B FP8
mad_tag: pyt_vllm_llama-3.1-70b_fp8
model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_seq_len_to_capture: 131072
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B FP8
mad_tag: pyt_vllm_llama-3.1-405b_fp8
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_seq_len_to_capture: 131072
max_num_batched_tokens: 131072
max_model_len: 8192
- group: Mistral AI
tag: mistral
models:
- model: Mixtral MoE 8x7B
mad_tag: pyt_vllm_mixtral-8x7b
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 32768
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Mixtral MoE 8x22B
mad_tag: pyt_vllm_mixtral-8x22b
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 65536
max_num_batched_tokens: 65536
max_model_len: 8192
- model: Mixtral MoE 8x7B FP8
mad_tag: pyt_vllm_mixtral-8x7b_fp8
model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_seq_len_to_capture: 32768
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Mixtral MoE 8x22B FP8
mad_tag: pyt_vllm_mixtral-8x22b_fp8
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_seq_len_to_capture: 65536
max_num_batched_tokens: 65536
max_model_len: 8192
- group: Qwen
tag: qwen
models:
- model: QwQ-32B
mad_tag: pyt_vllm_qwq-32b
model_repo: Qwen/QwQ-32B
url: https://huggingface.co/Qwen/QwQ-32B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 131072
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Qwen3 30B A3B
mad_tag: pyt_vllm_qwen3-30b-a3b
model_repo: Qwen/Qwen3-30B-A3B
url: https://huggingface.co/Qwen/Qwen3-30B-A3B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 32768
max_num_batched_tokens: 32768
max_model_len: 8192
- group: Microsoft Phi
tag: phi
models:
- model: Phi-4
mad_tag: pyt_vllm_phi-4
model_repo: microsoft/phi-4
url: https://huggingface.co/microsoft/phi-4
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_seq_len_to_capture: 16384
max_num_batched_tokens: 16384
max_model_len: 8192
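
The per-model `config` blocks above map fairly directly onto vLLM engine arguments. As a hedged illustration (the launch wrapper used by the MAD benchmark scripts is not shown here), the Llama 3.1 70B entry could be served inside the listed container roughly as follows; the flag names come from the public vLLM CLI, and the port is a placeholder.

```{code-block} shell
:caption: Illustrative mapping of a model config entry to vLLM serve flags.
# Serve Llama 3.1 70B with the settings from its config block (tp=8, auto dtypes, 8K context).
vllm serve meta-llama/Llama-3.1-70B-Instruct \
    --tensor-parallel-size 8 \
    --dtype auto \
    --kv-cache-dtype auto \
    --max-seq-len-to-capture 131072 \
    --max-num-batched-tokens 131072 \
    --max-model-len 8192 \
    --port 8000
```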

View File

@@ -1,316 +0,0 @@
dockers:
- pull_tag: rocm/vllm:rocm7.0.0_vllm_0.10.2_20251006
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.10.2_20251006/images/sha256-94fd001964e1cf55c3224a445b1fb5be31a7dac302315255db8422d813edd7f5
components:
ROCm: 7.0.0
vLLM: 0.10.2 (0.11.0rc2.dev160+g790d22168.rocm700)
PyTorch: 2.9.0a0+git1c57644
hipBLASLt: 1.0.0
dockerfile:
commit: 790d22168820507f3105fef29596549378cfe399
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 2 70B
mad_tag: pyt_vllm_llama-2-70b
model_repo: meta-llama/Llama-2-70b-chat-hf
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 4096
max_model_len: 4096
- model: Llama 3.1 8B
mad_tag: pyt_vllm_llama-3.1-8b
model_repo: meta-llama/Llama-3.1-8B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 8B FP8
mad_tag: pyt_vllm_llama-3.1-8b_fp8
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
precision: float8
config:
tp: 1
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B
mad_tag: pyt_vllm_llama-3.1-405b
model_repo: meta-llama/Llama-3.1-405B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B FP8
mad_tag: pyt_vllm_llama-3.1-405b_fp8
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B MXFP4
mad_tag: pyt_vllm_llama-3.1-405b_fp4
model_repo: amd/Llama-3.1-405B-Instruct-MXFP4-Preview
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-MXFP4-Preview
precision: float4
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B
mad_tag: pyt_vllm_llama-3.3-70b
model_repo: meta-llama/Llama-3.3-70B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B FP8
mad_tag: pyt_vllm_llama-3.3-70b_fp8
model_repo: amd/Llama-3.3-70B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B MXFP4
mad_tag: pyt_vllm_llama-3.3-70b_fp4
model_repo: amd/Llama-3.3-70B-Instruct-MXFP4-Preview
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-MXFP4-Preview
precision: float4
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 4 Scout 17Bx16E
mad_tag: pyt_vllm_llama-4-scout-17b-16e
model_repo: meta-llama/Llama-4-Scout-17B-16E-Instruct
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Llama 4 Maverick 17Bx128E
mad_tag: pyt_vllm_llama-4-maverick-17b-128e
model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct
url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Llama 4 Maverick 17Bx128E FP8
mad_tag: pyt_vllm_llama-4-maverick-17b-128e_fp8
model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek R1 0528 FP8
mad_tag: pyt_vllm_deepseek-r1
model_repo: deepseek-ai/DeepSeek-R1-0528
url: https://huggingface.co/deepseek-ai/DeepSeek-R1-0528
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_seqs: 1024
max_num_batched_tokens: 131072
max_model_len: 8192
- group: OpenAI GPT OSS
tag: gpt-oss
models:
- model: GPT OSS 20B
mad_tag: pyt_vllm_gpt-oss-20b
model_repo: openai/gpt-oss-20b
url: https://huggingface.co/openai/gpt-oss-20b
precision: bfloat16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 8192
max_model_len: 8192
- model: GPT OSS 120B
mad_tag: pyt_vllm_gpt-oss-120b
model_repo: openai/gpt-oss-120b
url: https://huggingface.co/openai/gpt-oss-120b
precision: bfloat16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 8192
max_model_len: 8192
- group: Mistral AI
tag: mistral
models:
- model: Mixtral MoE 8x7B
mad_tag: pyt_vllm_mixtral-8x7b
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Mixtral MoE 8x7B FP8
mad_tag: pyt_vllm_mixtral-8x7b_fp8
model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Mixtral MoE 8x22B
mad_tag: pyt_vllm_mixtral-8x22b
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 65536
max_model_len: 8192
- model: Mixtral MoE 8x22B FP8
mad_tag: pyt_vllm_mixtral-8x22b_fp8
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 65536
max_model_len: 8192
- group: Qwen
tag: qwen
models:
- model: Qwen3 8B
mad_tag: pyt_vllm_qwen3-8b
model_repo: Qwen/Qwen3-8B
url: https://huggingface.co/Qwen/Qwen3-8B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 32B
mad_tag: pyt_vllm_qwen3-32b
model_repo: Qwen/Qwen3-32b
url: https://huggingface.co/Qwen/Qwen3-32B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 30B A3B
mad_tag: pyt_vllm_qwen3-30b-a3b
model_repo: Qwen/Qwen3-30B-A3B
url: https://huggingface.co/Qwen/Qwen3-30B-A3B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 30B A3B FP8
mad_tag: pyt_vllm_qwen3-30b-a3b_fp8
model_repo: Qwen/Qwen3-30B-A3B-FP8
url: https://huggingface.co/Qwen/Qwen3-30B-A3B-FP8
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 235B A22B
mad_tag: pyt_vllm_qwen3-235b-a22b
model_repo: Qwen/Qwen3-235B-A22B
url: https://huggingface.co/Qwen/Qwen3-235B-A22B
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 235B A22B FP8
mad_tag: pyt_vllm_qwen3-235b-a22b_fp8
model_repo: Qwen/Qwen3-235B-A22B-FP8
url: https://huggingface.co/Qwen/Qwen3-235B-A22B-FP8
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 40960
max_model_len: 8192
- group: Microsoft Phi
tag: phi
models:
- model: Phi-4
mad_tag: pyt_vllm_phi-4
model_repo: microsoft/phi-4
url: https://huggingface.co/microsoft/phi-4
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 16384
max_model_len: 8192

View File

@@ -1,316 +0,0 @@
dockers:
- pull_tag: rocm/vllm:rocm7.0.0_vllm_0.11.1_20251103
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.11.1_20251103/images/sha256-8d60429043d4d00958da46039a1de0d9b82df814d45da482497eef26a6076506
components:
ROCm: 7.0.0
vLLM: 0.11.1 (0.11.1rc2.dev141+g38f225c2a.rocm700)
PyTorch: 2.9.0a0+git1c57644
hipBLASLt: 1.0.0
dockerfile:
commit: 38f225c2abeadc04c2cc398814c2f53ea02c3c72
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 2 70B
mad_tag: pyt_vllm_llama-2-70b
model_repo: meta-llama/Llama-2-70b-chat-hf
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 4096
max_model_len: 4096
- model: Llama 3.1 8B
mad_tag: pyt_vllm_llama-3.1-8b
model_repo: meta-llama/Llama-3.1-8B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 8B FP8
mad_tag: pyt_vllm_llama-3.1-8b_fp8
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
precision: float8
config:
tp: 1
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B
mad_tag: pyt_vllm_llama-3.1-405b
model_repo: meta-llama/Llama-3.1-405B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B FP8
mad_tag: pyt_vllm_llama-3.1-405b_fp8
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B MXFP4
mad_tag: pyt_vllm_llama-3.1-405b_fp4
model_repo: amd/Llama-3.1-405B-Instruct-MXFP4-Preview
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-MXFP4-Preview
precision: float4
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B
mad_tag: pyt_vllm_llama-3.3-70b
model_repo: meta-llama/Llama-3.3-70B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B FP8
mad_tag: pyt_vllm_llama-3.3-70b_fp8
model_repo: amd/Llama-3.3-70B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B MXFP4
mad_tag: pyt_vllm_llama-3.3-70b_fp4
model_repo: amd/Llama-3.3-70B-Instruct-MXFP4-Preview
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-MXFP4-Preview
precision: float4
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 4 Scout 17Bx16E
mad_tag: pyt_vllm_llama-4-scout-17b-16e
model_repo: meta-llama/Llama-4-Scout-17B-16E-Instruct
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Llama 4 Maverick 17Bx128E
mad_tag: pyt_vllm_llama-4-maverick-17b-128e
model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct
url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Llama 4 Maverick 17Bx128E FP8
mad_tag: pyt_vllm_llama-4-maverick-17b-128e_fp8
model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek R1 0528 FP8
mad_tag: pyt_vllm_deepseek-r1
model_repo: deepseek-ai/DeepSeek-R1-0528
url: https://huggingface.co/deepseek-ai/DeepSeek-R1-0528
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_seqs: 1024
max_num_batched_tokens: 131072
max_model_len: 8192
- group: OpenAI GPT OSS
tag: gpt-oss
models:
- model: GPT OSS 20B
mad_tag: pyt_vllm_gpt-oss-20b
model_repo: openai/gpt-oss-20b
url: https://huggingface.co/openai/gpt-oss-20b
precision: bfloat16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 8192
max_model_len: 8192
- model: GPT OSS 120B
mad_tag: pyt_vllm_gpt-oss-120b
model_repo: openai/gpt-oss-120b
url: https://huggingface.co/openai/gpt-oss-120b
precision: bfloat16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 8192
max_model_len: 8192
- group: Mistral AI
tag: mistral
models:
- model: Mixtral MoE 8x7B
mad_tag: pyt_vllm_mixtral-8x7b
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Mixtral MoE 8x7B FP8
mad_tag: pyt_vllm_mixtral-8x7b_fp8
model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Mixtral MoE 8x22B
mad_tag: pyt_vllm_mixtral-8x22b
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 65536
max_model_len: 8192
- model: Mixtral MoE 8x22B FP8
mad_tag: pyt_vllm_mixtral-8x22b_fp8
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 65536
max_model_len: 8192
- group: Qwen
tag: qwen
models:
- model: Qwen3 8B
mad_tag: pyt_vllm_qwen3-8b
model_repo: Qwen/Qwen3-8B
url: https://huggingface.co/Qwen/Qwen3-8B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 32B
mad_tag: pyt_vllm_qwen3-32b
model_repo: Qwen/Qwen3-32b
url: https://huggingface.co/Qwen/Qwen3-32B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 30B A3B
mad_tag: pyt_vllm_qwen3-30b-a3b
model_repo: Qwen/Qwen3-30B-A3B
url: https://huggingface.co/Qwen/Qwen3-30B-A3B
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 30B A3B FP8
mad_tag: pyt_vllm_qwen3-30b-a3b_fp8
model_repo: Qwen/Qwen3-30B-A3B-FP8
url: https://huggingface.co/Qwen/Qwen3-30B-A3B-FP8
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 235B A22B
mad_tag: pyt_vllm_qwen3-235b-a22b
model_repo: Qwen/Qwen3-235B-A22B
url: https://huggingface.co/Qwen/Qwen3-235B-A22B
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 40960
max_model_len: 8192
- model: Qwen3 235B A22B FP8
mad_tag: pyt_vllm_qwen3-235b-a22b_fp8
model_repo: Qwen/Qwen3-235B-A22B-FP8
url: https://huggingface.co/Qwen/Qwen3-235B-A22B-FP8
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 40960
max_model_len: 8192
- group: Microsoft Phi
tag: phi
models:
- model: Phi-4
mad_tag: pyt_vllm_phi-4
model_repo: microsoft/phi-4
url: https://huggingface.co/microsoft/phi-4
precision: float16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 16384
max_model_len: 8192

View File

@@ -1,55 +0,0 @@
xdit_diffusion_inference:
docker:
pull_tag: rocm/pytorch-xdit:v25.10
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.10/images/sha256-d79715ff18a9470e3f907cec8a9654d6b783c63370b091446acffc0de4d7070e
ROCm: 7.9.0
components:
TheRock: 7afbe45
rccl: 9b04b2a
composable_kernel: b7a806f
rocm-libraries: f104555
rocm-systems: 25922d0
torch: 2.10.0a0+gite9c9017
torchvision: 0.22.0a0+966da7e
triton: 3.5.0+git52e49c12
accelerate: 1.11.0.dev0
aiter: 0.1.5.post4.dev20+ga25e55e79
diffusers: 0.36.0.dev0
xfuser: 0.4.4
yunchang: 0.6.3.post1
model_groups:
- group: Hunyuan Video
tag: hunyuan
models:
- model: Hunyuan Video
model_name: hunyuanvideo
model_repo: tencent/HunyuanVideo
revision: refs/pr/18
url: https://huggingface.co/tencent/HunyuanVideo
github: https://github.com/Tencent-Hunyuan/HunyuanVideo
mad_tag: pyt_xdit_hunyuanvideo
- group: Wan-AI
tag: wan
models:
- model: Wan2.1
model_name: wan2_1-i2v-14b-720p
model_repo: Wan-AI/Wan2.1-I2V-14B-720P
url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P
github: https://github.com/Wan-Video/Wan2.1
mad_tag: pyt_xdit_wan_2_1
- model: Wan2.2
model_name: wan2_2-i2v-a14b
model_repo: Wan-AI/Wan2.2-I2V-A14B
url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B
github: https://github.com/Wan-Video/Wan2.2
mad_tag: pyt_xdit_wan_2_2
- group: FLUX
tag: flux
models:
- model: FLUX.1
model_name: FLUX.1-dev
model_repo: black-forest-labs/FLUX.1-dev
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
github: https://github.com/black-forest-labs/flux
mad_tag: pyt_xdit_flux
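
The `model_repo` and `revision` fields above identify Hugging Face repositories. As a hedged example of how they might be consumed, the HunyuanVideo weights could be pre-fetched with the Hugging Face CLI; the local directory is a placeholder, and the actual MAD scripts may stage models differently.

```{code-block} shell
:caption: Pre-fetching a model referenced by model_repo/revision (illustrative).
# Download the HunyuanVideo weights at the pinned revision into a local directory.
huggingface-cli download tencent/HunyuanVideo \
    --revision refs/pr/18 \
    --local-dir /models/HunyuanVideo
```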

View File

@@ -1,109 +0,0 @@
xdit_diffusion_inference:
docker:
- version: v25-11
pull_tag: rocm/pytorch-xdit:v25.11
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.11/images/sha256-c9fa659439bb024f854b4d5eea598347251b02c341c55f66c98110832bde4216
ROCm: 7.10.0
supported_models:
- group: Hunyuan Video
models:
- Hunyuan Video
- group: Wan-AI
models:
- Wan2.1
- Wan2.2
- group: FLUX
models:
- FLUX.1
whats_new:
- "Minor bug fixes and clarifications to READMEs."
- "Bumps TheRock, AITER, Diffusers, xDiT versions."
- "Changes Aiter rounding mode for faster gfx942 FWD Attention."
components:
TheRock: 3e3f834
rccl: d23d18f
composable_kernel: 2570462
rocm-libraries: 0588f07
rocm-systems: 473025a
torch: 73adac
torchvision: f5c6c2e
triton: 7416ffc
accelerate: 34c1779
aiter: de14bec
diffusers: 40528e9
xfuser: 83978b5
yunchang: 2c9b712
- version: v25-10
pull_tag: rocm/pytorch-xdit:v25.10
docker_hub_url: https://hub.docker.com/r/rocm/pytorch-xdit
ROCm: 7.9.0
supported_models:
- group: Hunyuan Video
models:
- Hunyuan Video
- group: Wan-AI
models:
- Wan2.1
- Wan2.2
- group: FLUX
models:
- FLUX.1
whats_new:
- "First official xDiT Docker Release for Diffusion Inference."
- "Supports gfx942 and gfx950 series (AMD Instinct™ MI300X, MI325X, MI350X, and MI355X)."
- "Support Wan 2.1, Wan 2.2, HunyuanVideo and Flux workloads."
components:
TheRock: 7afbe45
rccl: 9b04b2a
composable_kernel: b7a806f
rocm-libraries: f104555
rocm-systems: 25922d0
torch: 2.10.0a0+gite9c9017
torchvision: 0.22.0a0+966da7e
triton: 3.5.0+git52e49c12
accelerate: 1.11.0.dev0
aiter: 0.1.5.post4.dev20+ga25e55e79
diffusers: 0.36.0.dev0
xfuser: 0.4.4
yunchang: 0.6.3.post1
model_groups:
- group: Hunyuan Video
tag: hunyuan
models:
- model: Hunyuan Video
page_tag: hunyuan_tag
model_name: hunyuanvideo
model_repo: tencent/HunyuanVideo
revision: refs/pr/18
url: https://huggingface.co/tencent/HunyuanVideo
github: https://github.com/Tencent-Hunyuan/HunyuanVideo
mad_tag: pyt_xdit_hunyuanvideo
- group: Wan-AI
tag: wan
models:
- model: Wan2.1
page_tag: wan_21_tag
model_name: wan2_1-i2v-14b-720p
model_repo: Wan-AI/Wan2.1-I2V-14B-720P
url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P
github: https://github.com/Wan-Video/Wan2.1
mad_tag: pyt_xdit_wan_2_1
- model: Wan2.2
page_tag: wan_22_tag
model_name: wan2_2-i2v-a14b
model_repo: Wan-AI/Wan2.2-I2V-A14B
url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B
github: https://github.com/Wan-Video/Wan2.2
mad_tag: pyt_xdit_wan_2_2
- group: FLUX
tag: flux
models:
- model: FLUX.1
page_tag: flux_1_tag
model_name: FLUX.1-dev
model_repo: black-forest-labs/FLUX.1-dev
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
github: https://github.com/black-forest-labs/flux
mad_tag: pyt_xdit_flux

View File

@@ -1,91 +0,0 @@
docker:
pull_tag: rocm/pytorch-xdit:v25.12
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.12/images/sha256-e06895132316bf3c393366b70a91eaab6755902dad0100e6e2b38310547d9256
ROCm: 7.10.0
whats_new:
- "Adds T2V and TI2V support for Wan models."
- "Adds support for SD-3.5 T2I model."
components:
TheRock:
version: 3e3f834
url: https://github.com/ROCm/TheRock
rccl:
version: d23d18f
url: https://github.com/ROCm/rccl
composable_kernel:
version: 2570462
url: https://github.com/ROCm/composable_kernel
rocm-libraries:
version: 0588f07
url: https://github.com/ROCm/rocm-libraries
rocm-systems:
version: 473025a
url: https://github.com/ROCm/rocm-systems
torch:
version: 73adac
url: https://github.com/pytorch/pytorch
torchvision:
version: f5c6c2e
url: https://github.com/pytorch/vision
triton:
version: 7416ffc
url: https://github.com/triton-lang/triton
accelerate:
version: 34c1779
url: https://github.com/huggingface/accelerate
aiter:
version: de14bec
url: https://github.com/ROCm/aiter
diffusers:
version: 40528e9
url: https://github.com/huggingface/diffusers
xfuser:
version: ccba9d5
url: https://github.com/xdit-project/xDiT
yunchang:
version: 2c9b712
url: https://github.com/feifeibear/long-context-attention
supported_models:
- group: Hunyuan Video
js_tag: hunyuan
models:
- model: Hunyuan Video
model_repo: tencent/HunyuanVideo
revision: refs/pr/18
url: https://huggingface.co/tencent/HunyuanVideo
github: https://github.com/Tencent-Hunyuan/HunyuanVideo
mad_tag: pyt_xdit_hunyuanvideo
js_tag: hunyuan_tag
- group: Wan-AI
js_tag: wan
models:
- model: Wan2.1
model_repo: Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
github: https://github.com/Wan-Video/Wan2.1
mad_tag: pyt_xdit_wan_2_1
js_tag: wan_21_tag
- model: Wan2.2
model_repo: Wan-AI/Wan2.2-I2V-A14B-Diffusers
url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers
github: https://github.com/Wan-Video/Wan2.2
mad_tag: pyt_xdit_wan_2_2
js_tag: wan_22_tag
- group: FLUX
js_tag: flux
models:
- model: FLUX.1
model_repo: black-forest-labs/FLUX.1-dev
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
github: https://github.com/black-forest-labs/flux
mad_tag: pyt_xdit_flux
js_tag: flux_1_tag
- group: Stable Diffusion
js_tag: stablediffusion
models:
- model: stable-diffusion-3.5-large
model_repo: stabilityai/stable-diffusion-3.5-large
url: https://huggingface.co/stabilityai/stable-diffusion-3.5-large
github: https://github.com/Stability-AI/sd3.5
mad_tag: pyt_xdit_sd_3_5
js_tag: stable_diffusion_3_5_large_tag

View File

@@ -1,16 +1,17 @@
dockers: sglang_benchmark:
- pull_tag: lmsysorg/sglang:v0.4.5-rocm630 unified_docker:
docker_hub_url: https://hub.docker.com/layers/lmsysorg/sglang/v0.4.5-rocm630/images/sha256-63d2cb760a237125daf6612464cfe2f395c0784e21e8b0ea37d551cd10d3c951 latest:
components: pull_tag: lmsysorg/sglang:v0.4.5-rocm630
ROCm: 6.3.0 docker_hub_url: https://hub.docker.com/layers/lmsysorg/sglang/v0.4.5-rocm630/images/sha256-63d2cb760a237125daf6612464cfe2f395c0784e21e8b0ea37d551cd10d3c951
SGLang: 0.4.5 (0.4.5-rocm) rocm_version: 6.3.0
PyTorch: 2.6.0a0+git8d4926e sglang_version: 0.4.5 (0.4.5-rocm)
model_groups: pytorch_version: 2.6.0a0+git8d4926e
- group: DeepSeek model_groups:
tag: deepseek - group: DeepSeek
models: tag: deepseek
- model: DeepSeek-R1-Distill-Qwen-32B models:
mad_tag: pyt_sglang_deepseek-r1-distill-qwen-32b - model: DeepSeek-R1-Distill-Qwen-32B
model_repo: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B mad_tag: pyt_sglang_deepseek-r1-distill-qwen-32b
url: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B model_repo: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
precision: bfloat16 url: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
precision: bfloat16

View File

@@ -1,32 +0,0 @@
dockers:
- pull_tag: lmsysorg/sglang:v0.5.2rc1-rocm700-mi30x
docker_hub_url: https://hub.docker.com/layers/lmsysorg/sglang/v0.5.2rc1-rocm700-mi30x/images/sha256-10c4ee502ddba44dd8c13325e6e03868bfe7f43d23d0a44780a8ee8b393f4729
components:
ROCm: 7.0.0
SGLang: v0.5.2rc1
pytorch-triton-rocm: 3.4.0+rocm7.0.0.gitf9e5bf54
model_groups:
- group: Dense models
tag: dense-models
models:
- model: Llama 3.1 8B Instruct
model_repo: Llama-3.1-8B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct
- model: Llama 3.1 405B FP8 KV
model_repo: Llama-3.1-405B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
- model: Llama 3.3 70B FP8 KV
model_repo: amd-Llama-3.3-70B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-FP8-KV
- model: Qwen3 32B
model_repo: Qwen3-32B
url: https://huggingface.co/Qwen/Qwen3-32B
- group: Small experts models
tag: small-experts-models
models:
- model: DeepSeek V3
model_repo: DeepSeek-V3
url: https://huggingface.co/deepseek-ai/DeepSeek-V3
- model: Mixtral 8x7B v0.1
model_repo: Mixtral-8x7B-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x7B-v0.1

View File

@@ -1,316 +1,88 @@
dockers: vllm_benchmark:
- pull_tag: rocm/vllm:rocm7.0.0_vllm_0.11.2_20251210 unified_docker:
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.11.2_20251210/images/sha256-e7f02dd2ce3824959658bc0391296f6158638e3ebce164f6c019c4eca8150ec7 latest:
components: # TODO: update me
ROCm: 7.0.0 pull_tag: rocm/vllm:rocm6.4.1_vllm_0.10.0_20250812
vLLM: 0.11.2 (0.11.2.dev673+g839868462.rocm700) docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.10.0_20250812/images/sha256-4c277ad39af3a8c9feac9b30bf78d439c74d9b4728e788a419d3f1d0c30cacaa
PyTorch: 2.9.0a0+git1c57644 rocm_version: 6.4.1
hipBLASLt: 1.0.0 vllm_version: 0.10.0 (0.10.1.dev395+g340ea86df.rocm641)
dockerfile: pytorch_version: 2.7.0+gitf717b2a (2.7.0+gitf717b2a)
commit: 8398684622109c806a35d660647060b0b9910663 hipblaslt_version: 0.15
model_groups: model_groups:
- group: Meta Llama - group: Meta Llama
tag: llama tag: llama
models: models:
- model: Llama 2 70B
mad_tag: pyt_vllm_llama-2-70b
model_repo: meta-llama/Llama-2-70b-chat-hf
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 4096
max_model_len: 4096
- model: Llama 3.1 8B - model: Llama 3.1 8B
mad_tag: pyt_vllm_llama-3.1-8b mad_tag: pyt_vllm_llama-3.1-8b
model_repo: meta-llama/Llama-3.1-8B-Instruct model_repo: meta-llama/Llama-3.1-8B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-8B url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: float16 precision: float16
config: - model: Llama 3.1 70B
tp: 1 mad_tag: pyt_vllm_llama-3.1-70b
dtype: auto model_repo: meta-llama/Llama-3.1-70B-Instruct
kv_cache_dtype: auto url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
max_num_batched_tokens: 131072 precision: float16
max_model_len: 8192
- model: Llama 3.1 8B FP8
mad_tag: pyt_vllm_llama-3.1-8b_fp8
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
precision: float8
config:
tp: 1
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B - model: Llama 3.1 405B
mad_tag: pyt_vllm_llama-3.1-405b mad_tag: pyt_vllm_llama-3.1-405b
model_repo: meta-llama/Llama-3.1-405B-Instruct model_repo: meta-llama/Llama-3.1-405B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
precision: float16 precision: float16
config: - model: Llama 2 70B
tp: 8 mad_tag: pyt_vllm_llama-2-70b
dtype: auto model_repo: meta-llama/Llama-2-70b-chat-hf
kv_cache_dtype: auto url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
max_num_batched_tokens: 131072 precision: float16
max_model_len: 8192 - model: Llama 3.1 8B FP8
mad_tag: pyt_vllm_llama-3.1-8b_fp8
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
precision: float8
- model: Llama 3.1 70B FP8
mad_tag: pyt_vllm_llama-3.1-70b_fp8
model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
precision: float8
- model: Llama 3.1 405B FP8 - model: Llama 3.1 405B FP8
mad_tag: pyt_vllm_llama-3.1-405b_fp8 mad_tag: pyt_vllm_llama-3.1-405b_fp8
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
precision: float8 precision: float8
config: - group: Mistral AI
tp: 8 tag: mistral
dtype: auto models:
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.1 405B MXFP4
mad_tag: pyt_vllm_llama-3.1-405b_fp4
model_repo: amd/Llama-3.1-405B-Instruct-MXFP4-Preview
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-MXFP4-Preview
precision: float4
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B
mad_tag: pyt_vllm_llama-3.3-70b
model_repo: meta-llama/Llama-3.3-70B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B FP8
mad_tag: pyt_vllm_llama-3.3-70b_fp8
model_repo: amd/Llama-3.3-70B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-FP8-KV
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 3.3 70B MXFP4
mad_tag: pyt_vllm_llama-3.3-70b_fp4
model_repo: amd/Llama-3.3-70B-Instruct-MXFP4-Preview
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-MXFP4-Preview
precision: float4
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- model: Llama 4 Scout 17Bx16E
mad_tag: pyt_vllm_llama-4-scout-17b-16e
model_repo: meta-llama/Llama-4-Scout-17B-16E-Instruct
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Llama 4 Maverick 17Bx128E
mad_tag: pyt_vllm_llama-4-maverick-17b-128e
model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct
url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct
precision: float16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 32768
max_model_len: 8192
- model: Llama 4 Maverick 17Bx128E FP8
mad_tag: pyt_vllm_llama-4-maverick-17b-128e_fp8
model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_batched_tokens: 131072
max_model_len: 8192
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek R1 0528 FP8
mad_tag: pyt_vllm_deepseek-r1
model_repo: deepseek-ai/DeepSeek-R1-0528
url: https://huggingface.co/deepseek-ai/DeepSeek-R1-0528
precision: float8
config:
tp: 8
dtype: auto
kv_cache_dtype: fp8
max_num_seqs: 1024
max_num_batched_tokens: 131072
max_model_len: 8192
- group: OpenAI GPT OSS
tag: gpt-oss
models:
- model: GPT OSS 20B
mad_tag: pyt_vllm_gpt-oss-20b
model_repo: openai/gpt-oss-20b
url: https://huggingface.co/openai/gpt-oss-20b
precision: bfloat16
config:
tp: 1
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 8192
max_model_len: 8192
- model: GPT OSS 120B
mad_tag: pyt_vllm_gpt-oss-120b
model_repo: openai/gpt-oss-120b
url: https://huggingface.co/openai/gpt-oss-120b
precision: bfloat16
config:
tp: 8
dtype: auto
kv_cache_dtype: auto
max_num_batched_tokens: 8192
max_model_len: 8192
- group: Mistral AI
tag: mistral
models:
- model: Mixtral MoE 8x7B
mad_tag: pyt_vllm_mixtral-8x7b
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
precision: float16
-config:
-tp: 8
-dtype: auto
-kv_cache_dtype: auto
-max_num_batched_tokens: 32768
-max_model_len: 8192
-- model: Mixtral MoE 8x7B FP8
-mad_tag: pyt_vllm_mixtral-8x7b_fp8
-model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
-url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
-precision: float8
-config:
-tp: 8
-dtype: auto
-kv_cache_dtype: fp8
-max_num_batched_tokens: 32768
-max_model_len: 8192
- model: Mixtral MoE 8x22B
mad_tag: pyt_vllm_mixtral-8x22b
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
precision: float16
-config:
-tp: 8
-dtype: auto
-kv_cache_dtype: auto
-max_num_batched_tokens: 65536
-max_model_len: 8192
+- model: Mixtral MoE 8x7B FP8
+mad_tag: pyt_vllm_mixtral-8x7b_fp8
+model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
+url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
+precision: float8
- model: Mixtral MoE 8x22B FP8
mad_tag: pyt_vllm_mixtral-8x22b_fp8
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
precision: float8
-config:
-tp: 8
-dtype: auto
-kv_cache_dtype: fp8
-max_num_batched_tokens: 65536
-max_model_len: 8192
-- group: Qwen
-tag: qwen
-models:
-- model: Qwen3 8B
-mad_tag: pyt_vllm_qwen3-8b
-model_repo: Qwen/Qwen3-8B
-url: https://huggingface.co/Qwen/Qwen3-8B
+- group: Qwen
+tag: qwen
+models:
+- model: QwQ-32B
+mad_tag: pyt_vllm_qwq-32b
+model_repo: Qwen/QwQ-32B
+url: https://huggingface.co/Qwen/QwQ-32B
precision: float16
-config:
-tp: 1
-dtype: auto
-kv_cache_dtype: auto
-max_num_batched_tokens: 40960
-max_model_len: 8192
-- model: Qwen3 32B
-mad_tag: pyt_vllm_qwen3-32b
-model_repo: Qwen/Qwen3-32b
-url: https://huggingface.co/Qwen/Qwen3-32B
-precision: float16
-config:
-tp: 1
-dtype: auto
-kv_cache_dtype: auto
-max_num_batched_tokens: 40960
-max_model_len: 8192
-- model: Qwen3 30B A3B
-mad_tag: pyt_vllm_qwen3-30b-a3b
-model_repo: Qwen/Qwen3-30B-A3B
-url: https://huggingface.co/Qwen/Qwen3-30B-A3B
-precision: float16
-config:
-tp: 1
-dtype: auto
-kv_cache_dtype: auto
-max_num_batched_tokens: 40960
-max_model_len: 8192
-- model: Qwen3 30B A3B FP8
-mad_tag: pyt_vllm_qwen3-30b-a3b_fp8
-model_repo: Qwen/Qwen3-30B-A3B-FP8
-url: https://huggingface.co/Qwen/Qwen3-30B-A3B-FP8
-precision: float16
-config:
-tp: 1
-dtype: auto
-kv_cache_dtype: fp8
-max_num_batched_tokens: 40960
-max_model_len: 8192
-- model: Qwen3 235B A22B
-mad_tag: pyt_vllm_qwen3-235b-a22b
-model_repo: Qwen/Qwen3-235B-A22B
-url: https://huggingface.co/Qwen/Qwen3-235B-A22B
-precision: float16
-config:
-tp: 8
-dtype: auto
-kv_cache_dtype: auto
-max_num_batched_tokens: 40960
-max_model_len: 8192
-- model: Qwen3 235B A22B FP8
-mad_tag: pyt_vllm_qwen3-235b-a22b_fp8
-model_repo: Qwen/Qwen3-235B-A22B-FP8
-url: https://huggingface.co/Qwen/Qwen3-235B-A22B-FP8
-precision: float8
-config:
-tp: 8
-dtype: auto
-kv_cache_dtype: fp8
-max_num_batched_tokens: 40960
-max_model_len: 8192
-- group: Microsoft Phi
-tag: phi
-models:
+tunableop: true
+- group: Microsoft Phi
+tag: phi
+models:
- model: Phi-4
mad_tag: pyt_vllm_phi-4
model_repo: microsoft/phi-4
url: https://huggingface.co/microsoft/phi-4
-precision: float16
-config:
-tp: 1
-dtype: auto
-kv_cache_dtype: auto
-max_num_batched_tokens: 16384
-max_model_len: 8192
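The `config:` blocks on the removed side of this hunk hold the per-model launch settings (tp, dtype, kv_cache_dtype, max_num_batched_tokens, max_model_len). A minimal sketch of how such a catalog can be consumed, assuming the file is saved as `vllm_models.yaml`, that its groups sit under a top-level `model_groups` key, and that the config keys correspond to vLLM's standard serve flags; the filename and any names not visible in the hunk are assumptions, not taken from the diff:

```python
# Minimal sketch, not the repository's tooling: build a `vllm serve` command
# from one catalog entry. The filename, the `model_groups` top-level key, and
# the flag mapping are assumptions.
import yaml  # PyYAML

def serve_command(catalog_path: str, mad_tag: str) -> str:
    with open(catalog_path) as f:
        catalog = yaml.safe_load(f)
    for group in catalog["model_groups"]:
        for entry in group["models"]:
            if entry["mad_tag"] == mad_tag:
                cfg = entry.get("config", {})  # absent on the simplified entries
                return (
                    f"vllm serve {entry['model_repo']}"
                    f" --tensor-parallel-size {cfg.get('tp', 1)}"
                    f" --dtype {cfg.get('dtype', 'auto')}"
                    f" --kv-cache-dtype {cfg.get('kv_cache_dtype', 'auto')}"
                    f" --max-num-batched-tokens {cfg.get('max_num_batched_tokens', 8192)}"
                    f" --max-model-len {cfg.get('max_model_len', 8192)}"
                )
    raise KeyError(f"no entry with mad_tag {mad_tag!r}")

print(serve_command("vllm_models.yaml", "pyt_vllm_mixtral-8x7b"))
```

Run against the removed Mixtral MoE 8x7B entry, this would print a serve line with `--tensor-parallel-size 8` and a 32768-token batch budget.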

View File

@@ -1,105 +0,0 @@
docker:
pull_tag: rocm/pytorch-xdit:v25.13
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.13/images/sha256-81954713070d67bde08595e03f62110c8a3dd66a9ae17a77d611e01f83f0f4ef
ROCm: 7.11.0
whats_new:
- "Flux.1 Kontext support"
- "Flux.2 Dev support"
- "Flux FP8 GEMM support"
- "Hybrid FP8 attention support for Wan models"
components:
TheRock:
version: 1728a81
url: https://github.com/ROCm/TheRock
rccl:
version: d23d18f
url: https://github.com/ROCm/rccl
composable_kernel:
version: ab0101c
url: https://github.com/ROCm/composable_kernel
rocm-libraries:
version: a2f7c35
url: https://github.com/ROCm/rocm-libraries
rocm-systems:
version: 659737c
url: https://github.com/ROCm/rocm-systems
torch:
version: 91be249
url: https://github.com/ROCm/pytorch
torchvision:
version: b919bd0
url: https://github.com/pytorch/vision
triton:
version: a272dfa
url: https://github.com/ROCm/triton
accelerate:
version: b521400f
url: https://github.com/huggingface/accelerate
aiter:
version: de14bec0
url: https://github.com/ROCm/aiter
diffusers:
version: a1f36ee3e
url: https://github.com/huggingface/diffusers
xfuser:
version: adf2681
url: https://github.com/xdit-project/xDiT
yunchang:
version: 2c9b712
url: https://github.com/feifeibear/long-context-attention
supported_models:
- group: Hunyuan Video
js_tag: hunyuan
models:
- model: Hunyuan Video
model_repo: tencent/HunyuanVideo
revision: refs/pr/18
url: https://huggingface.co/tencent/HunyuanVideo
github: https://github.com/Tencent-Hunyuan/HunyuanVideo
mad_tag: pyt_xdit_hunyuanvideo
js_tag: hunyuan_tag
- group: Wan-AI
js_tag: wan
models:
- model: Wan2.1
model_repo: Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
github: https://github.com/Wan-Video/Wan2.1
mad_tag: pyt_xdit_wan_2_1
js_tag: wan_21_tag
- model: Wan2.2
model_repo: Wan-AI/Wan2.2-I2V-A14B-Diffusers
url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers
github: https://github.com/Wan-Video/Wan2.2
mad_tag: pyt_xdit_wan_2_2
js_tag: wan_22_tag
- group: FLUX
js_tag: flux
models:
- model: FLUX.1
model_repo: black-forest-labs/FLUX.1-dev
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
github: https://github.com/black-forest-labs/flux
mad_tag: pyt_xdit_flux
js_tag: flux_1_tag
- model: FLUX.1 Kontext
model_repo: black-forest-labs/FLUX.1-Kontext-dev
url: https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev
github: https://github.com/black-forest-labs/flux
mad_tag: pyt_xdit_flux_kontext
js_tag: flux_1_kontext_tag
- model: FLUX.2
model_repo: black-forest-labs/FLUX.2-dev
url: https://huggingface.co/black-forest-labs/FLUX.2-dev
github: https://github.com/black-forest-labs/flux2
mad_tag: pyt_xdit_flux_2
js_tag: flux_2_tag
- group: StableDiffusion
js_tag: stablediffusion
models:
- model: stable-diffusion-3.5-large
model_repo: stabilityai/stable-diffusion-3.5-large
url: https://huggingface.co/stabilityai/stable-diffusion-3.5-large
github: https://github.com/Stability-AI/sd3.5
mad_tag: pyt_xdit_sd_3_5
js_tag: stable_diffusion_3_5_large_tag
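The deleted xDiT data file above ties one Docker image to the set of video and image generation models it was validated with, each keyed by `mad_tag` and `js_tag`, with an optional pinned `revision`. A minimal sketch of reading that structure with PyYAML, assuming the file is saved as `pytorch_xdit.yaml` (an illustrative name):

```python
# Minimal sketch, assuming the listing above is saved as pytorch_xdit.yaml:
# print the image to pull and the models the catalog covers.
import yaml

with open("pytorch_xdit.yaml") as f:
    catalog = yaml.safe_load(f)

print(f"docker pull {catalog['docker']['pull_tag']}")
for group in catalog["supported_models"]:
    for m in group["models"]:
        revision = m.get("revision", "main")  # only some entries pin a revision
        print(f"{m['model']:<28} {m['model_repo']} (revision {revision}, mad_tag {m['mad_tag']})")
```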

View File

@@ -1,64 +0,0 @@
dockers:
- pull_tag: rocm/jax-training:maxtext-v25.11
docker_hub_url: https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.11/images/sha256-18e4d8f0b8ce7a7422c58046940dd5f32249960449fca09a562b65fb8eb1562a
components:
ROCm: 7.1.0
JAX: 0.7.1
Python: 3.12
Transformer Engine: 2.4.0.dev0+281042de
hipBLASLt: 1.2.x
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 2 7B
mad_tag: jax_maxtext_train_llama-2-7b
model_repo: Llama-2-7B
precision: bf16
multinode_training_script: llama2_7b_multinode.sh
doc_options: ["single-node", "multi-node"]
- model: Llama 2 70B
mad_tag: jax_maxtext_train_llama-2-70b
model_repo: Llama-2-70B
precision: bf16
multinode_training_script: llama2_70b_multinode.sh
doc_options: ["single-node", "multi-node"]
- model: Llama 3 8B (multi-node)
mad_tag: jax_maxtext_train_llama-3-8b
multinode_training_script: llama3_8b_multinode.sh
doc_options: ["multi-node"]
- model: Llama 3 70B (multi-node)
mad_tag: jax_maxtext_train_llama-3-70b
multinode_training_script: llama3_70b_multinode.sh
doc_options: ["multi-node"]
- model: Llama 3.1 8B
mad_tag: jax_maxtext_train_llama-3.1-8b
model_repo: Llama-3.1-8B
precision: bf16
doc_options: ["single-node"]
- model: Llama 3.1 70B
mad_tag: jax_maxtext_train_llama-3.1-70b
model_repo: Llama-3.1-70B
precision: bf16
doc_options: ["single-node"]
- model: Llama 3.3 70B
mad_tag: jax_maxtext_train_llama-3.3-70b
model_repo: Llama-3.3-70B
precision: bf16
doc_options: ["single-node"]
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V2-Lite (16B)
mad_tag: jax_maxtext_train_deepseek-v2-lite-16b
model_repo: DeepSeek-V2-lite
precision: bf16
doc_options: ["single-node"]
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: jax_maxtext_train_mixtral-8x7b
model_repo: Mixtral-8x7B
precision: bf16
doc_options: ["single-node"]
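Entries in the deleted MaxText file above declare where they are documented via `doc_options`, and the multi-node ones also name a `multinode_training_script`. A minimal sketch that lists only the multi-node recipes, assuming the file is saved as `jax_maxtext.yaml` (an assumed name) with the top-level `model_groups` layout shown above:

```python
# Minimal sketch, assuming the listing above is saved as jax_maxtext.yaml:
# show which entries document a multi-node run and the script each one names.
import yaml

with open("jax_maxtext.yaml") as f:
    catalog = yaml.safe_load(f)

for group in catalog["model_groups"]:
    for m in group["models"]:
        if "multi-node" in m.get("doc_options", []):
            print(f"{m['model']}: {m['multinode_training_script']}")
```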

View File

@@ -1,17 +1,15 @@
-docker:
-pull_tag: rocm/primus:v25.10
-docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.10/images/sha256-140c37cd2eeeb183759b9622543fc03cc210dc97cbfa18eeefdcbda84420c197
+dockers:
+- pull_tag: rocm/megatron-lm:v25.7_py310
+docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.7_py310/images/sha256-6189df849feeeee3ae31bb1e97aef5006d69d2b90c134e97708c19632e20ab5a
components:
-ROCm: 7.1.0
-Primus: 0.3.0
-Primus Turbo: 0.1.1
-PyTorch: 2.10.0.dev20251112+rocm7.1
-Python: "3.10"
-Transformer Engine: 2.4.0.dev0+32e2d1d4
-Flash Attention: 2.8.3
-hipBLASLt: 1.2.0-09ab7153e2
-Triton: 3.4.0
-RCCL: 2.27.7
+ROCm: 6.4.2
+Primus: v0.1.0-rc1
+PyTorch: 2.8.0a0+gitd06a406
+Python: "3.10"
+Transformer Engine: 2.1.0.dev0+ba586519
+hipBLASLt: 37ba1d36
+Triton: 3.3.0
+RCCL: 2.22.3
model_groups:
- group: Meta Llama
tag: llama
@@ -22,6 +20,8 @@ model_groups:
mad_tag: pyt_megatron_lm_train_llama-3.1-8b
- model: Llama 3.1 70B
mad_tag: pyt_megatron_lm_train_llama-3.1-70b
+- model: Llama 3.1 70B (proxy)
+mad_tag: pyt_megatron_lm_train_llama-3.1-70b-proxy
- model: Llama 2 7B
mad_tag: pyt_megatron_lm_train_llama-2-7b
- model: Llama 2 70B

View File

@@ -1,72 +0,0 @@
dockers:
- pull_tag: rocm/jax-training:maxtext-v25.7-jax060
docker_hub_url: https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.7/images/sha256-45f4c727d4019a63fc47313d3a5f5a5105569539294ddfd2d742218212ae9025
components:
ROCm: 6.4.1
JAX: 0.6.0
Python: 3.10.12
Transformer Engine: 2.1.0+90d703dd
hipBLASLt: 1.1.0-499ece1c21
- pull_tag: rocm/jax-training:maxtext-v25.7
docker_hub_url: https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.7/images/sha256-45f4c727d4019a63fc47313d3a5f5a5105569539294ddfd2d742218212ae9025
components:
ROCm: 6.4.1
JAX: 0.5.0
Python: 3.10.12
Transformer Engine: 2.1.0+90d703dd
hipBLASLt: 1.x.x
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: jax_maxtext_train_llama-3.3-70b
model_repo: Llama-3.3-70B
precision: bf16
doc_options: ["single-node"]
- model: Llama 3.1 8B
mad_tag: jax_maxtext_train_llama-3.1-8b
model_repo: Llama-3.1-8B
precision: bf16
doc_options: ["single-node"]
- model: Llama 3.1 70B
mad_tag: jax_maxtext_train_llama-3.1-70b
model_repo: Llama-3.1-70B
precision: bf16
doc_options: ["single-node"]
- model: Llama 3 8B
mad_tag: jax_maxtext_train_llama-3-8b
multinode_training_script: llama3_8b_multinode.sh
doc_options: ["multi-node"]
- model: Llama 3 70B
mad_tag: jax_maxtext_train_llama-3-70b
multinode_training_script: llama3_70b_multinode.sh
doc_options: ["multi-node"]
- model: Llama 2 7B
mad_tag: jax_maxtext_train_llama-2-7b
model_repo: Llama-2-7B
precision: bf16
multinode_training_script: llama2_7b_multinode.sh
doc_options: ["single-node", "multi-node"]
- model: Llama 2 70B
mad_tag: jax_maxtext_train_llama-2-70b
model_repo: Llama-2-70B
precision: bf16
multinode_training_script: llama2_70b_multinode.sh
doc_options: ["single-node", "multi-node"]
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V2-Lite (16B)
mad_tag: jax_maxtext_train_deepseek-v2-lite-16b
model_repo: DeepSeek-V2-lite
precision: bf16
doc_options: ["single-node"]
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: jax_maxtext_train_mixtral-8x7b
model_repo: Mixtral-8x7B
precision: bf16
doc_options: ["single-node"]

View File

@@ -1,64 +0,0 @@
dockers:
- pull_tag: rocm/jax-training:maxtext-v25.9.1
docker_hub_url: https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.9.1/images/sha256-60946cfbd470f6ee361fc9da740233a4fb2e892727f01719145b1f7627a1cff6
components:
ROCm: 7.0.0
JAX: 0.6.2
Python: 3.10.18
Transformer Engine: 2.2.0.dev0+c91bac54
hipBLASLt: 1.x.x
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 2 7B
mad_tag: jax_maxtext_train_llama-2-7b
model_repo: Llama-2-7B
precision: bf16
multinode_training_script: llama2_7b_multinode.sh
doc_options: ["single-node", "multi-node"]
- model: Llama 2 70B
mad_tag: jax_maxtext_train_llama-2-70b
model_repo: Llama-2-70B
precision: bf16
multinode_training_script: llama2_70b_multinode.sh
doc_options: ["single-node", "multi-node"]
- model: Llama 3 8B (multi-node)
mad_tag: jax_maxtext_train_llama-3-8b
multinode_training_script: llama3_8b_multinode.sh
doc_options: ["multi-node"]
- model: Llama 3 70B (multi-node)
mad_tag: jax_maxtext_train_llama-3-70b
multinode_training_script: llama3_70b_multinode.sh
doc_options: ["multi-node"]
- model: Llama 3.1 8B
mad_tag: jax_maxtext_train_llama-3.1-8b
model_repo: Llama-3.1-8B
precision: bf16
doc_options: ["single-node"]
- model: Llama 3.1 70B
mad_tag: jax_maxtext_train_llama-3.1-70b
model_repo: Llama-3.1-70B
precision: bf16
doc_options: ["single-node"]
- model: Llama 3.3 70B
mad_tag: jax_maxtext_train_llama-3.3-70b
model_repo: Llama-3.3-70B
precision: bf16
doc_options: ["single-node"]
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V2-Lite (16B)
mad_tag: jax_maxtext_train_deepseek-v2-lite-16b
model_repo: DeepSeek-V2-lite
precision: bf16
doc_options: ["single-node"]
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: jax_maxtext_train_mixtral-8x7b
model_repo: Mixtral-8x7B
precision: bf16
doc_options: ["single-node"]

View File

@@ -1,49 +0,0 @@
docker:
pull_tag: rocm/primus:v25.10
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.10/images/sha256-140c37cd2eeeb183759b9622543fc03cc210dc97cbfa18eeefdcbda84420c197
components:
ROCm: 7.1.0
Primus: 0.3.0
Primus Turbo: 0.1.1
PyTorch: 2.10.0.dev20251112+rocm7.1
Python: "3.10"
Transformer Engine: 2.4.0.dev0+32e2d1d4
Flash Attention: 2.8.3
hipBLASLt: 1.2.0-09ab7153e2
Triton: 3.4.0
RCCL: 2.27.7
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: pyt_megatron_lm_train_llama-3.3-70b
- model: Llama 3.1 8B
mad_tag: pyt_megatron_lm_train_llama-3.1-8b
- model: Llama 3.1 70B
mad_tag: pyt_megatron_lm_train_llama-3.1-70b
- model: Llama 2 7B
mad_tag: pyt_megatron_lm_train_llama-2-7b
- model: Llama 2 70B
mad_tag: pyt_megatron_lm_train_llama-2-70b
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: pyt_megatron_lm_train_deepseek-v3-proxy
- model: DeepSeek-V2-Lite
mad_tag: pyt_megatron_lm_train_deepseek-v2-lite-16b
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: pyt_megatron_lm_train_mixtral-8x7b
- model: Mixtral 8x22B (proxy)
mad_tag: pyt_megatron_lm_train_mixtral-8x22b-proxy
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: pyt_megatron_lm_train_qwen2.5-7b
- model: Qwen 2.5 72B
mad_tag: pyt_megatron_lm_train_qwen2.5-72b

View File

@@ -1,49 +0,0 @@
dockers:
- pull_tag: rocm/megatron-lm:v25.7_py310
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.7_py310/images/sha256-6189df849feeeee3ae31bb1e97aef5006d69d2b90c134e97708c19632e20ab5a
components:
ROCm: 6.4.2
Primus: v0.1.0-rc1
PyTorch: 2.8.0a0+gitd06a406
Python: "3.10"
Transformer Engine: 2.1.0.dev0+ba586519
hipBLASLt: 37ba1d36
Triton: 3.3.0
RCCL: 2.22.3
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: pyt_megatron_lm_train_llama-3.3-70b
- model: Llama 3.1 8B
mad_tag: pyt_megatron_lm_train_llama-3.1-8b
- model: Llama 3.1 70B
mad_tag: pyt_megatron_lm_train_llama-3.1-70b
- model: Llama 3.1 70B (proxy)
mad_tag: pyt_megatron_lm_train_llama-3.1-70b-proxy
- model: Llama 2 7B
mad_tag: pyt_megatron_lm_train_llama-2-7b
- model: Llama 2 70B
mad_tag: pyt_megatron_lm_train_llama-2-70b
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: pyt_megatron_lm_train_deepseek-v3-proxy
- model: DeepSeek-V2-Lite
mad_tag: pyt_megatron_lm_train_deepseek-v2-lite-16b
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: pyt_megatron_lm_train_mixtral-8x7b
- model: Mixtral 8x22B (proxy)
mad_tag: pyt_megatron_lm_train_mixtral-8x22b-proxy
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: pyt_megatron_lm_train_qwen2.5-7b
- model: Qwen 2.5 72B
mad_tag: pyt_megatron_lm_train_qwen2.5-72b

View File

@@ -1,48 +0,0 @@
dockers:
- pull_tag: rocm/megatron-lm:v25.8_py310
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.8_py310/images/sha256-50fc824361054e445e86d5d88d5f58817f61f8ec83ad4a7e43ea38bbc4a142c0
components:
ROCm: 6.4.3
PyTorch: 2.8.0a0+gitd06a406
Python: "3.10"
Transformer Engine: 2.2.0.dev0+54dd2bdc
hipBLASLt: d1b517fc7a
Triton: 3.3.0
RCCL: 2.22.3
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: pyt_megatron_lm_train_llama-3.3-70b
- model: Llama 3.1 8B
mad_tag: pyt_megatron_lm_train_llama-3.1-8b
- model: Llama 3.1 70B
mad_tag: pyt_megatron_lm_train_llama-3.1-70b
- model: Llama 3.1 70B (proxy)
mad_tag: pyt_megatron_lm_train_llama-3.1-70b-proxy
- model: Llama 2 7B
mad_tag: pyt_megatron_lm_train_llama-2-7b
- model: Llama 2 70B
mad_tag: pyt_megatron_lm_train_llama-2-70b
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: pyt_megatron_lm_train_deepseek-v3-proxy
- model: DeepSeek-V2-Lite
mad_tag: pyt_megatron_lm_train_deepseek-v2-lite-16b
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: pyt_megatron_lm_train_mixtral-8x7b
- model: Mixtral 8x22B (proxy)
mad_tag: pyt_megatron_lm_train_mixtral-8x22b-proxy
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: pyt_megatron_lm_train_qwen2.5-7b
- model: Qwen 2.5 72B
mad_tag: pyt_megatron_lm_train_qwen2.5-72b

View File

@@ -1,53 +0,0 @@
dockers:
MI355X and MI350X:
pull_tag: rocm/megatron-lm:v25.9_gfx950
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.9_gfx950/images/sha256-1a198be32f49efd66d0ff82066b44bd99b3e6b04c8e0e9b36b2c481e13bff7b6
components: &docker_components
ROCm: 7.0.0
Primus: aab4234
PyTorch: 2.9.0.dev20250821+rocm7.0.0.lw.git125803b7
Python: "3.10"
Transformer Engine: 2.2.0.dev0+54dd2bdc
Flash Attention: 2.8.3
hipBLASLt: 911283acd1
Triton: 3.4.0+rocm7.0.0.git56765e8c
RCCL: 2.26.6
MI325X and MI300X:
pull_tag: rocm/megatron-lm:v25.9_gfx942
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.9_gfx942/images/sha256-df6ab8f45b4b9ceb100fb24e19b2019a364e351ee3b324dbe54466a1d67f8357
components: *docker_components
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: pyt_megatron_lm_train_llama-3.3-70b
- model: Llama 3.1 8B
mad_tag: pyt_megatron_lm_train_llama-3.1-8b
- model: Llama 3.1 70B
mad_tag: pyt_megatron_lm_train_llama-3.1-70b
- model: Llama 2 7B
mad_tag: pyt_megatron_lm_train_llama-2-7b
- model: Llama 2 70B
mad_tag: pyt_megatron_lm_train_llama-2-70b
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: pyt_megatron_lm_train_deepseek-v3-proxy
- model: DeepSeek-V2-Lite
mad_tag: pyt_megatron_lm_train_deepseek-v2-lite-16b
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: pyt_megatron_lm_train_mixtral-8x7b
- model: Mixtral 8x22B (proxy)
mad_tag: pyt_megatron_lm_train_mixtral-8x22b-proxy
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: pyt_megatron_lm_train_qwen2.5-7b
- model: Qwen 2.5 72B
mad_tag: pyt_megatron_lm_train_qwen2.5-72b
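The deleted file above relies on a YAML anchor: `components: &docker_components` under the MI355X and MI350X image is reused as `components: *docker_components` under the MI325X and MI300X image, so both entries publish the same component versions. A minimal sketch of how a standard loader resolves that alias; the inline YAML is a trimmed illustration rather than the full file:

```python
# Minimal sketch of the &docker_components / *docker_components pair: the alias
# re-uses the anchored mapping, so both GPU families report identical versions.
import yaml

doc = """
dockers:
  MI355X and MI350X:
    pull_tag: rocm/megatron-lm:v25.9_gfx950
    components: &docker_components
      ROCm: 7.0.0
      Triton: 3.4.0+rocm7.0.0.git56765e8c
  MI325X and MI300X:
    pull_tag: rocm/megatron-lm:v25.9_gfx942
    components: *docker_components
"""

data = yaml.safe_load(doc)
gfx950 = data["dockers"]["MI355X and MI350X"]["components"]
gfx942 = data["dockers"]["MI325X and MI300X"]["components"]
assert gfx950 == gfx942  # alias resolves to the same component table
print(gfx942["ROCm"])    # prints 7.0.0
```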

View File

@@ -1,58 +0,0 @@
docker:
pull_tag: rocm/primus:v25.10
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.10/images/sha256-140c37cd2eeeb183759b9622543fc03cc210dc97cbfa18eeefdcbda84420c197
components:
ROCm: 7.1.0
PyTorch: 2.10.0.dev20251112+rocm7.1
Python: "3.10"
Transformer Engine: 2.4.0.dev0+32e2d1d4
Flash Attention: 2.8.3
hipBLASLt: 1.2.0-09ab7153e2
Triton: 3.4.0
RCCL: 2.27.7
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.3-70b
config_name: llama3.3_70B-pretrain.yaml
- model: Llama 3.1 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-70b
config_name: llama3.1_70B-pretrain.yaml
- model: Llama 3.1 8B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-8b
config_name: llama3.1_8B-pretrain.yaml
- model: Llama 2 7B
mad_tag: primus_pyt_megatron_lm_train_llama-2-7b
config_name: llama2_7B-pretrain.yaml
- model: Llama 2 70B
mad_tag: primus_pyt_megatron_lm_train_llama-2-70b
config_name: llama2_70B-pretrain.yaml
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: primus_pyt_megatron_lm_train_deepseek-v3-proxy
config_name: deepseek_v3-pretrain.yaml
- model: DeepSeek-V2-Lite
mad_tag: primus_pyt_megatron_lm_train_deepseek-v2-lite-16b
config_name: deepseek_v2_lite-pretrain.yaml
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x7b
config_name: mixtral_8x7B_v0.1-pretrain.yaml
- model: Mixtral 8x22B (proxy)
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x22b-proxy
config_name: mixtral_8x22B_v0.1-pretrain.yaml
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-7b
config_name: primus_qwen2.5_7B-pretrain.yaml
- model: Qwen 2.5 72B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-72b
config_name: qwen2.5_72B-pretrain.yaml

View File

@@ -1,58 +0,0 @@
dockers:
- pull_tag: rocm/megatron-lm:v25.7_py310
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.7_py310/images/sha256-6189df849feeeee3ae31bb1e97aef5006d69d2b90c134e97708c19632e20ab5a
components:
ROCm: 6.4.2
Primus: v0.1.0-rc1
PyTorch: 2.8.0a0+gitd06a406
Python: "3.10"
Transformer Engine: 2.1.0.dev0+ba586519
hipBLASLt: 37ba1d36
Triton: 3.3.0
RCCL: 2.22.3
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.3-70b
config_name: llama3.3_70B-pretrain.yaml
- model: Llama 3.1 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-70b
config_name: llama3.1_70B-pretrain.yaml
- model: Llama 3.1 8B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-8b
config_name: llama3.1_8B-pretrain.yaml
- model: Llama 2 7B
mad_tag: primus_pyt_megatron_lm_train_llama-2-7b
config_name: llama2_7B-pretrain.yaml
- model: Llama 2 70B
mad_tag: primus_pyt_megatron_lm_train_llama-2-70b
config_name: llama2_70B-pretrain.yaml
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: primus_pyt_megatron_lm_train_deepseek-v3-proxy
config_name: deepseek_v3-pretrain.yaml
- model: DeepSeek-V2-Lite
mad_tag: primus_pyt_megatron_lm_train_deepseek-v2-lite-16b
config_name: deepseek_v2_lite-pretrain.yaml
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x7b
config_name: mixtral_8x7B_v0.1-pretrain.yaml
- model: Mixtral 8x22B (proxy)
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x22b-proxy
config_name: mixtral_8x22B_v0.1-pretrain.yaml
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-7b
config_name: primus_qwen2.5_7B-pretrain.yaml
- model: Qwen 2.5 72B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-72b
config_name: qwen2.5_72B-pretrain.yaml

View File

@@ -1,58 +0,0 @@
dockers:
- pull_tag: rocm/megatron-lm:v25.8_py310
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.8_py310/images/sha256-50fc824361054e445e86d5d88d5f58817f61f8ec83ad4a7e43ea38bbc4a142c0
components:
ROCm: 6.4.3
Primus: 927a717
PyTorch: 2.8.0a0+gitd06a406
Python: "3.10"
Transformer Engine: 2.2.0.dev0+54dd2bdc
hipBLASLt: d1b517fc7a
Triton: 3.3.0
RCCL: 2.22.3
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.3-70b
config_name: llama3.3_70B-pretrain.yaml
- model: Llama 3.1 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-70b
config_name: llama3.1_70B-pretrain.yaml
- model: Llama 3.1 8B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-8b
config_name: llama3.1_8B-pretrain.yaml
- model: Llama 2 7B
mad_tag: primus_pyt_megatron_lm_train_llama-2-7b
config_name: llama2_7B-pretrain.yaml
- model: Llama 2 70B
mad_tag: primus_pyt_megatron_lm_train_llama-2-70b
config_name: llama2_70B-pretrain.yaml
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: primus_pyt_megatron_lm_train_deepseek-v3-proxy
config_name: deepseek_v3-pretrain.yaml
- model: DeepSeek-V2-Lite
mad_tag: primus_pyt_megatron_lm_train_deepseek-v2-lite-16b
config_name: deepseek_v2_lite-pretrain.yaml
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x7b
config_name: mixtral_8x7B_v0.1-pretrain.yaml
- model: Mixtral 8x22B (proxy)
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x22b-proxy
config_name: mixtral_8x22B_v0.1-pretrain.yaml
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-7b
config_name: primus_qwen2.5_7B-pretrain.yaml
- model: Qwen 2.5 72B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-72b
config_name: qwen2.5_72B-pretrain.yaml

View File

@@ -1,65 +0,0 @@
dockers:
MI355X and MI350X:
pull_tag: rocm/primus:v25.9_gfx950
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.9_gfx950/images/sha256-1a198be32f49efd66d0ff82066b44bd99b3e6b04c8e0e9b36b2c481e13bff7b6
components: &docker_components
ROCm: 7.0.0
Primus: 0.3.0
Primus Turbo: 0.1.1
PyTorch: 2.9.0.dev20250821+rocm7.0.0.lw.git125803b7
Python: "3.10"
Transformer Engine: 2.2.0.dev0+54dd2bdc
Flash Attention: 2.8.3
hipBLASLt: 911283acd1
Triton: 3.4.0+rocm7.0.0.git56765e8c
RCCL: 2.26.6
MI325X and MI300X:
pull_tag: rocm/primus:v25.9_gfx942
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.9_gfx942/images/sha256-df6ab8f45b4b9ceb100fb24e19b2019a364e351ee3b324dbe54466a1d67f8357
components: *docker_components
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.3-70b
config_name: llama3.3_70B-pretrain.yaml
- model: Llama 3.1 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-70b
config_name: llama3.1_70B-pretrain.yaml
- model: Llama 3.1 8B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-8b
config_name: llama3.1_8B-pretrain.yaml
- model: Llama 2 7B
mad_tag: primus_pyt_megatron_lm_train_llama-2-7b
config_name: llama2_7B-pretrain.yaml
- model: Llama 2 70B
mad_tag: primus_pyt_megatron_lm_train_llama-2-70b
config_name: llama2_70B-pretrain.yaml
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: primus_pyt_megatron_lm_train_deepseek-v3-proxy
config_name: deepseek_v3-pretrain.yaml
- model: DeepSeek-V2-Lite
mad_tag: primus_pyt_megatron_lm_train_deepseek-v2-lite-16b
config_name: deepseek_v2_lite-pretrain.yaml
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x7b
config_name: mixtral_8x7B_v0.1-pretrain.yaml
- model: Mixtral 8x22B (proxy)
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x22b-proxy
config_name: mixtral_8x22B_v0.1-pretrain.yaml
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-7b
config_name: primus_qwen2.5_7B-pretrain.yaml
- model: Qwen 2.5 72B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-72b
config_name: qwen2.5_72B-pretrain.yaml

View File

@@ -1,32 +0,0 @@
docker:
pull_tag: rocm/primus:v25.10
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.10/images/sha256-140c37cd2eeeb183759b9622543fc03cc210dc97cbfa18eeefdcbda84420c197
components:
ROCm: 7.1.0
PyTorch: 2.10.0.dev20251112+rocm7.1
Python: "3.10"
Transformer Engine: 2.4.0.dev0+32e2d1d4
Flash Attention: 2.8.3
hipBLASLt: 1.2.0-09ab7153e2
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.1 8B
mad_tag: primus_pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
- model: Llama 3.1 70B
mad_tag: primus_pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B
precision: BF16
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek V2 16B
mad_tag: primus_pyt_train_deepseek-v2
model_repo: DeepSeek-V2
url: https://huggingface.co/deepseek-ai/DeepSeek-V2
precision: BF16

View File

@@ -1,24 +0,0 @@
dockers:
- pull_tag: rocm/pytorch-training:v25.8
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-training/v25.8/images/sha256-5082ae01d73fec6972b0d84e5dad78c0926820dcf3c19f301d6c8eb892e573c5
components:
ROCm: 6.4.3
PyTorch: 2.8.0a0+gitd06a406
Python: 3.10.18
Transformer Engine: 2.2.0.dev0+a1e66aae
Flash Attention: 3.0.0.post1
hipBLASLt: 1.1.0-d1b517fc7a
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.1 8B
mad_tag: primus_pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
- model: Llama 3.1 70B
mad_tag: primus_pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B
precision: BF16

View File

@@ -1,39 +0,0 @@
dockers:
MI355X and MI350X:
pull_tag: rocm/primus:v25.9_gfx950
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.9_gfx950/images/sha256-1a198be32f49efd66d0ff82066b44bd99b3e6b04c8e0e9b36b2c481e13bff7b6
components: &docker_components
ROCm: 7.0.0
Primus: 0.3.0
Primus Turbo: 0.1.1
PyTorch: 2.9.0.dev20250821+rocm7.0.0.lw.git125803b7
Python: "3.10"
Transformer Engine: 2.2.0.dev0+54dd2bdc
Flash Attention: 2.8.3
hipBLASLt: 911283acd1
Triton: 3.4.0+rocm7.0.0.git56765e8c
RCCL: 2.26.6
MI325X and MI300X:
pull_tag: rocm/primus:v25.9_gfx942
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.9_gfx942/images/sha256-df6ab8f45b4b9ceb100fb24e19b2019a364e351ee3b324dbe54466a1d67f8357
components: *docker_components
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.1 8B
mad_tag: primus_pyt_train_llama-3.1-8b
model_repo: meta-llama/Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
config_file:
bf16: "./llama3_8b_fsdp_bf16.toml"
fp8: "./llama3_8b_fsdp_fp8.toml"
- model: Llama 3.1 70B
mad_tag: primus_pyt_train_llama-3.1-70b
model_repo: meta-llama/Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B
precision: BF16
config_file:
bf16: "./llama3_70b_fsdp_bf16.toml"
fp8: "./llama3_70b_fsdp_fp8.toml"

View File

@@ -1,197 +0,0 @@
docker:
pull_tag: rocm/primus:v25.10
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.10/images/sha256-140c37cd2eeeb183759b9622543fc03cc210dc97cbfa18eeefdcbda84420c197
components:
ROCm: 7.1.0
Primus: 0.3.0
Primus Turbo: 0.1.1
PyTorch: 2.10.0.dev20251112+rocm7.1
Python: "3.10"
Transformer Engine: 2.4.0.dev0+32e2d1d4
Flash Attention: 2.8.3
hipBLASLt: 1.2.0-09ab7153e2
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 4 Scout 17B-16E
mad_tag: pyt_train_llama-4-scout-17b-16e
model_repo: Llama-4-17B_16E
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.3 70B
mad_tag: pyt_train_llama-3.3-70b
model_repo: Llama-3.3-70B
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 3.2 1B
mad_tag: pyt_train_llama-3.2-1b
model_repo: Llama-3.2-1B
url: https://huggingface.co/meta-llama/Llama-3.2-1B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 3B
mad_tag: pyt_train_llama-3.2-3b
model_repo: Llama-3.2-3B
url: https://huggingface.co/meta-llama/Llama-3.2-3B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 Vision 11B
mad_tag: pyt_train_llama-3.2-vision-11b
model_repo: Llama-3.2-Vision-11B
url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.2 Vision 90B
mad_tag: pyt_train_llama-3.2-vision-90b
model_repo: Llama-3.2-Vision-90B
url: https://huggingface.co/meta-llama/Llama-3.2-90B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.1 8B
mad_tag: pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
training_modes: [pretrain, finetune_fw, finetune_lora, HF_pretrain]
- model: Llama 3.1 70B
mad_tag: pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
precision: BF16
training_modes: [pretrain, finetune_fw, finetune_lora]
- model: Llama 3.1 405B
mad_tag: pyt_train_llama-3.1-405b
model_repo: Llama-3.1-405B
url: https://huggingface.co/meta-llama/Llama-3.1-405B
precision: BF16
training_modes: [finetune_qlora]
- model: Llama 3 8B
mad_tag: pyt_train_llama-3-8b
model_repo: Llama-3-8B
url: https://huggingface.co/meta-llama/Meta-Llama-3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3 70B
mad_tag: pyt_train_llama-3-70b
model_repo: Llama-3-70B
url: https://huggingface.co/meta-llama/Meta-Llama-3-70B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 7B
mad_tag: pyt_train_llama-2-7b
model_repo: Llama-2-7B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 2 13B
mad_tag: pyt_train_llama-2-13b
model_repo: Llama-2-13B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 70B
mad_tag: pyt_train_llama-2-70b
model_repo: Llama-2-70B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_lora, finetune_qlora]
- group: OpenAI
tag: openai
models:
- model: GPT OSS 20B
mad_tag: pyt_train_gpt_oss_20b
model_repo: GPT-OSS-20B
url: https://huggingface.co/openai/gpt-oss-20b
precision: BF16
training_modes: [HF_finetune_lora]
- model: GPT OSS 120B
mad_tag: pyt_train_gpt_oss_120b
model_repo: GPT-OSS-120B
url: https://huggingface.co/openai/gpt-oss-120b
precision: BF16
training_modes: [HF_finetune_lora]
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek V2 16B
mad_tag: primus_pyt_train_deepseek-v2
model_repo: DeepSeek-V2
url: https://huggingface.co/deepseek-ai/DeepSeek-V2
precision: BF16
training_modes: [pretrain]
- group: Qwen
tag: qwen
models:
- model: Qwen 3 8B
mad_tag: pyt_train_qwen3-8b
model_repo: Qwen3-8B
url: https://huggingface.co/Qwen/Qwen3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 3 32B
mad_tag: pyt_train_qwen3-32b
model_repo: Qwen3-32
url: https://huggingface.co/Qwen/Qwen3-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 32B
mad_tag: pyt_train_qwen2.5-32b
model_repo: Qwen2.5-32B
url: https://huggingface.co/Qwen/Qwen2.5-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 72B
mad_tag: pyt_train_qwen2.5-72b
model_repo: Qwen2.5-72B
url: https://huggingface.co/Qwen/Qwen2.5-72B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2 1.5B
mad_tag: pyt_train_qwen2-1.5b
model_repo: Qwen2-1.5B
url: https://huggingface.co/Qwen/Qwen2-1.5B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 2 7B
mad_tag: pyt_train_qwen2-7b
model_repo: Qwen2-7B
url: https://huggingface.co/Qwen/Qwen2-7B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- group: Stable Diffusion
tag: sd
models:
- model: Stable Diffusion XL
mad_tag: pyt_huggingface_stable_diffusion_xl_2k_lora_finetuning
model_repo: SDXL
url: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
precision: BF16
training_modes: [posttrain]
- group: Flux
tag: flux
models:
- model: FLUX.1-dev
mad_tag: pyt_train_flux
model_repo: Flux
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
precision: BF16
training_modes: [posttrain]
- group: NCF
tag: ncf
models:
- model: NCF
mad_tag: pyt_ncf_training
model_repo:
url: https://github.com/ROCm/FluxBenchmark
precision: FP32
- group: DLRM
tag: dlrm
models:
- model: DLRM v2
mad_tag: pyt_train_dlrm
model_repo: DLRM
url: https://github.com/AMD-AGI/DLRMBenchmark
training_modes: [pretrain]
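Each entry in the deleted file above carries a `training_modes` list. One plausible way to consume such a catalog is to invert it into a mode-to-models index; a minimal sketch, assuming the file is saved as `pytorch_training.yaml` (an assumed name):

```python
# Minimal sketch, assuming the listing above is saved as pytorch_training.yaml:
# invert the catalog into a training_mode -> mad_tags index.
from collections import defaultdict

import yaml

with open("pytorch_training.yaml") as f:
    catalog = yaml.safe_load(f)

by_mode = defaultdict(list)
for group in catalog["model_groups"]:
    for m in group["models"]:
        for mode in m.get("training_modes", []):  # some entries list no modes
            by_mode[mode].append(m["mad_tag"])

for mode in sorted(by_mode):
    print(f"{mode}: {len(by_mode[mode])} entries")
```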

View File

@@ -1,120 +0,0 @@
unified_docker:
latest:
pull_tag: rocm/pytorch-training:v25.6
docker_hub_url: https://hub.docker.com/r/rocm/pytorch-training/tags
rocm_version: 6.4.1
pytorch_version: 2.8.0a0+git7d205b2
python_version: 3.10.17
transformer_engine_version: 1.14.0+2f85f5f2
flash_attention_version: 3.0.0.post1
hipblaslt_version: 0.15.0-8c6919d
triton_version: 3.3.0
model_groups:
- group: Pre-training
tag: pre-training
models:
- model: Llama 3.1 8B
mad_tag: pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
training_modes: [pretrain]
- model: Llama 3.1 70B
mad_tag: pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
precision: BF16
training_modes: [pretrain]
- model: FLUX.1-dev
mad_tag: pyt_train_flux
model_repo: Flux
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
precision: BF16
training_modes: [pretrain]
- group: Fine-tuning
tag: fine-tuning
models:
- model: Llama 4 Scout 17B-16E
mad_tag: pyt_train_llama-4-scout-17b-16e
model_repo: Llama-4-17B_16E
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.3 70B
mad_tag: pyt_train_llama-3.3-70b
model_repo: Llama-3.3-70B
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 3.2 1B
mad_tag: pyt_train_llama-3.2-1b
model_repo: Llama-3.2-1B
url: https://huggingface.co/meta-llama/Llama-3.2-1B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 3B
mad_tag: pyt_train_llama-3.2-3b
model_repo: Llama-3.2-3B
url: https://huggingface.co/meta-llama/Llama-3.2-3B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 Vision 11B
mad_tag: pyt_train_llama-3.2-vision-11b
model_repo: Llama-3.2-Vision-11B
url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.2 Vision 90B
mad_tag: pyt_train_llama-3.2-vision-90b
model_repo: Llama-3.2-Vision-90B
url: https://huggingface.co/meta-llama/Llama-3.2-90B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.1 8B
mad_tag: pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.1 70B
mad_tag: pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 3.1 405B
mad_tag: pyt_train_llama-3.1-405b
model_repo: Llama-3.1-405B
url: https://huggingface.co/meta-llama/Llama-3.1-405B
precision: BF16
training_modes: [finetune_qlora, HF_finetune_lora]
- model: Llama 3 8B
mad_tag: pyt_train_llama-3-8b
model_repo: Llama-3-8B
url: https://huggingface.co/meta-llama/Meta-Llama-3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3 70B
mad_tag: pyt_train_llama-3-70b
model_repo: Llama-3-70B
url: https://huggingface.co/meta-llama/Meta-Llama-3-70B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 7B
mad_tag: pyt_train_llama-2-7b
model_repo: Llama-2-7B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 2 13B
mad_tag: pyt_train_llama-2-13b
model_repo: Llama-2-13B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 70B
mad_tag: pyt_train_llama-2-70b
model_repo: Llama-2-70B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_lora, finetune_qlora, HF_finetune_lora]

View File

@@ -1,162 +0,0 @@
dockers:
- pull_tag: rocm/pytorch-training:v25.7
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-training/v25.7/images/sha256-cc6fd840ab89cb81d926fc29eca6d075aee9875a55a522675a4b9231c9a0a712
components:
ROCm: 6.4.2
PyTorch: 2.8.0a0+gitd06a406
Python: 3.10.18
Transformer Engine: 2.2.0.dev0+94e53dd8
Flash Attention: 3.0.0.post1
hipBLASLt: 1.1.0-4b9a52edfc
Triton: 3.3.0
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 4 Scout 17B-16E
mad_tag: pyt_train_llama-4-scout-17b-16e
model_repo: Llama-4-17B_16E
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.3 70B
mad_tag: pyt_train_llama-3.3-70b
model_repo: Llama-3.3-70B
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 3.2 1B
mad_tag: pyt_train_llama-3.2-1b
model_repo: Llama-3.2-1B
url: https://huggingface.co/meta-llama/Llama-3.2-1B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 3B
mad_tag: pyt_train_llama-3.2-3b
model_repo: Llama-3.2-3B
url: https://huggingface.co/meta-llama/Llama-3.2-3B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 Vision 11B
mad_tag: pyt_train_llama-3.2-vision-11b
model_repo: Llama-3.2-Vision-11B
url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.2 Vision 90B
mad_tag: pyt_train_llama-3.2-vision-90b
model_repo: Llama-3.2-Vision-90B
url: https://huggingface.co/meta-llama/Llama-3.2-90B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.1 8B
mad_tag: pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
training_modes: [pretrain, finetune_fw, finetune_lora, HF_pretrain]
- model: Llama 3.1 70B
mad_tag: pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
precision: BF16
training_modes: [pretrain, finetune_fw, finetune_lora]
- model: Llama 3.1 405B
mad_tag: pyt_train_llama-3.1-405b
model_repo: Llama-3.1-405B
url: https://huggingface.co/meta-llama/Llama-3.1-405B
precision: BF16
training_modes: [finetune_qlora]
- model: Llama 3 8B
mad_tag: pyt_train_llama-3-8b
model_repo: Llama-3-8B
url: https://huggingface.co/meta-llama/Meta-Llama-3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3 70B
mad_tag: pyt_train_llama-3-70b
model_repo: Llama-3-70B
url: https://huggingface.co/meta-llama/Meta-Llama-3-70B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 7B
mad_tag: pyt_train_llama-2-7b
model_repo: Llama-2-7B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 2 13B
mad_tag: pyt_train_llama-2-13b
model_repo: Llama-2-13B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 70B
mad_tag: pyt_train_llama-2-70b
model_repo: Llama-2-70B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_lora, finetune_qlora]
- group: OpenAI
tag: openai
models:
- model: GPT OSS 20B
mad_tag: pyt_train_gpt_oss_20b
model_repo: GPT-OSS-20B
url: https://huggingface.co/openai/gpt-oss-20b
precision: BF16
training_modes: [HF_finetune_lora]
- model: GPT OSS 120B
mad_tag: pyt_train_gpt_oss_120b
model_repo: GPT-OSS-120B
url: https://huggingface.co/openai/gpt-oss-120b
precision: BF16
training_modes: [HF_finetune_lora]
- group: Qwen
tag: qwen
models:
- model: Qwen 3 8B
mad_tag: pyt_train_qwen3-8b
model_repo: Qwen3-8B
url: https://huggingface.co/Qwen/Qwen3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 3 32B
mad_tag: pyt_train_qwen3-32b
model_repo: Qwen3-32
url: https://huggingface.co/Qwen/Qwen3-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 32B
mad_tag: pyt_train_qwen2.5-32b
model_repo: Qwen2.5-32B
url: https://huggingface.co/Qwen/Qwen2.5-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 72B
mad_tag: pyt_train_qwen2.5-72b
model_repo: Qwen2.5-72B
url: https://huggingface.co/Qwen/Qwen2.5-72B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2 1.5B
mad_tag: pyt_train_qwen2-1.5b
model_repo: Qwen2-1.5B
url: https://huggingface.co/Qwen/Qwen2-1.5B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 2 7B
mad_tag: pyt_train_qwen2-7b
model_repo: Qwen2-7B
url: https://huggingface.co/Qwen/Qwen2-7B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- group: Flux
tag: flux
models:
- model: FLUX.1-dev
mad_tag: pyt_train_flux
model_repo: Flux
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
precision: BF16
training_modes: [pretrain]

View File

@@ -1,178 +0,0 @@
dockers:
- pull_tag: rocm/pytorch-training:v25.8
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-training/v25.8/images/sha256-5082ae01d73fec6972b0d84e5dad78c0926820dcf3c19f301d6c8eb892e573c5
components:
ROCm: 6.4.3
PyTorch: 2.8.0a0+gitd06a406
Python: 3.10.18
Transformer Engine: 2.2.0.dev0+a1e66aae
Flash Attention: 3.0.0.post1
hipBLASLt: 1.1.0-d1b517fc7a
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 4 Scout 17B-16E
mad_tag: pyt_train_llama-4-scout-17b-16e
model_repo: Llama-4-17B_16E
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.3 70B
mad_tag: pyt_train_llama-3.3-70b
model_repo: Llama-3.3-70B
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 3.2 1B
mad_tag: pyt_train_llama-3.2-1b
model_repo: Llama-3.2-1B
url: https://huggingface.co/meta-llama/Llama-3.2-1B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 3B
mad_tag: pyt_train_llama-3.2-3b
model_repo: Llama-3.2-3B
url: https://huggingface.co/meta-llama/Llama-3.2-3B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 Vision 11B
mad_tag: pyt_train_llama-3.2-vision-11b
model_repo: Llama-3.2-Vision-11B
url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.2 Vision 90B
mad_tag: pyt_train_llama-3.2-vision-90b
model_repo: Llama-3.2-Vision-90B
url: https://huggingface.co/meta-llama/Llama-3.2-90B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.1 8B
mad_tag: pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
training_modes: [pretrain, finetune_fw, finetune_lora, HF_pretrain]
- model: Llama 3.1 70B
mad_tag: pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
precision: BF16
training_modes: [pretrain, finetune_fw, finetune_lora]
- model: Llama 3.1 405B
mad_tag: pyt_train_llama-3.1-405b
model_repo: Llama-3.1-405B
url: https://huggingface.co/meta-llama/Llama-3.1-405B
precision: BF16
training_modes: [finetune_qlora]
- model: Llama 3 8B
mad_tag: pyt_train_llama-3-8b
model_repo: Llama-3-8B
url: https://huggingface.co/meta-llama/Meta-Llama-3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3 70B
mad_tag: pyt_train_llama-3-70b
model_repo: Llama-3-70B
url: https://huggingface.co/meta-llama/Meta-Llama-3-70B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 7B
mad_tag: pyt_train_llama-2-7b
model_repo: Llama-2-7B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 2 13B
mad_tag: pyt_train_llama-2-13b
model_repo: Llama-2-13B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 70B
mad_tag: pyt_train_llama-2-70b
model_repo: Llama-2-70B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_lora, finetune_qlora]
- group: OpenAI
tag: openai
models:
- model: GPT OSS 20B
mad_tag: pyt_train_gpt_oss_20b
model_repo: GPT-OSS-20B
url: https://huggingface.co/openai/gpt-oss-20b
precision: BF16
training_modes: [HF_finetune_lora]
- model: GPT OSS 120B
mad_tag: pyt_train_gpt_oss_120b
model_repo: GPT-OSS-120B
url: https://huggingface.co/openai/gpt-oss-120b
precision: BF16
training_modes: [HF_finetune_lora]
- group: Qwen
tag: qwen
models:
- model: Qwen 3 8B
mad_tag: pyt_train_qwen3-8b
model_repo: Qwen3-8B
url: https://huggingface.co/Qwen/Qwen3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 3 32B
mad_tag: pyt_train_qwen3-32b
model_repo: Qwen3-32
url: https://huggingface.co/Qwen/Qwen3-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 32B
mad_tag: pyt_train_qwen2.5-32b
model_repo: Qwen2.5-32B
url: https://huggingface.co/Qwen/Qwen2.5-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 72B
mad_tag: pyt_train_qwen2.5-72b
model_repo: Qwen2.5-72B
url: https://huggingface.co/Qwen/Qwen2.5-72B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2 1.5B
mad_tag: pyt_train_qwen2-1.5b
model_repo: Qwen2-1.5B
url: https://huggingface.co/Qwen/Qwen2-1.5B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 2 7B
mad_tag: pyt_train_qwen2-7b
model_repo: Qwen2-7B
url: https://huggingface.co/Qwen/Qwen2-7B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- group: Stable Diffusion
tag: sd
models:
- model: Stable Diffusion XL
mad_tag: pyt_huggingface_stable_diffusion_xl_2k_lora_finetuning
model_repo: SDXL
url: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
precision: BF16
training_modes: [finetune_lora]
- group: Flux
tag: flux
models:
- model: FLUX.1-dev
mad_tag: pyt_train_flux
model_repo: Flux
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
precision: BF16
training_modes: [pretrain]
- group: NCF
tag: ncf
models:
- model: NCF
mad_tag: pyt_ncf_training
model_repo:
url: https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Recommendation/NCF
precision: FP32

View File

@@ -1,186 +0,0 @@
dockers:
MI355X and MI350X:
pull_tag: rocm/pytorch-training:v25.9_gfx950
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-training/v25.9_gfx950/images/sha256-1a198be32f49efd66d0ff82066b44bd99b3e6b04c8e0e9b36b2c481e13bff7b6
components: &docker_components
ROCm: 7.0.0
Primus: aab4234
PyTorch: 2.9.0.dev20250821+rocm7.0.0.lw.git125803b7
Python: "3.10"
Transformer Engine: 2.2.0.dev0+54dd2bdc
Flash Attention: 2.8.3
hipBLASLt: 911283acd1
Triton: 3.4.0+rocm7.0.0.git56765e8c
RCCL: 2.26.6
MI325X and MI300X:
pull_tag: rocm/pytorch-training:v25.9_gfx942
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-training/v25.9_gfx942/images/sha256-df6ab8f45b4b9ceb100fb24e19b2019a364e351ee3b324dbe54466a1d67f8357
components: *docker_components
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 4 Scout 17B-16E
mad_tag: pyt_train_llama-4-scout-17b-16e
model_repo: Llama-4-17B_16E
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.3 70B
mad_tag: pyt_train_llama-3.3-70b
model_repo: Llama-3.3-70B
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 3.2 1B
mad_tag: pyt_train_llama-3.2-1b
model_repo: Llama-3.2-1B
url: https://huggingface.co/meta-llama/Llama-3.2-1B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 3B
mad_tag: pyt_train_llama-3.2-3b
model_repo: Llama-3.2-3B
url: https://huggingface.co/meta-llama/Llama-3.2-3B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3.2 Vision 11B
mad_tag: pyt_train_llama-3.2-vision-11b
model_repo: Llama-3.2-Vision-11B
url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.2 Vision 90B
mad_tag: pyt_train_llama-3.2-vision-90b
model_repo: Llama-3.2-Vision-90B
url: https://huggingface.co/meta-llama/Llama-3.2-90B-Vision
precision: BF16
training_modes: [finetune_fw]
- model: Llama 3.1 8B
mad_tag: pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
training_modes: [pretrain, finetune_fw, finetune_lora, HF_pretrain]
- model: Llama 3.1 70B
mad_tag: pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
precision: BF16
training_modes: [pretrain, finetune_fw, finetune_lora]
- model: Llama 3.1 405B
mad_tag: pyt_train_llama-3.1-405b
model_repo: Llama-3.1-405B
url: https://huggingface.co/meta-llama/Llama-3.1-405B
precision: BF16
training_modes: [finetune_qlora]
- model: Llama 3 8B
mad_tag: pyt_train_llama-3-8b
model_repo: Llama-3-8B
url: https://huggingface.co/meta-llama/Meta-Llama-3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 3 70B
mad_tag: pyt_train_llama-3-70b
model_repo: Llama-3-70B
url: https://huggingface.co/meta-llama/Meta-Llama-3-70B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 7B
mad_tag: pyt_train_llama-2-7b
model_repo: Llama-2-7B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
- model: Llama 2 13B
mad_tag: pyt_train_llama-2-13b
model_repo: Llama-2-13B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Llama 2 70B
mad_tag: pyt_train_llama-2-70b
model_repo: Llama-2-70B
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
precision: BF16
training_modes: [finetune_lora, finetune_qlora]
- group: OpenAI
tag: openai
models:
- model: GPT OSS 20B
mad_tag: pyt_train_gpt_oss_20b
model_repo: GPT-OSS-20B
url: https://huggingface.co/openai/gpt-oss-20b
precision: BF16
training_modes: [HF_finetune_lora]
- model: GPT OSS 120B
mad_tag: pyt_train_gpt_oss_120b
model_repo: GPT-OSS-120B
url: https://huggingface.co/openai/gpt-oss-120b
precision: BF16
training_modes: [HF_finetune_lora]
- group: Qwen
tag: qwen
models:
- model: Qwen 3 8B
mad_tag: pyt_train_qwen3-8b
model_repo: Qwen3-8B
url: https://huggingface.co/Qwen/Qwen3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 3 32B
mad_tag: pyt_train_qwen3-32b
model_repo: Qwen3-32
url: https://huggingface.co/Qwen/Qwen3-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 32B
mad_tag: pyt_train_qwen2.5-32b
model_repo: Qwen2.5-32B
url: https://huggingface.co/Qwen/Qwen2.5-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 72B
mad_tag: pyt_train_qwen2.5-72b
model_repo: Qwen2.5-72B
url: https://huggingface.co/Qwen/Qwen2.5-72B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2 1.5B
mad_tag: pyt_train_qwen2-1.5b
model_repo: Qwen2-1.5B
url: https://huggingface.co/Qwen/Qwen2-1.5B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 2 7B
mad_tag: pyt_train_qwen2-7b
model_repo: Qwen2-7B
url: https://huggingface.co/Qwen/Qwen2-7B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- group: Stable Diffusion
tag: sd
models:
- model: Stable Diffusion XL
mad_tag: pyt_huggingface_stable_diffusion_xl_2k_lora_finetuning
model_repo: SDXL
url: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
precision: BF16
training_modes: [posttrain-p]
- group: Flux
tag: flux
models:
- model: FLUX.1-dev
mad_tag: pyt_train_flux
model_repo: Flux
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
precision: BF16
training_modes: [posttrain-p]
- group: NCF
tag: ncf
models:
- model: NCF
mad_tag: pyt_ncf_training
model_repo:
url: https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Recommendation/NCF
precision: FP32

View File

@@ -1,15 +1,15 @@
-docker:
-pull_tag: rocm/primus:v25.11
-docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.10/images/sha256-140c37cd2eeeb183759b9622543fc03cc210dc97cbfa18eeefdcbda84420c197
+dockers:
+- pull_tag: rocm/megatron-lm:v25.7_py310
+docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.7_py310/images/sha256-6189df849feeeee3ae31bb1e97aef5006d69d2b90c134e97708c19632e20ab5a
components:
-ROCm: 7.1.0
-PyTorch: 2.10.0.dev20251112+rocm7.1
-Python: "3.10"
-Transformer Engine: 2.4.0.dev0+32e2d1d4
-Flash Attention: 2.8.3
-hipBLASLt: 1.2.0-09ab7153e2
-Triton: 3.4.0
-RCCL: 2.27.7
+ROCm: 6.4.2
+Primus: v0.1.0-rc1
+PyTorch: 2.8.0a0+gitd06a406
+Python: "3.10"
+Transformer Engine: 2.1.0.dev0+ba586519
+hipBLASLt: 37ba1d36
+Triton: 3.3.0
+RCCL: 2.22.3
model_groups:
- group: Meta Llama
tag: llama

View File

@@ -1,32 +0,0 @@
docker:
pull_tag: rocm/primus:v25.11
docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.10/images/sha256-140c37cd2eeeb183759b9622543fc03cc210dc97cbfa18eeefdcbda84420c197
components:
ROCm: 7.1.0
PyTorch: 2.10.0.dev20251112+rocm7.1
Python: "3.10"
Transformer Engine: 2.4.0.dev0+32e2d1d4
Flash Attention: 2.8.3
hipBLASLt: 1.2.0-09ab7153e2
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.1 8B
mad_tag: primus_pyt_train_llama-3.1-8b
model_repo: Llama-3.1-8B
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: BF16
- model: Llama 3.1 70B
mad_tag: primus_pyt_train_llama-3.1-70b
model_repo: Llama-3.1-70B
url: https://huggingface.co/meta-llama/Llama-3.1-70B
precision: BF16
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek V3 16B
mad_tag: primus_pyt_train_deepseek-v3-16b
model_repo: DeepSeek-V3
url: https://huggingface.co/deepseek-ai/DeepSeek-V3
precision: BF16

View File

@@ -1,18 +1,38 @@
-docker:
-pull_tag: rocm/primus:v25.10
-docker_hub_url: https://hub.docker.com/layers/rocm/primus/v25.10/images/sha256-140c37cd2eeeb183759b9622543fc03cc210dc97cbfa18eeefdcbda84420c197
-components:
-ROCm: 7.1.0
-Primus: 0.3.0
-Primus Turbo: 0.1.1
-PyTorch: 2.10.0.dev20251112+rocm7.1
-Python: "3.10"
-Transformer Engine: 2.4.0.dev0+32e2d1d4
-Flash Attention: 2.8.3
-hipBLASLt: 1.2.0-09ab7153e2
+unified_docker:
+latest:
+pull_tag: rocm/pytorch-training:v25.6
+docker_hub_url: https://hub.docker.com/r/rocm/pytorch-training/tags
+rocm_version: 6.4.1
+pytorch_version: 2.8.0a0+git7d205b2
+python_version: 3.10.17
+transformer_engine_version: 1.14.0+2f85f5f2
+flash_attention_version: 3.0.0.post1
+hipblaslt_version: 0.15.0-8c6919d
+triton_version: 3.3.0
model_groups:
-- group: Meta Llama
-tag: llama
+- group: Pre-training
+tag: pre-training
+models:
+- model: Llama 3.1 8B
+mad_tag: pyt_train_llama-3.1-8b
+model_repo: Llama-3.1-8B
+url: https://huggingface.co/meta-llama/Llama-3.1-8B
+precision: BF16
+training_modes: [pretrain]
+- model: Llama 3.1 70B
+mad_tag: pyt_train_llama-3.1-70b
+model_repo: Llama-3.1-70B
+url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
+precision: BF16
+training_modes: [pretrain]
+- model: FLUX.1-dev
+mad_tag: pyt_train_flux
+model_repo: Flux
+url: https://huggingface.co/black-forest-labs/FLUX.1-dev
+precision: BF16
+training_modes: [pretrain]
+- group: Fine-tuning
+tag: fine-tuning
models:
- model: Llama 4 Scout 17B-16E
mad_tag: pyt_train_llama-4-scout-17b-16e
@@ -55,19 +75,19 @@ model_groups:
 model_repo: Llama-3.1-8B
 url: https://huggingface.co/meta-llama/Llama-3.1-8B
 precision: BF16
-training_modes: [pretrain, finetune_fw, finetune_lora, HF_pretrain]
+training_modes: [finetune_fw, finetune_lora]
 - model: Llama 3.1 70B
 mad_tag: pyt_train_llama-3.1-70b
 model_repo: Llama-3.1-70B
-url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
+url: https://huggingface.co/meta-llama/Llama-3.1-70B
 precision: BF16
-training_modes: [pretrain, finetune_fw, finetune_lora]
+training_modes: [finetune_fw, finetune_lora, finetune_qlora]
 - model: Llama 3.1 405B
 mad_tag: pyt_train_llama-3.1-405b
 model_repo: Llama-3.1-405B
 url: https://huggingface.co/meta-llama/Llama-3.1-405B
 precision: BF16
-training_modes: [finetune_qlora]
+training_modes: [finetune_qlora, HF_finetune_lora]
 - model: Llama 3 8B
 mad_tag: pyt_train_llama-3-8b
 model_repo: Llama-3-8B
@@ -97,101 +117,4 @@ model_groups:
 model_repo: Llama-2-70B
 url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
 precision: BF16
-training_modes: [finetune_lora, finetune_qlora]
+training_modes: [finetune_lora, finetune_qlora, HF_finetune_lora]
- group: OpenAI
tag: openai
models:
- model: GPT OSS 20B
mad_tag: pyt_train_gpt_oss_20b
model_repo: GPT-OSS-20B
url: https://huggingface.co/openai/gpt-oss-20b
precision: BF16
training_modes: [HF_finetune_lora]
- model: GPT OSS 120B
mad_tag: pyt_train_gpt_oss_120b
model_repo: GPT-OSS-120B
url: https://huggingface.co/openai/gpt-oss-120b
precision: BF16
training_modes: [HF_finetune_lora]
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek V2 16B
mad_tag: primus_pyt_train_deepseek-v2
model_repo: DeepSeek-V2
url: https://huggingface.co/deepseek-ai/DeepSeek-V2
precision: BF16
training_modes: [pretrain]
- group: Qwen
tag: qwen
models:
- model: Qwen 3 8B
mad_tag: pyt_train_qwen3-8b
model_repo: Qwen3-8B
url: https://huggingface.co/Qwen/Qwen3-8B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 3 32B
mad_tag: pyt_train_qwen3-32b
model_repo: Qwen3-32
url: https://huggingface.co/Qwen/Qwen3-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 32B
mad_tag: pyt_train_qwen2.5-32b
model_repo: Qwen2.5-32B
url: https://huggingface.co/Qwen/Qwen2.5-32B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2.5 72B
mad_tag: pyt_train_qwen2.5-72b
model_repo: Qwen2.5-72B
url: https://huggingface.co/Qwen/Qwen2.5-72B
precision: BF16
training_modes: [finetune_lora]
- model: Qwen 2 1.5B
mad_tag: pyt_train_qwen2-1.5b
model_repo: Qwen2-1.5B
url: https://huggingface.co/Qwen/Qwen2-1.5B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- model: Qwen 2 7B
mad_tag: pyt_train_qwen2-7b
model_repo: Qwen2-7B
url: https://huggingface.co/Qwen/Qwen2-7B
precision: BF16
training_modes: [finetune_fw, finetune_lora]
- group: Stable Diffusion
tag: sd
models:
- model: Stable Diffusion XL
mad_tag: pyt_huggingface_stable_diffusion_xl_2k_lora_finetuning
model_repo: SDXL
url: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
precision: BF16
training_modes: [posttrain]
- group: Flux
tag: flux
models:
- model: FLUX.1-dev
mad_tag: pyt_train_flux
model_repo: Flux
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
precision: BF16
training_modes: [posttrain]
- group: NCF
tag: ncf
models:
- model: NCF
mad_tag: pyt_ncf_training
model_repo:
url: https://github.com/ROCm/FluxBenchmark
precision: FP32
- group: DLRM
tag: dlrm
models:
- model: DLRM v2
mad_tag: pyt_train_dlrm
model_repo: DLRM
url: https://github.com/AMD-AGI/DLRMBenchmark
training_modes: [pretrain]
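
For readers navigating these model_groups files, the following is a minimal sketch of how such a YAML could be loaded and filtered by training mode. It assumes PyYAML is installed; the file name models.yaml and the helper select_models are hypothetical illustrations, not part of this change set.

# Sketch: load a model_groups YAML like those diffed above and list the
# models that support a given training mode. Assumes PyYAML; "models.yaml"
# and select_models() are hypothetical names.
import yaml

def select_models(path, mode):
    with open(path) as f:
        config = yaml.safe_load(f)
    selected = []
    for group in config.get("model_groups", []):
        for entry in group.get("models", []):
            # training_modes is absent on some entries (e.g. NCF above)
            if mode in entry.get("training_modes", []):
                selected.append((group["group"], entry["model"], entry.get("mad_tag", "")))
    return selected

if __name__ == "__main__":
    for group, model, mad_tag in select_models("models.yaml", "finetune_lora"):
        print(f"{group}: {model} ({mad_tag})")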

Some files were not shown because too many files have changed in this diff.