Compare commits

..

17 Commits

Author SHA1 Message Date
Peter Park
cef7e945be remove "mi300x" from fp4 inf page (#5385) 2025-09-18 12:29:42 -04:00
Peter Park
bc59cb8bef [docs/7.0-beta] Add pointers to latest documentation 2025-09-18 12:16:54 -04:00
Peter Park
fc8f998f8a [docs/7.0-beta] Add SGLang benchmark doc (#5242) 2025-09-02 09:17:25 -04:00
Peter Park
eb334335d0 Add training/inference user guides for 7.0 beta preview Dockers (#5234)
Update docs/preview/benchmark-docker/inference-vllm-llama-3.1-405b-fp4.rst

Co-authored-by: dyokelso <dewi.yokelson@amd.com>

Update docs/preview/benchmark-docker/inference-vllm-llama-3.1-405b-fp4.rst

Co-authored-by: dyokelso <dewi.yokelson@amd.com>

Update docs/preview/benchmark-docker/inference-sglang-deepseek-r1-fp4.rst

Co-authored-by: dyokelso <dewi.yokelson@amd.com>
2025-08-28 23:24:44 -04:00
Peter Park
e7b032c3dc [docs/7.0-beta] Update preview versions list (#5169) 2025-08-08 10:11:05 -04:00
Peter Park
00a96635ae update wording 2025-07-24 17:42:19 -04:00
Peter Park
f8ce4a9113 [docs/7.0-beta] 7.0 beta preview docs
fix mi210 virtu OSes

update wording

words

improve look

update heading

fix preview versions list url
2025-07-24 16:48:38 -04:00
Peter Park
b5598e581d Remove extra files 2025-07-22 14:35:50 -04:00
Pratik Basyal
75216b8fcc Post 642 update in Alpha release documentation (#5076)
* Stable release version update

* Stable ROCm version updated
2025-07-21 17:39:29 -04:00
Peter Park
a94a616f42 Add Alpha 2 docs 2025-07-10 16:39:29 -04:00
Peter Park
2c60d2f776 remove extra xref in training.rst 2025-07-02 19:32:35 -04:00
Peter Park
f62339a857 update .wordlist.txt 2025-07-02 19:30:44 -04:00
Peter Park
dc5cac0fbf add alpha training docker docs 2025-07-02 19:28:26 -04:00
Alex Xu
8c7378ba71 use rocm-docs-core develop branch 2025-06-26 16:57:18 -04:00
Alex Xu
dcc949f441 upgrade rocm-docs-core to 1.21.1 2025-06-26 16:49:21 -04:00
Alex Xu
a4b1b2cc67 rocm-docs-core experiment 2025-06-26 15:58:18 -04:00
Peter Park
4f592f8949 [docs/7.0.0-alpha] Add docs for 7.0 alpha (#4978) 2025-06-26 15:47:42 -04:00
326 changed files with 3124 additions and 40625 deletions

View File

@@ -0,0 +1,42 @@
variables:
- group: common
- template: /.azuredevops/variables-global.yml
resources:
repositories:
- repository: aomp_repo
type: github
endpoint: ROCm
name: ROCm/aomp
ref: amd-mainline
- repository: aomp-extras_repo
type: github
endpoint: ROCm
name: ROCm/aomp-extras
ref: amd-mainline
- repository: flang_repo
type: github
endpoint: ROCm
name: ROCm/flang
ref: amd-mainline
- repository: llvm-project_repo
type: github
endpoint: ROCm
name: ROCm/llvm-project
ref: amd-mainline
pipelines:
- pipeline: rocr-runtime_pipeline
source: \ROCR-Runtime
trigger:
branches:
include:
- amd-mainline
# this job will only be triggered after successful build sequence of llvm-project and ROCR-Runtime
trigger: none
pr: none
jobs:
- template: ${{ variables.CI_COMPONENT_PATH }}/aomp.yml
parameters:
checkoutRepo: aomp_repo
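
Per the comment in this file, the aomp job runs only after the upstream build sequence completes: its own CI and PR triggers are disabled, and the run is started by the ROCR-Runtime pipeline resource. A minimal sketch of that pipeline-completion trigger pattern, with a hypothetical upstream pipeline name and job:

resources:
  pipelines:
    - pipeline: upstream_pipeline        # local alias (hypothetical)
      source: MyUpstreamBuild            # name of the triggering pipeline (hypothetical)
      trigger:
        branches:
          include:
            - amd-mainline
trigger: none   # no CI trigger on pushes
pr: none        # no PR trigger
jobs:
  - job: downstream_build
    steps:
      # resources.pipeline.<alias>.runID identifies the upstream run that triggered this one
      - script: echo "Triggered by upstream run $(resources.pipeline.upstream_pipeline.runID)"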

View File

@@ -1,29 +1,10 @@
parameters:
- name: componentName
type: string
default: AMDMIGraphX
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
# - name: sparseCheckoutDir
# type: string
# default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
@@ -112,11 +93,7 @@ parameters:
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_ubuntu2204_${{ job.target }}
- job: AMDMIGraphX_build_${{ job.target }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
@@ -144,8 +121,6 @@ jobs:
dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
extraBuildFlags: >-
@@ -171,12 +146,12 @@ jobs:
gpuTarget: ${{ job.target }}
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_ubuntu2204_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
- job: AMDMIGraphX_test_${{ job.target }}
dependsOn: AMDMIGraphX_build_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
@@ -208,8 +183,6 @@ jobs:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- task: CMake@1
displayName: MIGraphXTest CMake Flags
inputs:
@@ -226,7 +199,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: ${{ parameters.componentName }}
componentName: AMDMIGraphX
testExecutable: make
testParameters: -j$(nproc) check
testPublishResults: false
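
The test-job condition above reads per-target switches from the shared variable group, and with this diff the disable list is matched against the pipeline's Build.DefinitionName rather than the componentName template parameter. An illustrative sketch, with hypothetical values, of how those switches could look; in practice they live in the "common" variable group rather than inline YAML:

variables:
  ENABLE_GFX942_TESTS: 'true'
  DISABLED_GFX942_TESTS: 'AMDMIGraphX,MIOpen'   # hypothetical comma-separated pipeline definition names

Listing a pipeline's definition name in DISABLED_<TARGET>_TESTS causes containsValue(split(...)) to match, so that pipeline's test job for the target is skipped.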

View File

@@ -1,29 +1,10 @@
parameters:
- name: componentName
type: string
default: MIOpen
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
@@ -93,31 +74,10 @@ parameters:
target: gfx942
- gfx90a:
target: gfx90a
- name: downstreamComponentMatrix
type: object
default:
- MIVisionX:
name: MIVisionX
checkoutRepo: mivisionx_repo
sparseCheckoutDir: ''
skipUnifiedBuild: 'false'
buildDependsOn:
- MIOpen_build
- AMDMIGraphX:
name: AMDMIGraphX
checkoutRepo: amdmigraphx_repo
sparseCheckoutDir: ''
skipUnifiedBuild: 'false'
buildDependsOn:
- MIOpen_build
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_ubuntu2204_${{ job.target }}
- job: MIOpen_build_${{ job.target }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
@@ -135,7 +95,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/miopen-get-ck-build.yml
parameters:
gpuTarget: ${{ job.target }}
@@ -145,13 +104,11 @@ jobs:
dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- task: Bash@3
displayName: Build and install other dependencies
inputs:
targetType: inline
workingDirectory: $(Agent.BuildDirectory)/s
workingDirectory: $(Build.SourcesDirectory)
script: |
sed -i '/composable_kernel/d' requirements.txt
mkdir -p $(Agent.BuildDirectory)/miopen-deps
@@ -173,10 +130,8 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
gpuTarget: ${{ job.target }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -188,9 +143,9 @@ jobs:
- miopen-deps
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_ubuntu2204_${{ job.target }}
- job: MIOpen_test_${{ job.target }}
timeoutInMinutes: 180
dependsOn: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
dependsOn: MIOpen_build_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
@@ -214,7 +169,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/miopen-get-ck-build.yml
parameters:
@@ -224,13 +178,11 @@ jobs:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- task: Bash@3
displayName: Build and install other dependencies
inputs:
targetType: inline
workingDirectory: $(Agent.BuildDirectory)/s
workingDirectory: $(Build.SourcesDirectory)
script: |
sed -i '/composable_kernel/d' requirements.txt
mkdir -p $(Agent.BuildDirectory)/miopen-deps
@@ -241,7 +193,7 @@ jobs:
displayName: 'MIOpen Test CMake Flags'
inputs:
cmakeArgs: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/s/bin;$(Agent.BuildDirectory)/miopen-deps
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Build.SourcesDirectory)/bin;$(Agent.BuildDirectory)/miopen-deps
-DCMAKE_INSTALL_PREFIX=$(Agent.BuildDirectory)/rocm
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
@@ -251,19 +203,19 @@ jobs:
-DBUILD_DEV=OFF
-DMIOPEN_USE_MLIR=ON
-DMIOPEN_GPU_SYNC=OFF
$(Agent.BuildDirectory)/s
..
- task: Bash@3
displayName: 'MIOpen Test Build'
inputs:
targetType: inline
workingDirectory: build
script: |
cmake --build . --target tests -- -j$(nproc)
workingDirectory: $(Build.SourcesDirectory)/build
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: ${{ parameters.componentName }}
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex "test_rnn_seq_api|GPU_Conv2dTuningAsm_FP32|GPU_Conv2dTuningAsmBwdWrw_FP32"'
componentName: MIOpen
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex "test_rnn_seq_api|GPU_Conv2dTuningAsm_FP32"'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
@@ -272,15 +224,3 @@ jobs:
gpuTarget: ${{ job.target }}
extraCopyDirectories:
- miopen-deps
# - ${{ if parameters.triggerDownstreamJobs }}:
# - ${{ each component in parameters.downstreamComponentMatrix }}:
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
# parameters:
# checkoutRepo: ${{ component.checkoutRepo }}
# # sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
# buildDependsOn: ${{ component.buildDependsOn }}
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
# triggerDownstreamJobs: true
# unifiedBuild: ${{ parameters.unifiedBuild }}

View File

@@ -1,29 +1,10 @@
parameters:
- name: componentName
type: string
default: MIVisionX
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
# - name: sparseCheckoutDir
# type: string
# default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
@@ -79,7 +60,6 @@ parameters:
- name: rocmTestDependencies
type: object
default:
- aomp
- clr
- half
- hipBLAS-common
@@ -108,11 +88,7 @@ parameters:
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_ubuntu2204_${{ job.target }}
- job: MIVisionX_build_${{ job.target }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
@@ -134,8 +110,6 @@ jobs:
dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
extraBuildFlags: >-
@@ -157,12 +131,12 @@ jobs:
# gpuTarget: ${{ job.target }}
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_ubuntu2204_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
- job: MIVisionX_test_${{ job.target }}
dependsOn: MIVisionX_build_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
@@ -187,8 +161,6 @@ jobs:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- task: Bash@3
displayName: Build MIVisionX tests
inputs:
@@ -202,7 +174,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: ${{ parameters.componentName }}
componentName: MIVisionX
testDir: 'mivisionx-tests'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:

View File

@@ -28,8 +28,8 @@ parameters:
- name: rocmTestDependencies
type: object
default:
- amdsmi
- llvm-project
- rocm_smi_lib
- rocprofiler-register
- name: jobMatrix
@@ -111,6 +111,14 @@ jobs:
parameters:
aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }}
- task: Bash@3
displayName: Install libhwloc5
inputs:
targetType: 'inline'
script: |
wget http://ftp.us.debian.org/debian/pool/main/h/hwloc/libhwloc5_1.11.12-3_amd64.deb
wget http://ftp.us.debian.org/debian/pool/main/h/hwloc/libhwloc-dev_1.11.12-3_amd64.deb
sudo apt install -y --allow-downgrades ./libhwloc5_1.11.12-3_amd64.deb ./libhwloc-dev_1.11.12-3_amd64.deb
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
@@ -153,10 +161,6 @@ jobs:
targetType: 'inline'
workingDirectory: $(Build.SourcesDirectory)/rocrtst/suites/test_common
script: |
echo $(Build.SourcesDirectory)/rocrtst/thirdparty/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
sudo cat /etc/ld.so.conf.d/rocm-ci.conf
sudo ldconfig -v
ldconfig -p
if [ -e /opt/rh/gcc-toolset-14/enable ]; then
source /opt/rh/gcc-toolset-14/enable
fi

View File

@@ -86,7 +86,8 @@ jobs:
value: $(Agent.BuildDirectory)/rocm
- name: HIP_INC_DIR
value: $(Agent.BuildDirectory)/rocm
pool: ${{ variables.MEDIUM_BUILD_POOL }}
pool:
vmImage: ${{ variables.BASE_BUILD_POOL }}
workspace:
clean: all
steps:

View File

@@ -33,9 +33,8 @@ parameters:
type: object
default:
- cmake
- libboost-filesystem-dev
- libboost-program-options-dev
- libmsgpack-dev
- libboost-program-options-dev
- name: pipModules
type: object
default:

View File

@@ -107,7 +107,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
gpuTarget: ${{ job.target }}
# if this artifact name is changed, please also update $ARTIFACT_URL inside miopen-get-ck-build.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
gpuTarget: ${{ job.target }}
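
The comment touched here notes that the uploaded artifact's name is consumed by miopen-get-ck-build.yml via $ARTIFACT_URL. As a hedged illustration of keeping that coupling explicit, using the artifact-upload.yml parameters seen elsewhere in this diff (the artifact name itself is hypothetical):

- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
  parameters:
    componentName: ${{ parameters.componentName }}
    artifactName: myComponentArtifact   # hypothetical; must match the name the consumer expects
    gpuTarget: ${{ job.target }}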

View File

@@ -39,6 +39,4 @@ jobs:
parameters:
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
inputs:
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml

View File

@@ -51,15 +51,15 @@ parameters:
buildJobs:
- { os: ubuntu2204, packageManager: apt }
- { os: almalinux8, packageManager: dnf }
- name: downstreamComponentMatrix
type: object
default:
- hipBLASLt:
name: hipBLASLt
sparseCheckoutDir: projects/hipblaslt
skipUnifiedBuild: 'false'
buildDependsOn:
- hipBLAS_common_build
# - name: downstreamComponentMatrix
# type: object
# default:
# - hipBLASLt:
# name: hipBLASLt
# sparseCheckoutDir: projects/hipblaslt
# skipUnifiedBuild: 'false'
# buildDependsOn:
# - hipBLAS_common_build
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
@@ -122,14 +122,14 @@ jobs:
# extraEnvVars:
# - ROCM_PATH:::/home/user/workspace/rocm
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}
# - ${{ if parameters.triggerDownstreamJobs }}:
# - ${{ each component in parameters.downstreamComponentMatrix }}:
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
# parameters:
# checkoutRepo: ${{ parameters.checkoutRepo }}
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
# buildDependsOn: ${{ component.buildDependsOn }}
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
# triggerDownstreamJobs: true
# unifiedBuild: ${{ parameters.unifiedBuild }}

View File

@@ -1,29 +1,10 @@
parameters:
- name: componentName
type: string
default: hipBLAS
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
@@ -88,30 +69,10 @@ parameters:
target: gfx942
- gfx90a:
target: gfx90a
# MIOpen depends on both rocRAND and hipBLAS
# for a unified build, hipBLAS will be the one to call MIOpen
- name: downstreamComponentMatrix
type: object
default:
- MIOpen:
name: MIOpen
sparseCheckoutDir: projects/miopen
skipUnifiedBuild: 'false'
buildDependsOn:
- hipBLAS_build
unifiedBuild:
downstreamAggregateNames: hipBLAS+rocRAND
buildDependsOn:
- hipBLAS_build
- rocRAND_build
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_ubuntu2204_${{ job.target }}
- job: hipBLAS_build_${{ job.target }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
@@ -127,7 +88,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aocl.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
@@ -135,8 +95,6 @@ jobs:
dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
extraBuildFlags: >-
@@ -151,12 +109,9 @@ jobs:
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -166,67 +121,46 @@ jobs:
installAOCL: true
gpuTarget: ${{ job.target }}
- ${{ if eq(parameters.unifiedBuild, False) }}:
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_ubuntu2204_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
preTargetFilter: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: ${{ parameters.componentName }}
testExecutable: $(Agent.BuildDirectory)/rocm/bin/hipblas-test
testParameters: '--yaml hipblas_smoke.yaml --gtest_output=xml:./test_output.xml --gtest_color=yes'
testDir: '$(Agent.BuildDirectory)/rocm/bin'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test
gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}
${{ if parameters.unifiedBuild }}:
buildDependsOn: ${{ component.unifiedBuild.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ component.unifiedBuild.downstreamAggregateNames }}
${{ else }}:
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: hipBLAS_test_${{ job.target }}
dependsOn: hipBLAS_build_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: hipBLAS
testExecutable: $(Agent.BuildDirectory)/rocm/bin/hipblas-test
testParameters: '--yaml hipblas_smoke.yaml --gtest_output=xml:./test_output.xml --gtest_color=yes'
testDir: '$(Agent.BuildDirectory)/rocm/bin'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test
gpuTarget: ${{ job.target }}
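
This diff strips the downstream-trigger machinery from hipBLAS. The comment above explains that MIOpen depends on both rocRAND and hipBLAS, and that in a unified build hipBLAS is the component that invokes MIOpen. A condensed sketch of the pattern being removed, abridged from the lines above (parameter names follow the existing templates; the matrix entry is shortened for illustration):

parameters:
  - name: downstreamComponentMatrix
    type: object
    default:
      - MIOpen:
          name: MIOpen
          buildDependsOn:
            - hipBLAS_build
jobs:
  # ... this component's own build and test jobs ...
  - ${{ if parameters.triggerDownstreamJobs }}:
    - ${{ each component in parameters.downstreamComponentMatrix }}:
      # expand the downstream component's template so its build jobs depend on this build
      - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
        parameters:
          buildDependsOn: ${{ component.buildDependsOn }}
          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
          triggerDownstreamJobs: true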

View File

@@ -77,28 +77,28 @@ parameters:
type: object
default:
buildJobs:
- { pool: rocm-ci_ultra_build_pool, os: ubuntu2204, packageManager: apt, target: gfx942 }
- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx90a }
- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx1201 }
- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx1100 }
- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx1030 }
- { pool: rocm-ci_ultra_build_pool, os: almalinux8, packageManager: dnf, target: gfx942 }
- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx90a }
- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx1201 }
- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx1100 }
- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx1030 }
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
# - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
# - { os: almalinux8, packageManager: dnf, target: gfx942 }
# - { os: almalinux8, packageManager: dnf, target: gfx90a }
# - { os: almalinux8, packageManager: dnf, target: gfx1201 }
# - { os: almalinux8, packageManager: dnf, target: gfx1100 }
# - { os: almalinux8, packageManager: dnf, target: gfx1030 }
testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
- name: downstreamComponentMatrix
type: object
default:
- rocBLAS:
name: rocBLAS
sparseCheckoutDir: projects/rocblas
skipUnifiedBuild: 'false'
buildDependsOn:
- hipBLASLt_build
# - name: downstreamComponentMatrix
# type: object
# default:
# - rocBLAS:
# name: rocBLAS
# sparseCheckoutDir: projects/rocblas
# skipUnifiedBuild: 'false'
# buildDependsOn:
# - hipBLASLt_build
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
@@ -121,7 +121,7 @@ jobs:
value: $(Agent.BuildDirectory)/rocm
- name: DAY_STRING
value: $[format('{0:ddMMyyyy}', pipeline.startTime)]
pool: ${{ job.pool }}
pool: ${{ variables.ULTRA_BUILD_POOL }}
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
@@ -140,10 +140,6 @@ jobs:
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- gtest
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
@@ -160,15 +156,18 @@ jobs:
script: |
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
# hipBLASLt has a script for gtest and lapack
# https://github.com/ROCm/hipBLASLt/blob/develop/deps/CMakeLists.txt
# $(Agent.BuildDirectory)/deps is a temporary folder for the build process
# $(Agent.BuildDirectory)/s/deps is part of the hipBLASLt repo
- task: Bash@3
displayName: Build and install LAPACK
displayName: Build and install external dependencies
inputs:
targetType: inline
script: |
mkdir -p $(Agent.BuildDirectory)/temp-deps
cd $(Agent.BuildDirectory)/temp-deps
# position-independent LAPACK is required for almalinux8 builds
cmake -DBUILD_GTEST=OFF -DBUILD_LAPACK=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON $(Agent.BuildDirectory)/s/deps
mkdir -p $(Agent.BuildDirectory)/deps
cd $(Agent.BuildDirectory)/deps
cmake -DCMAKE_POSITION_INDEPENDENT_CODE=ON $(Agent.BuildDirectory)/s/deps
make
sudo make install
- script: |
@@ -188,7 +187,7 @@ jobs:
parameters:
os: ${{ job.os }}
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DCMAKE_INCLUDE_PATH=$(Agent.BuildDirectory)/rocm/llvm/include
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
@@ -245,7 +244,6 @@ jobs:
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
@@ -282,14 +280,14 @@ jobs:
environment: test
gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}
# - ${{ if parameters.triggerDownstreamJobs }}:
# - ${{ each component in parameters.downstreamComponentMatrix }}:
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
# parameters:
# checkoutRepo: ${{ parameters.checkoutRepo }}
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
# buildDependsOn: ${{ component.buildDependsOn }}
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
# triggerDownstreamJobs: true
# unifiedBuild: ${{ parameters.unifiedBuild }}
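
The inline comments above explain that hipBLASLt ships its own deps/CMakeLists.txt for gtest and LAPACK, that the deps build directory is a scratch folder outside the source tree, and that position-independent LAPACK is needed for almalinux8. A minimal sketch of that step, condensed from the lines above (the -j flag is an assumption, not in the original):

- task: Bash@3
  displayName: Build and install external dependencies
  inputs:
    targetType: inline
    script: |
      # scratch build directory; deps/CMakeLists.txt lives in the repo checkout at s/deps
      mkdir -p $(Agent.BuildDirectory)/deps
      cd $(Agent.BuildDirectory)/deps
      # PIC so the static LAPACK can be linked into shared objects (needed on almalinux8)
      cmake -DCMAKE_POSITION_INDEPENDENT_CODE=ON $(Agent.BuildDirectory)/s/deps
      make -j$(nproc)
      sudo make install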

View File

@@ -80,11 +80,11 @@ parameters:
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
- job: ${{ parameters.componentName }}_build_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_ubuntu2204_${{ job.target }}
- ${{ build }}_${{ job.target }} # todo: add OS
variables:
- group: common
- template: /.azuredevops/variables-global.yml
@@ -141,12 +141,12 @@ jobs:
# gpuTarget: ${{ job.target }}
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_ubuntu2204_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
- job: ${{ parameters.componentName }}_test_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
@@ -156,7 +156,6 @@ jobs:
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}

View File

@@ -72,15 +72,15 @@ parameters:
testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
- name: downstreamComponentMatrix
type: object
default:
- rocFFT:
name: rocFFT
sparseCheckoutDir: projects/rocfft
skipUnifiedBuild: 'false'
buildDependsOn:
- hipRAND_build
# - name: downstreamComponentMatrix
# type: object
# default:
# - rocFFT:
# name: rocFFT
# sparseCheckoutDir: projects/rocfft
# skipUnifiedBuild: 'false'
# buildDependsOn:
# - hipRAND_build
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
@@ -206,14 +206,14 @@ jobs:
environment: test
gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}
# - ${{ if parameters.triggerDownstreamJobs }}:
# - ${{ each component in parameters.downstreamComponentMatrix }}:
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
# parameters:
# checkoutRepo: ${{ parameters.checkoutRepo }}
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
# buildDependsOn: ${{ component.buildDependsOn }}
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
# triggerDownstreamJobs: true
# unifiedBuild: ${{ parameters.unifiedBuild }}

View File

@@ -1,29 +1,10 @@
parameters:
- name: componentName
type: string
default: hipSOLVER
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
@@ -85,15 +66,12 @@ parameters:
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_ubuntu2204_${{ job.target }}
- job: hipSOLVER_build_${{ job.target }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ variables.MEDIUM_BUILD_POOL }}
pool:
vmImage: ${{ variables.BASE_BUILD_POOL }}
workspace:
clean: all
steps:
@@ -104,21 +82,18 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
# build external gtest and lapack
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
componentName: external
cmakeBuildDir: '$(Agent.BuildDirectory)/s/deps/build'
cmakeSourceDir: '$(Agent.BuildDirectory)/s/deps'
cmakeBuildDir: '$(Build.SourcesDirectory)/deps/build'
cmakeSourceDir: '$(Build.SourcesDirectory)/deps'
installDir: '$(Pipeline.Workspace)/deps-install'
extraBuildFlags: >-
-DBUILD_BOOST=OFF
@@ -137,10 +112,8 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
gpuTarget: ${{ job.target }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -150,49 +123,44 @@ jobs:
# extraCopyDirectories:
# - deps-install
- ${{ if eq(parameters.unifiedBuild, False) }}:
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_ubuntu2204_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
preTargetFilter: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: ${{ parameters.componentName }}
testDir: '$(Agent.BuildDirectory)/rocm/bin'
testExecutable: './hipsolver-test'
testParameters: '--gtest_filter="*checkin*" --gtest_output=xml:./test_output.xml --gtest_color=yes'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
environment: test
gpuTarget: ${{ job.target }}
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: hipSOLVER_test_${{ job.target }}
dependsOn: hipSOLVER_build_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: hipSOLVER
testDir: '$(Agent.BuildDirectory)/rocm/bin'
testExecutable: './hipsolver-test'
testParameters: '--gtest_filter="*checkin*" --gtest_output=xml:./test_output.xml --gtest_color=yes'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
environment: test
gpuTarget: ${{ job.target }}

View File

@@ -1,29 +1,10 @@
parameters:
- name: componentName
type: string
default: hipSPARSE
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
@@ -33,11 +14,13 @@ parameters:
type: object
default:
- cmake
- gfortran
- git
- libboost-program-options-dev
- libfftw3-dev
- ninja-build
- libboost-program-options-dev
- googletest
- libfftw3-dev
- git
- gfortran
- libgtest-dev
- python3-pip
- name: rocmDependencies
type: object
@@ -66,31 +49,19 @@ parameters:
type: object
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
- gfx942:
target: gfx942
- gfx90a:
target: gfx90a
testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
- name: downstreamComponentMatrix
type: object
default:
- hipSPARSELt:
name: hipSPARSELt
sparseCheckoutDir: projects/hipsparselt
skipUnifiedBuild: 'false'
buildDependsOn:
- hipSPARSE_build
- gfx942:
target: gfx942
- gfx90a:
target: gfx90a
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}_${{ job.target }}
- job: hipSPARSE_build_${{ job.target }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
@@ -102,57 +73,42 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- gtest
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
os: ${{ job.os }}
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/amdclang
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/rocm/share/rocm/cmake/
-DBUILD_CLIENTS_TESTS=ON
-DBUILD_CLIENTS_SAMPLES=OFF
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
artifactName: hipSPARSE
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
componentName: ${{ parameters.componentName }}
artifactName: hipSPARSE
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
publish: false
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
parameters:
sourceDir: $(Agent.BuildDirectory)/s/build/clients
sourceDir: $(Build.SourcesDirectory)/build/clients
contentsString: matrices/**
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
componentName: ${{ parameters.componentName }}
artifactName: testMatrices
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
# parameters:
@@ -160,65 +116,44 @@ jobs:
# environment: test
# gpuTarget: ${{ job.target }}
- ${{ if eq(parameters.unifiedBuild, False) }}:
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
preTargetFilter: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }}
testDir: '$(Agent.BuildDirectory)/rocm/bin'
testExecutable: './hipsparse-test'
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
environment: test
gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: hipSPARSE_test_${{ job.target }}
dependsOn: hipSPARSE_build_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: hipSPARSE
testDir: '$(Agent.BuildDirectory)/rocm/bin'
testExecutable: './hipsparse-test'
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
environment: test
gpuTarget: ${{ job.target }}

View File

@@ -1,29 +1,10 @@
parameters:
- name: componentName
type: string
default: hipSPARSELt
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
@@ -75,17 +56,15 @@ parameters:
type: object
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- gfx942:
target: gfx942
testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- gfx942:
target: gfx942
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}_${{ job.target }}
- job: hipSPARSELt_build_${{ job.target }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
@@ -107,22 +86,17 @@ jobs:
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
# Build and install gtest and lapack
# $(Pipeline.Workspace)/deps is a temporary folder for the build process
# $(Pipeline.Workspace)/s/deps is part of the hipSPARSELt repo
@@ -141,7 +115,6 @@ jobs:
workingDirectory: $(Pipeline.Workspace)/deps
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
os: ${{ job.os }}
extraBuildFlags: >-
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
@@ -157,80 +130,64 @@ jobs:
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- ${{ if eq(job.os, 'ubuntu2204') }}:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
gpuTarget: ${{ job.target }}
extraCopyDirectories:
- deps
extraPaths: /home/user/workspace/rocm/llvm/bin:/home/user/workspace/rocm/bin
extraEnvVars:
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
- TENSILE_ROCM_ASSEMBLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang
- CMAKE_CXX_COMPILER:::/home/user/workspace/rocm/llvm/bin/hipcc
- TENSILE_ROCM_OFFLOAD_BUNDLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang-offload-bundler
installLatestCMake: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
gpuTarget: ${{ job.target }}
extraCopyDirectories:
- deps
extraPaths: /home/user/workspace/rocm/llvm/bin:/home/user/workspace/rocm/bin
extraEnvVars:
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
- TENSILE_ROCM_ASSEMBLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang
- CMAKE_CXX_COMPILER:::/home/user/workspace/rocm/llvm/bin/hipcc
- TENSILE_ROCM_OFFLOAD_BUNDLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang-offload-bundler
installLatestCMake: true
- ${{ if eq(parameters.unifiedBuild, False) }}:
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
timeoutInMinutes: 120
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
preTargetFilter: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }}
testDir: '$(Agent.BuildDirectory)/rocm/bin'
testExecutable: './hipsparselt-test'
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes --gtest_filter=*pre_checkin*'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test
gpuTarget: ${{ job.target }}
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: hipSPARSELt_test_${{ job.target }}
dependsOn: hipSPARSELt_build_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: hipSPARSELt
testDir: '$(Agent.BuildDirectory)/rocm/bin'
testExecutable: './hipsparselt-test'
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes --gtest_filter=*pre_checkin*'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test
gpuTarget: ${{ job.target }}
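
For context, the ENABLE_*/DISABLED_* gating used in the test-job condition blocks above works on comma-separated component lists. A minimal illustration with hypothetical variable values (the real values are supplied by the common variable group or pipeline settings and are not shown in this diff):

  # Hypothetical variable values
  ENABLE_GFX90A_TESTS: 'true'
  DISABLED_GFX90A_TESTS: 'hipSPARSELt,rocALUTION'
  # split(variables['DISABLED_GFX90A_TESTS'], ',') yields [hipSPARSELt, rocALUTION], so
  # containsValue(..., 'hipSPARSELt') is true and not(...) is false: the gfx90a test job
  # for hipSPARSELt is skipped, while targets whose list does not name it still run.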

View File

@@ -67,6 +67,7 @@ jobs:
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
skipLlvmSymlink: true
aggregatePipeline: ${{ parameters.aggregatePipeline }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml

View File

@@ -86,7 +86,8 @@ jobs:
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ variables.MEDIUM_BUILD_POOL }}
pool:
vmImage: ${{ variables.BASE_BUILD_POOL }}
workspace:
clean: all
steps:

View File

@@ -73,7 +73,8 @@ jobs:
- template: /.azuredevops/variables-global.yml
- name: HIP_ROCCLR_HOME
value: $(Build.BinariesDirectory)/rocm
pool: ${{ variables.MEDIUM_BUILD_POOL }}
pool:
vmImage: ${{ variables.BASE_BUILD_POOL }}
workspace:
clean: all
steps:

View File

@@ -33,15 +33,17 @@ parameters:
type: object
default:
- cmake
- git
- gfortran
- libdrm-dev
- libmsgpack-dev
- libopenblas-dev
- ninja-build
- python3-pip
- python3-venv
- git
- libmsgpack-dev
- gfortran
- libopenblas-dev
- googletest
- libgtest-dev
- wget
- python3-pip
- libdrm-dev
- name: pipModules
type: object
default:
@@ -50,17 +52,18 @@ parameters:
- name: rocmDependencies
type: object
default:
- aomp
- clr
- hipBLAS-common
- hipBLASLt
- llvm-project
- rocm-cmake
- rocm-core
- rocm_smi_lib
- llvm-project
- ROCR-Runtime
- clr
- rocminfo
- rocprofiler-register
- ROCR-Runtime
- rocm_smi_lib
- rocm-core
- aomp
- aomp-extras
- hipBLAS-common
- hipBLASLt
- roctracer
- name: rocmTestDependencies
type: object
@@ -83,38 +86,32 @@ parameters:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
# - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
- { os: almalinux8, packageManager: dnf, target: gfx942 }
- { os: almalinux8, packageManager: dnf, target: gfx90a }
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
# - { os: almalinux8, packageManager: dnf, target: gfx942 }
# - { os: almalinux8, packageManager: dnf, target: gfx90a }
# - { os: almalinux8, packageManager: dnf, target: gfx1201 }
# - { os: almalinux8, packageManager: dnf, target: gfx1100 }
# - { os: almalinux8, packageManager: dnf, target: gfx1030 }
testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
- name: downstreamComponentMatrix
type: object
default:
- rocSPARSE:
name: rocSPARSE
sparseCheckoutDir: projects/rocsparse
skipUnifiedBuild: 'false'
buildDependsOn:
- rocBLAS_build
# rocSOLVER depends on both rocBLAS and rocPRIM
# for a unified build, rocBLAS will be the one to call rocSOLVER
- rocSOLVER:
name: rocSOLVER
sparseCheckoutDir: projects/rocsolver
skipUnifiedBuild: 'false'
buildDependsOn:
- rocBLAS_build
unifiedBuild:
downstreamAggregateNames: rocBLAS+rocPRIM
buildDependsOn:
- rocBLAS_build
- rocPRIM_build
# - name: downstreamComponentMatrix
# type: object
# default:
# # rocSOLVER depends on both rocBLAS and rocPRIM
# # for a unified build, rocBLAS will be the one to call rocSOLVER
# - rocSOLVER:
# name: rocSOLVER
# sparseCheckoutDir: projects/rocsolver
# skipUnifiedBuild: 'false'
# buildDependsOn:
# - rocBLAS_build
# unifiedBuild:
# downstreamAggregateNames: rocBLAS+rocPRIM
# buildDependsOn:
# - rocBLAS_build
# - rocPRIM_build
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
@@ -154,12 +151,6 @@ jobs:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aocl.yml
parameters:
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- gtest
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
@@ -173,12 +164,21 @@ jobs:
parameters:
os: ${{ job.os }}
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
-DCMAKE_TOOLCHAIN_FILE=toolchain-linux.cmake
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/bin/amdclang++
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/amdclang
-DGPU_TARGETS=${{ job.target }}
-DTensile_CODE_OBJECT_VERSION=default
-DTensile_LOGIC=asm_full
-DTensile_SEPARATE_ARCHITECTURES=ON
-DTensile_LAZY_LIBRARY_LOADING=ON
-DTensile_LIBRARY_FORMAT=msgpack
-DBUILD_CLIENTS_TESTS=ON
-DBUILD_CLIENTS_BENCHMARKS=OFF
-DBUILD_CLIENTS_SAMPLES=OFF
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
@@ -208,7 +208,6 @@ jobs:
- ${{ if eq(parameters.unifiedBuild, False) }}:
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
timeoutInMinutes: 120
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
condition:
and(succeeded(),
@@ -223,7 +222,6 @@ jobs:
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
@@ -260,18 +258,18 @@ jobs:
environment: test
gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}
${{ if parameters.unifiedBuild }}:
buildDependsOn: ${{ component.unifiedBuild.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ component.unifiedBuild.downstreamAggregateNames }}
${{ else }}:
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
# - ${{ if parameters.triggerDownstreamJobs }}:
# - ${{ each component in parameters.downstreamComponentMatrix }}:
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
# parameters:
# checkoutRepo: ${{ parameters.checkoutRepo }}
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
# triggerDownstreamJobs: true
# unifiedBuild: ${{ parameters.unifiedBuild }}
# ${{ if parameters.unifiedBuild }}:
# buildDependsOn: ${{ component.unifiedBuild.buildDependsOn }}
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ component.unifiedBuild.downstreamAggregateNames }}
# ${{ else }}:
# buildDependsOn: ${{ component.buildDependsOn }}
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}

View File

@@ -78,19 +78,19 @@ parameters:
target: gfx942
- gfx90a:
target: gfx90a
- name: downstreamComponentMatrix
type: object
default:
- hipFFT:
name: hipFFT
sparseCheckoutDir: projects/hipfft
skipUnifiedBuild: 'false'
buildDependsOn:
- rocFFT_build
# - name: downstreamComponentMatrix
# type: object
# default:
# - hipFFT:
# name: hipFFT
# sparseCheckoutDir: projects/hipfft
# skipUnifiedBuild: 'false'
# buildDependsOn:
# - rocFFT_build
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
- job: ${{ parameters.componentName }}_build_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
@@ -151,12 +151,12 @@ jobs:
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_ubuntu2204_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
- job: ${{ parameters.componentName }}_test_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
@@ -166,7 +166,6 @@ jobs:
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
@@ -196,14 +195,14 @@ jobs:
environment: test
gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}
# - ${{ if parameters.triggerDownstreamJobs }}:
# - ${{ each component in parameters.downstreamComponentMatrix }}:
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
# parameters:
# checkoutRepo: ${{ parameters.checkoutRepo }}
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
# buildDependsOn: ${{ component.buildDependsOn }}
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
# triggerDownstreamJobs: true
# unifiedBuild: ${{ parameters.unifiedBuild }}

View File

@@ -27,7 +27,6 @@ parameters:
- numpy
- tomli
- scipy
- pybind11
- name: rocmDependencies
type: object
default:

View File

@@ -91,12 +91,12 @@ parameters:
- rocPRIM_build
# rocSOLVER depends on both rocBLAS and rocPRIM
# for a unified build, rocBLAS will be the one to call rocSOLVER
- rocSOLVER:
name: rocSOLVER
sparseCheckoutDir: projects/rocsolver
skipUnifiedBuild: 'true'
buildDependsOn:
- rocPRIM_build
# - rocSOLVER:
# name: rocSOLVER
# sparseCheckoutDir: projects/rocsolver
# skipUnifiedBuild: 'true'
# buildDependsOn:
# - rocPRIM_build
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
@@ -210,7 +210,7 @@ jobs:
parameters:
componentName: ${{ parameters.componentName }}
testDir: '$(Agent.BuildDirectory)/rocm/bin/rocprim'
extraTestParameters: '-I ${{ job.shard }},,${{ job.shardCount }} -E device_merge_inplace'
extraTestParameters: '-I ${{ job.shard }},,${{ job.shardCount }}'
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:

View File

@@ -36,7 +36,6 @@ parameters:
- clr
- llvm-project
- rocDecode
- rocJPEG
- rocm-cmake
- rocm-core
- rocminfo
@@ -193,9 +192,9 @@ jobs:
inputs:
itemPattern: '**/*.whl'
targetPath: $(Agent.BuildDirectory)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
gpuTarget: ${{ job.target }}
checkoutRepo: ${{ parameters.checkoutRepo }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
@@ -222,17 +221,25 @@ jobs:
- task: CMake@1
displayName: 'rocPyDecode Test CMake Flags'
inputs:
workingDirectory: $(Agent.BuildDirectory)/rocm/share/rocpydecode/tests
cmakeArgs: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(PYTHON_USER_SITE)/pybind11;$(PYTHON_DIST_PACKAGES)/pybind11;$(PYBIND11_PATH)
-DCMAKE_BUILD_TYPE=Release
-DGPU_TARGETS=${{ job.target }}
.
..
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: rocPyDecode
testDir: $(Agent.BuildDirectory)/rocm/share/rocpydecode/tests
testDir: $(Build.SourcesDirectory)/build
# sudo required for pip install but screws up permissions for next pipeline run
- task: Bash@3
displayName: Clean up test environment
condition: always()
inputs:
targetType: inline
script: |
pip uninstall -y rocPyDecode
pip uninstall -y hip-python
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}

View File

@@ -79,12 +79,6 @@ parameters:
skipUnifiedBuild: 'false'
buildDependsOn:
- rocRAND_build
- MIOpen:
name: MIOpen
sparseCheckoutDir: projects/miopen
skipUnifiedBuild: 'true'
buildDependsOn:
- rocRAND_build
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:

View File

@@ -33,11 +33,13 @@ parameters:
type: object
default:
- cmake
- gfortran
- git
- libfmt-dev
- libsuitesparse-dev
- ninja-build
- libsuitesparse-dev
- gfortran
- libfmt-dev
- git
- googletest
- libgtest-dev
- python3-pip
- name: rocmDependencies
type: object
@@ -73,38 +75,16 @@ parameters:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
# - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
- { os: almalinux8, packageManager: dnf, target: gfx942 }
- { os: almalinux8, packageManager: dnf, target: gfx90a }
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
# - { os: almalinux8, packageManager: dnf, target: gfx942 }
# - { os: almalinux8, packageManager: dnf, target: gfx90a }
# - { os: almalinux8, packageManager: dnf, target: gfx1201 }
# - { os: almalinux8, packageManager: dnf, target: gfx1100 }
# - { os: almalinux8, packageManager: dnf, target: gfx1030 }
testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
- name: downstreamComponentMatrix
type: object
default:
- hipBLAS:
name: hipBLAS
sparseCheckoutDir: projects/hipblas
skipUnifiedBuild: 'false'
buildDependsOn:
- rocSOLVER_build
# hipSOLVER depends on both rocSOLVER and rocSPARSE
# for a unified build, rocSOLVER will be the one to call hipSOLVER
- hipSOLVER:
name: hipSOLVER
sparseCheckoutDir: projects/hipsolver
skipUnifiedBuild: 'false'
buildDependsOn:
- rocSOLVER_build
unifiedBuild:
downstreamAggregateNames: rocSOLVER+rocSPARSE
buildDependsOn:
- rocSOLVER_build
- rocSPARSE_build
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
@@ -139,10 +119,6 @@ jobs:
targetType: inline
script: git clone --depth 1 --branch v3.9.1 https://github.com/Reference-LAPACK/lapack
workingDirectory: '$(Build.SourcesDirectory)'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- gtest
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
@@ -158,7 +134,6 @@ jobs:
os: ${{ job.os }}
extraBuildFlags: >-
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_Fortran_FLAGS=-fno-optimize-sibling-calls
-DBUILD_TESTING=OFF
-DCBLAS=ON
@@ -171,7 +146,7 @@ jobs:
parameters:
os: ${{ job.os }}
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Pipeline.Workspace)/deps-install;$(Agent.BuildDirectory)/vendor
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Pipeline.Workspace)/deps-install
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
-DAMDGPU_TARGETS=${{ job.target }}
@@ -216,7 +191,6 @@ jobs:
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
@@ -250,19 +224,3 @@ jobs:
aptPackages: ${{ parameters.aptPackages }}
environment: test
gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}
${{ if parameters.unifiedBuild }}:
buildDependsOn: ${{ component.unifiedBuild.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ component.unifiedBuild.downstreamAggregateNames }}
${{ else }}:
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}

View File

@@ -1,29 +1,10 @@
parameters:
- name: componentName
type: string
default: rocSPARSE
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
@@ -32,25 +13,27 @@ parameters:
- name: aptPackages
type: object
default:
- cmake
- gfortran
- git
- libboost-program-options-dev
- libdrm-dev
- libfftw3-dev
- ninja-build
- python3-pip
- cmake
- ninja-build
- libboost-program-options-dev
- googletest
- libfftw3-dev
- git
- gfortran
- libgtest-dev
- libdrm-dev
- name: rocmDependencies
type: object
default:
- clr
- llvm-project
- rocBLAS
- rocm-cmake
- llvm-project
- ROCR-Runtime
- clr
- rocBLAS
- rocminfo
- rocPRIM
- rocprofiler-register
- ROCR-Runtime
- roctracer
- name: rocmTestDependencies
type: object
@@ -69,39 +52,19 @@ parameters:
type: object
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
- gfx942:
target: gfx942
- gfx90a:
target: gfx90a
testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
- name: downstreamComponentMatrix
type: object
default:
- hipSPARSE:
name: hipSPARSE
sparseCheckoutDir: projects/hipsparse
skipUnifiedBuild: 'false'
buildDependsOn:
- rocSPARSE_build
# hipSOLVER depends on both rocSOLVER and rocSPARSE
# for a unified build, rocSOLVER will be the one to call hipSOLVER
# - hipSOLVER:
# name: hipSOLVER
# sparseCheckoutDir: projects/hipsolver
# skipUnifiedBuild: 'true'
# buildDependsOn:
# - rocSPARSE_build
- gfx942:
target: gfx942
- gfx90a:
target: gfx90a
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}_${{ job.target }}
- job: rocSPARSE_build_${{ job.target }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
@@ -114,32 +77,22 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- gtest
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
os: ${{ job.os }}
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/bin/amdclang++
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/amdclang
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
-DCMAKE_BUILD_TYPE=Release
-DAMDGPU_TARGETS=${{ job.target }}
@@ -150,94 +103,68 @@ jobs:
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
artifactName: rocSPARSE
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
componentName: ${{ parameters.componentName }}
artifactName: rocSPARSE
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
publish: false
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
parameters:
sourceDir: $(Agent.BuildDirectory)/s/build/clients
sourceDir: $(Build.SourcesDirectory)/build/clients
contentsString: matrices/**
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
componentName: ${{ parameters.componentName }}
artifactName: testMatrices
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- ${{ if eq(job.os, 'ubuntu2204') }}:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
gpuTarget: ${{ job.target }}
extraEnvVars:
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
gpuTarget: ${{ job.target }}
extraEnvVars:
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
- ${{ if eq(parameters.unifiedBuild, False) }}:
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
timeoutInMinutes: 120
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
preTargetFilter: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }}
testDir: '$(Agent.BuildDirectory)/rocm/bin'
testExecutable: './rocsparse-test'
testParameters: '--gtest_filter="*quick*" --gtest_output=xml:./test_output.xml --gtest_color=yes'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
environment: test
gpuTarget: ${{ job.target }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: rocSPARSE_test_${{ job.target }}
timeoutInMinutes: 90
dependsOn: rocSPARSE_build_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: rocSPARSE
testDir: '$(Agent.BuildDirectory)/rocm/bin'
testExecutable: './rocsparse-test'
testParameters: '--gtest_filter="*quick*" --gtest_output=xml:./test_output.xml --gtest_color=yes'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
environment: test
gpuTarget: ${{ job.target }}

View File

@@ -184,7 +184,7 @@ jobs:
parameters:
componentName: rocm-examples
testDir: $(Build.SourcesDirectory)/build
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml'
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex "rocfft_callback"'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}

View File

@@ -1,163 +0,0 @@
parameters:
- name: componentName
type: string
default: rocm_libraries
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
type: boolean
default: false
- name: aptPackages
type: object
default:
- ccache
- gfortran
- git
- libdrm-dev
- libmsgpack-dev
- libnuma-dev
- ninja-build
- python3-pip
- python3-venv
- name: pipModules
type: object
default:
- joblib
- "packaging>=22.0"
- --upgrade
- name: rocmDependencies
type: object
default:
- aomp
- clr
- llvm-project
- rocminfo
- rocm-cmake
- rocm_smi_lib
- rocprofiler-register
- ROCR-Runtime
- roctracer
- name: rocmTestDependencies
type: object
default:
- aomp
- clr
- llvm-project
- rocminfo
- rocm_smi_lib
- rocprofiler-register
- ROCR-Runtime
- roctracer
- name: jobMatrix
type: object
default:
buildJobs:
- { pool: rocm-ci_ultra_build_pool, os: ubuntu2204, packageManager: apt, target: gfx942 }
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
timeoutInMinutes: 300
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
- name: DAY_STRING
value: $[format('{0:ddMMyyyy}', pipeline.startTime)]
pool: ${{ job.pool }}
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
endpoint: ContainerService3
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- gtest
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- script: |
mkdir -p $(CCACHE_DIR)
echo "##vso[task.prependpath]/usr/lib/ccache"
displayName: Update path for ccache
- task: Cache@2
displayName: Ccache caching
inputs:
key: rocm-libraries | ${{ job.os }} | ${{ job.target }} | $(DAY_STRING) | $(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
path: $(CCACHE_DIR)
restoreKeys: |
rocm-libraries | ${{ job.os }} | ${{ job.target }} | $(DAY_STRING)
rocm-libraries | ${{ job.os }} | ${{ job.target }}
rocm-libraries | ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
os: ${{ job.os }}
extraBuildFlags: >-
-DROCM_LIBRARIES_SUPERBUILD=ON
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- ${{ if eq(job.os, 'ubuntu2204') }}:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
gpuTarget: ${{ job.target }}
extraPaths: /home/user/workspace/rocm/llvm/bin:/home/user/workspace/rocm/bin
installLatestCMake: true
extraCopyDirectories:
- deps

View File

@@ -67,6 +67,7 @@ jobs:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
skipLlvmSymlink: true
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:

View File

@@ -65,19 +65,43 @@ parameters:
type: object
default:
buildJobs:
- gfx942:
- gfx942-staging:
name: gfx942_staging
target: gfx942
- gfx90a:
dependencySource: staging
- gfx942-mainline:
name: gfx942_mainline
target: gfx942
dependencySource: mainline
- gfx90a-staging:
name: gfx90a_staging
target: gfx90a
dependencySource: staging
- gfx90a-mainline:
name: gfx90a_mainline
target: gfx90a
dependencySource: mainline
testJobs:
- gfx942:
- gfx942-staging:
name: gfx942_staging
target: gfx942
- gfx90a:
dependencySource: staging
- gfx942-mainline:
name: gfx942_mainline
target: gfx942
dependencySource: mainline
- gfx90a-staging:
name: gfx90a_staging
target: gfx90a
dependencySource: staging
- gfx90a-mainline:
name: gfx90a_mainline
target: gfx90a
dependencySource: mainline
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: rocprofiler_compute_build_${{ job.target }}
- job: rocprofiler_compute_build_${{ job.name }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
@@ -100,9 +124,11 @@ jobs:
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
artifactName: ${{ job.dependencySource }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
artifactName: ${{ job.dependencySource }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -112,9 +138,9 @@ jobs:
# gpuTarget: ${{ job.target }}
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: rocprofiler_compute_test_${{ job.target }}
- job: rocprofiler_compute_test_${{ job.name }}
timeoutInMinutes: 120
dependsOn: rocprofiler_compute_build_${{ job.target }}
dependsOn: rocprofiler_compute_build_${{ job.name }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
@@ -140,12 +166,14 @@ jobs:
checkoutRepo: ${{ parameters.checkoutRepo }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
postTargetFilter: ${{ job.dependencySource }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
dependencySource: ${{ job.dependencySource }}
gpuTarget: ${{ job.target }}
- task: Bash@3
displayName: Add en_US.UTF-8 locale

View File

@@ -37,7 +37,6 @@ parameters:
- libpfm4-dev
- libtool
- libopenmpi-dev
- libsqlite3-dev
- m4
- ninja-build
- openmpi-bin

View File

@@ -40,6 +40,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
dependencyList: ${{ parameters.rocmDependencies }}
dependencySource: staging
- task: Bash@3
displayName: Add ROCm binaries to PATH
inputs:

View File

@@ -219,6 +219,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
dependencyList: ${{ parameters.rocmDependencies }}
dependencySource: staging
gpuTarget: $(JOB_GPU_TARGET)
setupHIPLibrarySymlinks: true
- task: Bash@3
@@ -405,6 +406,8 @@ jobs:
parameters:
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: $(JOB_GPU_TARGET)
dependencySource: staging
skipLlvmSymlink: true
# get sources to run test scripts
- task: Bash@3
displayName: git clone upstream pytorch

View File

@@ -3,21 +3,21 @@ parameters:
- name: jobList
type: object
default:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
- { os: ubuntu2404, packageManager: apt, target: gfx942 }
- { os: ubuntu2404, packageManager: apt, target: gfx90a }
- { os: ubuntu2404, packageManager: apt, target: gfx1201 }
- { os: ubuntu2404, packageManager: apt, target: gfx1100 }
- { os: ubuntu2404, packageManager: apt, target: gfx1030 }
- { os: almalinux8, packageManager: dnf, target: gfx942 }
- { os: almalinux8, packageManager: dnf, target: gfx90a }
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
- { os: ubuntu2204, target: gfx942, source: staging }
- { os: ubuntu2204, target: gfx90a, source: staging }
- { os: ubuntu2204, target: gfx1201, source: staging }
- { os: ubuntu2204, target: gfx1100, source: staging }
- { os: ubuntu2204, target: gfx1030, source: staging }
- { os: ubuntu2404, target: gfx942, source: staging }
- { os: ubuntu2404, target: gfx90a, source: staging }
- { os: ubuntu2404, target: gfx1201, source: staging }
- { os: ubuntu2404, target: gfx1100, source: staging }
- { os: ubuntu2404, target: gfx1030, source: staging }
- { os: almalinux8, target: gfx942, source: staging }
- { os: almalinux8, target: gfx90a, source: staging }
- { os: almalinux8, target: gfx1201, source: staging }
- { os: almalinux8, target: gfx1100, source: staging }
- { os: almalinux8, target: gfx1030, source: staging }
- name: rocmDependencies
type: object
default:
@@ -92,8 +92,7 @@ schedules:
jobs:
- ${{ each job in parameters.jobList }}:
- job: nightly_${{ job.os }}_${{ job.target }}
timeoutInMinutes: 90
- job: rocm_nightly_${{ job.os }}_${{ job.target }}_${{ job.source }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
@@ -116,9 +115,11 @@ jobs:
displayName: System disk space before ROCm
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
dependencySource: ${{ job.source }}
dependencyList: ${{ parameters.rocmDependencies }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }}
skipLibraryLinking: true
- script: df -h
displayName: System disk space after ROCm
- script: du -sh $(Agent.BuildDirectory)/rocm
@@ -131,7 +132,7 @@ jobs:
includeRootFolder: false
archiveType: tar
tarCompression: gz
archiveFile: $(Build.ArtifactStagingDirectory)/$(Build.DefinitionName)_$(Build.BuildNumber)_${{ job.os }}_${{ job.target }}.tar.gz
archiveFile: $(Build.ArtifactStagingDirectory)/$(Build.DefinitionName)_$(Build.BuildNumber)_ubuntu2204_${{ job.target }}.tar.gz
- script: du -sh $(Build.ArtifactStagingDirectory)
displayName: Compressed ROCm size
- task: PublishPipelineArtifact@1
@@ -144,95 +145,5 @@ jobs:
inputs:
workingDirectory: $(Pipeline.Workspace)
targetType: inline
script: echo "$(Build.DefinitionName)_$(Build.BuildNumber)_${{ job.os }}_${{ job.target }}.tar.gz" >> pipelineArtifacts.txt
script: echo "$(Build.DefinitionName)_$(Build.BuildNumber)_ubuntu2204_${{ job.target }}.tar.gz" >> pipelineArtifacts.txt
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- ${{ if eq(job.packageManager, 'apt') }}:
- task: Bash@3
displayName: Create Dockerfile
inputs:
workingDirectory: $(Agent.BuildDirectory)
targetType: inline
script: |
cat <<'EOF' > Dockerfile
${{ iif(eq(job.os, 'ubuntu2204'), 'FROM ubuntu:22.04', '') }}
${{ iif(eq(job.os, 'ubuntu2404'), 'FROM ubuntu:24.04', '') }}
WORKDIR /root
RUN mkdir rocm
RUN apt update \
&& apt upgrade -y \
&& apt install -y cmake curl git gcc g++ gpg lsb-release lsof ninja-build pkg-config python3 python3-pip wget zip libdrm-dev libelf-dev libgtest-dev libhsakmt-dev libhwloc-dev libnuma-dev libstdc++-12-dev libtbb-dev jq \
&& apt clean all
RUN PACKAGE_NAME=$(curl -s https://repo.radeon.com/rocm/apt/latest/pool/main/h/hsa-amd-aqlprofile/ | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb") \
&& wget -nv --retry-connrefused https://repo.radeon.com/rocm/apt/latest/pool/main/h/hsa-amd-aqlprofile/$PACKAGE_NAME \
&& mkdir hsa-amd-aqlprofile \
&& dpkg-deb -R $PACKAGE_NAME hsa-amd-aqlprofile \
&& cp -R hsa-amd-aqlprofile/opt/rocm-*/* rocm
RUN ARTIFACT_URL="https://dev.azure.com/ROCm-CI/ROCm-CI/_apis/build/builds/$(Build.BuildId)/artifacts?artifactName=nightly${{ job.os }}${{ job.target }}&api-version=7.1" \
&& DOWNLOAD_URL=$(curl -s $ARTIFACT_URL | jq ".resource.downloadUrl" | tr -d '"') \
&& wget -nv --retry-connrefused $DOWNLOAD_URL -O nightly.zip \
&& unzip nightly.zip \
&& tar -xf nightly${{ job.os }}${{ job.target }}/rocm-nightly*${{ job.os }}*${{ job.target }}*.tar.gz -C rocm
RUN echo /root/rocm/lib | tee /etc/ld.so.conf.d/rocm-ci.conf
RUN echo /root/rocm/llvm/lib | tee -a /etc/ld.so.conf.d/rocm-ci.conf
RUN echo /root/rocm/lib64 | tee -a /etc/ld.so.conf.d/rocm-ci.conf
RUN echo /root/rocm/llvm/lib64 | tee -a /etc/ld.so.conf.d/rocm-ci.conf
RUN ldconfig -v
ENV PATH="$PATH:/root/rocm/bin"
ENTRYPOINT ["/bin/bash"]
EOF
cat Dockerfile
- ${{ elseif eq(job.packageManager, 'dnf') }}:
- task: Bash@3
displayName: Create Dockerfile
inputs:
workingDirectory: $(Agent.BuildDirectory)
targetType: inline
script: |
cat <<'EOF' > Dockerfile
${{ iif(eq(job.os, 'almalinux8'), 'FROM almalinux:8', '') }}
WORKDIR /root
RUN mkdir rocm
RUN dnf install -y cmake curl git gcc gcc-c++ gnupg2 redhat-lsb-core lsof pkgconf python3 python3-pip wget zip libdrm-devel elfutils-libelf-devel numactl-devel libstdc++-devel tbb-devel jq \
&& dnf clean all
RUN PACKAGE_NAME=$(curl -s https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/ | grep -oP "hsa-amd-aqlprofile-[^\"]+\.rpm" | head -n1) \
&& wget -nv --retry-connrefused https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/$PACKAGE_NAME \
&& mkdir hsa-amd-aqlprofile \
&& dnf -y install rpm-build cpio \
&& rpm2cpio $PACKAGE_NAME | (cd hsa-amd-aqlprofile && cpio -idmv) \
&& cp -R hsa-amd-aqlprofile/opt/rocm-*/* rocm
RUN ARTIFACT_URL="https://dev.azure.com/ROCm-CI/ROCm-CI/_apis/build/builds/$(Build.BuildId)/artifacts?artifactName=nightly${{ job.os }}${{ job.target }}&api-version=7.1" \
&& DOWNLOAD_URL=$(curl -s $ARTIFACT_URL | jq ".resource.downloadUrl" | tr -d '"') \
&& wget -nv --retry-connrefused $DOWNLOAD_URL -O nightly.zip \
&& UNZIP_DISABLE_ZIPBOMB_DETECTION=TRUE unzip nightly.zip \
&& tar -xf nightly${{ job.os }}${{ job.target }}/rocm-nightly*${{ job.os }}*${{ job.target }}*.tar.gz -C rocm
RUN echo /root/rocm/lib | tee /etc/ld.so.conf.d/rocm-ci.conf
RUN echo /root/rocm/llvm/lib | tee -a /etc/ld.so.conf.d/rocm-ci.conf
RUN echo /root/rocm/lib64 | tee -a /etc/ld.so.conf.d/rocm-ci.conf
RUN echo /root/rocm/llvm/lib64 | tee -a /etc/ld.so.conf.d/rocm-ci.conf
RUN ldconfig -v
ENV PATH="$PATH:/root/rocm/bin"
ENTRYPOINT ["/bin/bash"]
EOF
cat Dockerfile
- task: Docker@2
displayName: Build and upload Docker image
inputs:
containerRegistry: ContainerService3
repository: 'nightly-${{ job.os }}-${{ job.target }}'
Dockerfile: '$(Agent.BuildDirectory)/Dockerfile'
buildContext: '$(Agent.BuildDirectory)'
- task: Bash@3
displayName: '!! Docker Run Command !!'
inputs:
targetType: inline
script: echo "docker run -it --network=host --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined rocmexternalcicd.azurecr.io/nightly-${{ job.os }}-${{ job.target }}:$(Build.BuildId)" | tr '[:upper:]' '[:lower:]'

View File

@@ -12,9 +12,6 @@ parameters:
- name: fileFilter
type: string
default: ''
- name: extractAndDeleteFiles
type: boolean
default: true
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
@@ -40,17 +37,16 @@ steps:
buildVersionToDownload: latest # aomp trigger lives in ROCm/ROCm, so cannot use ROCm/aomp branch names
${{ else }}:
buildVersionToDownload: latestFromBranch
- ${{ if eq(parameters.extractAndDeleteFiles, true) }}:
- task: ExtractFiles@1
displayName: Extract ${{ parameters.componentName }}
inputs:
archiveFilePatterns: '$(Pipeline.Workspace)/d/**/*.tar.gz'
destinationFolder: '$(Agent.BuildDirectory)/rocm'
cleanDestinationFolder: false
overwriteExistingFiles: true
- task: DeleteFiles@1
displayName: Clean up Compressed ${{ parameters.componentName }}
inputs:
SourceFolder: '$(Pipeline.Workspace)/d'
Contents: '**/*.tar.gz'
RemoveDotFiles: true
- task: ExtractFiles@1
displayName: Extract ${{ parameters.componentName }}
inputs:
archiveFilePatterns: '$(Pipeline.Workspace)/d/**/*.tar.gz'
destinationFolder: '$(Agent.BuildDirectory)/rocm'
cleanDestinationFolder: false
overwriteExistingFiles: true
- task: DeleteFiles@1
displayName: Cleanup Compressed ${{ parameters.componentName }}
inputs:
SourceFolder: '$(Pipeline.Workspace)/d'
Contents: '**/*.tar.gz'
RemoveDotFiles: true

View File

@@ -15,8 +15,8 @@ steps:
URL_BEGIN="https://artprodcus3.artifacts.visualstudio.com/"
URL_MIDDLE="/_apis/artifact/"
URL_END="/content?format=file&subPath=%2F"
ARTIFACT_NAME="$(Agent.JobName)_$(System.JobAttempt)"
ARTIFACT_STRING="pipelineartifact://ROCm-CI/projectId/$(DOWNLOAD_PROJECT_ID)/buildId/$(Build.BuildId)/artifactName/${ARTIFACT_NAME}"
FORMATTED_JOB_NAME=$(echo $(Agent.JobName) | sed 's/ /./g; s/[-_]//g')
ARTIFACT_STRING="pipelineartifact://ROCm-CI/projectId/$(DOWNLOAD_PROJECT_ID)/buildId/$(Build.BuildId)/artifactName/${FORMATTED_JOB_NAME}"
ENCODED_STRING=$(echo -n "${ARTIFACT_STRING}" | base64 -w 0)
PADDING_COUNT=$(echo -n "${ENCODED_STRING}" | awk -F= '{print NF-1}')
if [ "$PADDING_COUNT" -gt 0 ]; then

View File

@@ -46,6 +46,5 @@ steps:
displayName: '${{ parameters.artifactName }} Publish'
retryCountOnTaskFailure: 3
inputs:
# if this artifact name is changed, please also update $ARTIFACT_URL inside miopen-get-ck-build.yml
artifactName: $(Agent.JobName)_$(System.JobAttempt)
artifactName: ${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}_$(System.JobAttempt)
targetPath: '$(Build.ArtifactStagingDirectory)'

View File

@@ -1,15 +1,10 @@
parameters:
- name: os
type: string
default: ubuntu2204
- name: repositoryUrl
type: string
default: https://download.amd.com/developer/eula/aocl/aocl-4-2
- name: packageName
type: object
default:
ubuntu2204: aocl-linux-gcc-4.2.0_1_amd64.deb
almalinux8: aocl-linux-gcc-4.2.0-1.x86_64.rpm
type: string
default: aocl-linux-gcc-4.2.0_1_amd64.deb
steps:
- task: Bash@3
@@ -17,19 +12,16 @@ steps:
inputs:
targetType: inline
workingDirectory: $(Pipeline.Workspace)
script: wget -nv ${{ parameters.repositoryUrl }}/${{ parameters.packageName[parameters.os] }}
script: wget -nv ${{ parameters.repositoryUrl }}/${{ parameters.packageName }}
- task: Bash@3
displayName: Install AOCL
inputs:
targetType: inline
workingDirectory: $(Pipeline.Workspace)
${{ if eq(parameters.os, 'ubuntu2204') }}:
script: sudo apt install -y ./${{ parameters.packageName[parameters.os] }}
${{ elseif eq(parameters.os, 'almalinux8') }}:
script: sudo dnf install -y ./${{ parameters.packageName[parameters.os] }}
script: sudo apt install -y ./${{ parameters.packageName }}
- task: Bash@3
displayName: Clean up AOCL
inputs:
targetType: inline
workingDirectory: $(Pipeline.Workspace)
script: rm -f ${{ parameters.packageName[parameters.os] }}
script: rm -f ${{ parameters.packageName }}

View File

@@ -52,7 +52,6 @@ parameters:
libexpat-dev: expat-devel
libffi-dev: libffi-devel
libfftw3-dev: fftw-devel
libfmt-dev: fmt-devel
libgmp-dev: gmp-devel
liblzma-dev: xz-devel
libmpfr-dev: mpfr-devel

View File

@@ -3,6 +3,13 @@ parameters:
- name: checkoutRef
type: string
default: ''
- name: dependencySource # optional, overrides checkoutRef
type: string
default: null
values:
- null # empty strings aren't allowed as values, use null instead
- staging
- mainline
- name: dependencyList
type: object
default: []
@@ -12,6 +19,16 @@ parameters:
- name: gpuTarget
type: string
default: ''
# set to true if you're calling this template file multiple times in the same pipeline
# only leave the last call false to optimize the sequence
- name: skipLibraryLinking
type: boolean
default: false
# set to true if llvm-project is not downloaded in a particular call
# or if you just don't want the symlink
- name: skipLlvmSymlink
type: boolean
default: false
# set to true if dlopen calls for HIP libraries are causing failures
# because they do not follow shared library symlink convention
- name: setupHIPLibrarySymlinks
@@ -31,240 +48,309 @@ parameters:
type: object
default:
AMDMIGraphX:
pipelineId: 113
developBranch: develop
pipelineId: $(AMDMIGRAPHX_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: master
hasGpuTarget: true
amdsmi:
pipelineId: 99
developBranch: amd-staging
pipelineId: $(AMDSMI_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: false
aomp-extras:
pipelineId: 111
developBranch: aomp-dev
pipelineId: $(AOMP_EXTRAS_PIPELINE_ID)
stagingBranch: aomp-dev
mainlineBranch: aomp-dev
hasGpuTarget: false
aomp:
pipelineId: 115
developBranch: aomp-dev
pipelineId: $(AOMP_PIPELINE_ID)
stagingBranch: aomp-dev
mainlineBranch: amd-mainline
hasGpuTarget: false
clr:
pipelineId: 145
developBranch: amd-staging
pipelineId: $(CLR_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: false
composable_kernel:
pipelineId: 86
developBranch: develop
pipelineId: $(COMPOSABLE_KERNEL_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
half:
pipelineId: 101
developBranch: rocm
pipelineId: $(HALF_PIPELINE_ID)
stagingBranch: rocm
mainlineBranch: rocm
hasGpuTarget: false
HIP:
pipelineId: 93
developBranch: amd-staging
pipelineId: $(HIP_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: false
hip-tests:
pipelineId: 233
developBranch: amd-staging
pipelineId: $(HIP_TESTS_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: false
hipBLAS:
pipelineId: 317
developBranch: develop
pipelineId: $(HIPBLAS_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
hipBLASLt:
pipelineId: 301
developBranch: develop
pipelineId: $(HIPBLASLT_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
hipBLAS-common:
pipelineId: 300
developBranch: develop
pipelineId: $(HIPBLAS_COMMON_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: false
hipCUB:
pipelineId: 277
developBranch: develop
pipelineId: $(HIPCUB_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: develop
hasGpuTarget: true
hipFFT:
pipelineId: 283
developBranch: develop
pipelineId: $(HIPFFT_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
hipfort:
pipelineId: 102
developBranch: develop
pipelineId: $(HIPFORT_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: false
HIPIFY:
pipelineId: 92
developBranch: amd-staging
pipelineId: $(HIPIFY_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: false
hipRAND:
pipelineId: 275
developBranch: develop
pipelineId: $(HIPRAND_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: develop
hasGpuTarget: true
hipSOLVER:
pipelineId: 84
developBranch: develop
pipelineId: $(HIPSOLVER_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
hipSPARSE:
pipelineId: 315
developBranch: develop
pipelineId: $(HIPSPARSE_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
hipSPARSELt:
pipelineId: 309
developBranch: develop
pipelineId: $(HIPSPARSELT_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
hipTensor:
pipelineId: 105
developBranch: develop
pipelineId: $(HIPTENSOR_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
llvm-project:
pipelineId: 2
developBranch: amd-staging
pipelineId: $(LLVM_PROJECT_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: false
MIOpen:
pipelineId: 320
developBranch: develop
pipelineId: $(MIOpen_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: amd-master
hasGpuTarget: true
MIVisionX:
pipelineId: 80
developBranch: develop
pipelineId: $(MIVISIONX_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: master
hasGpuTarget: true
omnitrace: # deprecated
pipelineId: $(OMNITRACE_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: true
rccl:
pipelineId: 107
developBranch: develop
pipelineId: $(RCCL_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
rdc:
pipelineId: 100
developBranch: amd-staging
pipelineId: $(RDC_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: false
rocAL:
pipelineId: 151
developBranch: develop
pipelineId: $(ROCAL_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
rocALUTION:
pipelineId: 89
developBranch: develop
pipelineId: $(ROCALUTION_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
rocBLAS:
pipelineId: 302
developBranch: develop
pipelineId: $(ROCBLAS_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
ROCdbgapi:
pipelineId: 135
developBranch: amd-staging
pipelineId: $(ROCDBGAPI_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: false
rocDecode:
pipelineId: 79
developBranch: develop
pipelineId: $(ROCDECODE_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: false
rocFFT:
pipelineId: 282
developBranch: develop
pipelineId: $(ROCFFT_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
ROCgdb:
pipelineId: 134
developBranch: amd-staging
pipelineId: $(ROCGDB_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline-rocgdb-15
hasGpuTarget: false
rocJPEG:
pipelineId: 262
developBranch: develop
pipelineId: $(ROCJPEG_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: false
rocm-cmake:
pipelineId: 6
developBranch: develop
pipelineId: $(ROCM_CMAKE_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: false
rocm-core:
pipelineId: 103
developBranch: master
pipelineId: $(ROCM_CORE_PIPELINE_ID)
stagingBranch: master
mainlineBranch: amd-master
hasGpuTarget: false
rocm-examples:
pipelineId: 216
developBranch: amd-staging
pipelineId: $(ROCM_EXAMPLES_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: true
rocminfo:
pipelineId: 91
developBranch: amd-staging
pipelineId: $(ROCMINFO_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: false
rocMLIR:
pipelineId: 229
developBranch: develop
pipelineId: $(ROCMLIR_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: false
ROCmValidationSuite:
pipelineId: 106
developBranch: master
pipelineId: $(ROCMVALIDATIONSUITE_PIPELINE_ID)
stagingBranch: master
mainlineBranch: master
hasGpuTarget: true
rocm_bandwidth_test:
pipelineId: 88
developBranch: master
pipelineId: $(ROCM_BANDWIDTH_TEST_PIPELINE_ID)
stagingBranch: master
mainlineBranch: master
hasGpuTarget: false
rocm_smi_lib:
pipelineId: 96
developBranch: amd-staging
pipelineId: $(ROCM_SMI_LIB_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: false
rocPRIM:
pipelineId: 273
developBranch: develop
pipelineId: $(ROCPRIM_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: develop
hasGpuTarget: true
rocprofiler:
pipelineId: 143
developBranch: amd-staging
pipelineId: $(ROCPROFILER_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-master
hasGpuTarget: true
rocprofiler-compute:
pipelineId: 257
developBranch: develop
pipelineId: $(ROCPROFILER_COMPUTE_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: amd-mainline
hasGpuTarget: true
rocprofiler-register:
pipelineId: 1
developBranch: amd-staging
pipelineId: $(ROCPROFILER_REGISTER_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: false
rocprofiler-sdk:
pipelineId: 246
developBranch: amd-staging
pipelineId: $(ROCPROFILER_SDK_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: true
rocprofiler-systems:
pipelineId: 255
developBranch: amd-staging
pipelineId: $(ROCPROFILER_SYSTEMS_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: true
rocPyDecode:
pipelineId: 239
developBranch: develop
pipelineId: $(ROCPYDECODE_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
ROCR-Runtime:
pipelineId: 10
developBranch: amd-staging
pipelineId: $(ROCR_RUNTIME_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: false
rocRAND:
pipelineId: 274
developBranch: develop
pipelineId: $(ROCRAND_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: develop
hasGpuTarget: true
rocr_debug_agent:
pipelineId: 136
developBranch: amd-staging
pipelineId: $(ROCR_DEBUG_AGENT_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: false
rocSOLVER:
pipelineId: 81
developBranch: develop
pipelineId: $(ROCSOLVER_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
rocSPARSE:
pipelineId: 314
developBranch: develop
pipelineId: $(ROCSPARSE_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
ROCT-Thunk-Interface: # deprecated
pipelineId: $(ROCT_THUNK_INTERFACE_PIPELINE_ID)
stagingBranch: master
mainlineBranch: master
hasGpuTarget: false
rocThrust:
pipelineId: 276
developBranch: develop
pipelineId: $(ROCTHRUST_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: develop
hasGpuTarget: true
roctracer:
pipelineId: 141
developBranch: amd-staging
pipelineId: $(ROCTRACER_PIPELINE_ID)
stagingBranch: amd-staging
mainlineBranch: amd-mainline
hasGpuTarget: true
rocWMMA:
pipelineId: 109
developBranch: develop
pipelineId: $(ROCWMMA_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
rpp:
pipelineId: 78
developBranch: develop
pipelineId: $(RPP_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
TransferBench:
pipelineId: 265
developBranch: develop
pipelineId: $(TRANSFERBENCH_PIPELINE_ID)
stagingBranch: develop
mainlineBranch: mainline
hasGpuTarget: true
steps:
@@ -280,44 +366,72 @@ steps:
parameters:
componentName: ${{ split(dependency, ':')[0] }}
pipelineId: ${{ parameters.componentVarList[split(dependency, ':')[0]].pipelineId }}
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].developBranch }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
extractAndDeleteFiles: false
${{ if parameters.componentVarList[split(dependency, ':')[0]].hasGpuTarget }}:
fileFilter: "${{ split(dependency, ':')[1] }}*_${{ parameters.os }}_${{ parameters.gpuTarget }}"
# dependencySource = staging
${{ if eq(parameters.dependencySource, 'staging')}}:
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].stagingBranch }}
# dependencySource = mainline
${{ elseif eq(parameters.dependencySource, 'mainline')}}:
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].mainlineBranch }}
# checkoutRef = staging
${{ elseif eq(parameters.checkoutRef, parameters.componentVarList[variables['Build.DefinitionName']].stagingBranch) }}:
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].stagingBranch }}
# checkoutRef = mainline
${{ elseif eq(parameters.checkoutRef, parameters.componentVarList[variables['Build.DefinitionName']].mainlineBranch) }}:
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].mainlineBranch }}
# SourceBranchName = staging
${{ elseif eq(variables['Build.SourceBranchName'], parameters.componentVarlist[variables['Build.DefinitionName']].stagingBranch) }}:
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].stagingBranch }}
# SourceBranchName = mainline
${{ elseif eq(variables['Build.SourceBranchName'], parameters.componentVarlist[variables['Build.DefinitionName']].mainlineBranch) }}:
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].mainlineBranch }}
# default = staging
${{ else }}:
branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].stagingBranch }}
# no colon (:) found in this item in the list
- ${{ elseif containsValue(split(parameters.downstreamAggregateNames, '+'), dependency) }}:
- template: local-artifact-download.yml
parameters:
buildType: current
preTargetFilter: ${{ dependency }}
os: ${{ parameters.os }}
${{ if parameters.componentVarList[dependency].hasGpuTarget }}:
gpuTarget: ${{ parameters.gpuTarget }}
preTargetFilter: ${{ dependency }}
os: ${{ parameters.os }}
buildType: current
- ${{ else }}:
- template: artifact-download.yml
parameters:
componentName: ${{ dependency }}
pipelineId: ${{ parameters.componentVarList[dependency].pipelineId }}
branchName: ${{ parameters.componentVarList[dependency].developBranch }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
extractAndDeleteFiles: false
${{ if parameters.componentVarList[dependency].hasGpuTarget }}:
fileFilter: ${{ parameters.os }}_${{ parameters.gpuTarget }}
${{ else }}:
fileFilter: ${{ parameters.os }}
- task: ExtractFiles@1
displayName: Extract ROCm artifacts
inputs:
archiveFilePatterns: $(Pipeline.Workspace)/d/**/*.tar.gz
destinationFolder: $(Agent.BuildDirectory)/rocm
cleanDestinationFolder: false
overwriteExistingFiles: true
- task: DeleteFiles@1
displayName: Clean up ROCm artifacts
inputs:
SourceFolder: $(Pipeline.Workspace)/d
Contents: '**/*.tar.gz'
RemoveDotFiles: true
- ${{ if containsValue(parameters.dependencyList, 'llvm-project') }}:
# dependencySource = staging
${{ if eq(parameters.dependencySource, 'staging')}}:
branchName: ${{ parameters.componentVarList[dependency].stagingBranch }}
# dependencySource = mainline
${{ elseif eq(parameters.dependencySource, 'mainline')}}:
branchName: ${{ parameters.componentVarList[dependency].mainlineBranch }}
# checkoutRef = staging
${{ elseif eq(parameters.checkoutRef, parameters.componentVarList[variables['Build.DefinitionName']].stagingBranch) }}:
branchName: ${{ parameters.componentVarList[dependency].stagingBranch }}
# checkoutRef = mainline
${{ elseif eq(parameters.checkoutRef, parameters.componentVarList[variables['Build.DefinitionName']].mainlineBranch) }}:
branchName: ${{ parameters.componentVarList[dependency].mainlineBranch }}
# SourceBranchName = staging
${{ elseif eq(variables['Build.SourceBranchName'], parameters.componentVarlist[variables['Build.DefinitionName']].stagingBranch) }}:
branchName: ${{ parameters.componentVarList[dependency].stagingBranch }}
# SourceBranchName = mainline
${{ elseif eq(variables['Build.SourceBranchName'], parameters.componentVarlist[variables['Build.DefinitionName']].mainlineBranch) }}:
branchName: ${{ parameters.componentVarList[dependency].mainlineBranch }}
# default = staging
${{ else }}:
branchName: ${{ parameters.componentVarList[dependency].stagingBranch }}
# Set link to redirect llvm folder
- ${{ if eq(parameters.skipLlvmSymlink, false) }}:
- task: Bash@3
displayName: Symlink from rocm/llvm to rocm/lib/llvm
inputs:
@@ -325,7 +439,6 @@ steps:
script: |
sudo mkdir -p $(Agent.BuildDirectory)/rocm/lib
sudo ln -sr $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm
echo "Created symlink from rocm/llvm to rocm/lib/llvm"
- task: Bash@3
displayName: Symlink executables from rocm/llvm/bin to rocm/bin
inputs:
@@ -333,14 +446,7 @@ steps:
script: |
for file in amdclang amdclang++ amdclang-cl amdclang-cpp amdflang amdlld aompcc mygpu mycpu offload-arch; do
sudo ln -sr $(Agent.BuildDirectory)/rocm/llvm/bin/$file $(Agent.BuildDirectory)/rocm/bin/$file
echo "Created symlink from rocm/llvm/bin/$file to rocm/bin/$file"
done
- ${{ if containsValue(parameters.dependencyList, 'rocm-core') }}:
- task: Bash@3
displayName: Print rocm/.info/version
inputs:
targetType: inline
script: cat $(Agent.BuildDirectory)/rocm/.info/version
# dlopen calls within a ctest or pytest sequence runs into issues when shared library symlink convention is not followed
# the convention is as follows:
# unversioned .so is a symlink to major version .so
@@ -377,16 +483,17 @@ steps:
inputs:
targetType: inline
script: ls -la1R $(Agent.BuildDirectory)/rocm
- task: Bash@3
displayName: 'Link ROCm shared libraries'
inputs:
targetType: inline
# OS ignores if the ROCm lib folder shows up more than once
script: |
echo $(Agent.BuildDirectory)/rocm/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
echo $(Agent.BuildDirectory)/rocm/llvm/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
echo $(Agent.BuildDirectory)/rocm/lib64 | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
echo $(Agent.BuildDirectory)/rocm/llvm/lib64 | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
sudo cat /etc/ld.so.conf.d/rocm-ci.conf
sudo ldconfig -v
ldconfig -p
- ${{ if eq(parameters.skipLibraryLinking, false) }}:
- task: Bash@3
displayName: 'Link ROCm shared libraries'
inputs:
targetType: inline
# OS ignores if the ROCm lib folder shows up more than once
script: |
echo $(Agent.BuildDirectory)/rocm/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
echo $(Agent.BuildDirectory)/rocm/llvm/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
echo $(Agent.BuildDirectory)/rocm/lib64 | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
echo $(Agent.BuildDirectory)/rocm/llvm/lib64 | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
sudo cat /etc/ld.so.conf.d/rocm-ci.conf
sudo ldconfig -v
ldconfig -p

View File

@@ -23,14 +23,13 @@ steps:
inputs:
targetType: inline
script: |
${{ iif(or(eq(parameters.os, 'ubuntu2204'), eq(parameters.os, 'ubuntu2404')), 'sudo apt-get install -y jq', '') }}
sudo apt-get install -y jq
# RESOURCES_REPOSITORIES is a runtime variable (not an env var!) that contains quotations and newlines
# So we need to save it to a file to properly preserve its formatting and contents
cat <<EOF > resources.repositories
$(RESOURCES_REPOSITORIES)
EOF
echo "Value of resources.repositories:"
cat resources.repositories
IS_TAG_BUILD=$(jq 'has("release_repo")' resources.repositories)
@@ -67,6 +66,8 @@ steps:
)
' resources.repositories)
manifest_json=$(Build.ArtifactStagingDirectory)/manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.json
dependencies=()
for manifest_file in $(Pipeline.Workspace)/d/**/manifest_*.json; do
echo "Processing $manifest_file"
@@ -77,10 +78,6 @@ steps:
done
dependencies_json=$(printf '%s\n' "${dependencies[@]}" | jq -s '.')
manifest_filename="manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}"
echo "##vso[task.setvariable variable=manifest_filename]$manifest_filename"
manifest_json=$(Build.ArtifactStagingDirectory)/$manifest_filename.json
jq -n \
--argjson current "$current" \
--argjson dependencies "$dependencies_json" \
@@ -114,14 +111,8 @@ steps:
')
dependencies_rows=$(echo $dependencies_rows)
echo "##vso[task.setvariable variable=dependencies_rows;]$dependencies_rows"
- task: Bash@3
displayName: Print manifest.json
condition: always()
continueOnError: true
inputs:
targetType: inline
script: |
cat $(Build.ArtifactStagingDirectory)/$(manifest_filename).json
cat $manifest_json
- task: Bash@3
displayName: Create manifest.html
condition: always()
@@ -129,10 +120,10 @@ steps:
inputs:
targetType: inline
script: |
manifest_html="$(Build.ArtifactStagingDirectory)/$(manifest_filename).html"
manifest_html=$(Build.ArtifactStagingDirectory)/manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html
cat <<EOF > $manifest_html
<html>
<h1>$(manifest_filename)</h1>
<h1>Manifest</h1>
<h2>Current</h2>
<table border="1">
<tr>
@@ -172,7 +163,7 @@ steps:
continueOnError: true
inputs:
tabName: Manifest
reportDir: $(Build.ArtifactStagingDirectory)/$(manifest_filename).html
reportDir: $(Build.ArtifactStagingDirectory)/manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html
- task: Bash@3
displayName: Save manifest artifact file name
condition: always()
@@ -181,5 +172,5 @@ steps:
workingDirectory: $(Pipeline.Workspace)
targetType: inline
script: |
echo "$(manifest_filename).html" >> pipelineArtifacts.txt
echo "$(manifest_filename).json" >> pipelineArtifacts.txt
echo "manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html" >> pipelineArtifacts.txt
echo "manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.json" >> pipelineArtifacts.txt

View File

@@ -13,10 +13,11 @@ steps:
CC: $(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
inputs:
targetType: inline
workingDirectory: $(Agent.BuildDirectory)/s
workingDirectory: $(Build.SourcesDirectory)
script: |
AZ_API="https://dev.azure.com/ROCm-CI/ROCm-CI/_apis"
GH_API="https://api.github.com/repos/ROCm"
ARTIFACT_NAME="composablekernelbuild${{ parameters.gpuTarget }}"
EXIT_CODE=0
# Try to find an Azure build for the specific CK commit called out in MIOpen's requirements.txt
@@ -38,15 +39,8 @@ steps:
echo "Found specific CK build ID: $CK_BUILD_ID"
fi
AZURE_URL="$AZ_API/build/builds/$CK_BUILD_ID/artifacts?api-version=7.1"
ARTIFACT_URL=$(curl -s $AZURE_URL | \
jq --arg gfx "${{ parameters.gpuTarget }}" '
.value
| map(select(.name | test($gfx)))
| max_by(.name | capture("_(?<dropNumber>\\d+)").dropNumber | tonumber)
| .resource.downloadUrl
' | \
tr -d '"')
AZURE_URL="$AZ_API/build/builds/$CK_BUILD_ID/artifacts?artifactName=$ARTIFACT_NAME&api-version=7.1"
ARTIFACT_URL=$(curl -s $AZURE_URL | jq '.resource.downloadUrl' | tr -d '"')
# If using the specific CK commit and it doesn't have any valid artifacts, use latest successful CK build instead
if { [[ -z "$ARTIFACT_URL" ]] || [[ "$ARTIFACT_URL" == "null" ]]; } && [[ $EXIT_CODE -eq 0 ]]; then
@@ -54,15 +48,8 @@ steps:
LATEST_BUILD_URL="$AZ_API/build/builds?definitions=$(COMPOSABLE_KERNEL_PIPELINE_ID)&statusFilter=completed&resultFilter=succeeded&\$top=1&api-version=7.1"
CK_BUILD_ID=$(curl -s $LATEST_BUILD_URL | jq '.value[0].id')
echo "Found latest CK build ID: $CK_BUILD_ID"
AZURE_URL="$AZ_API/build/builds/$CK_BUILD_ID/artifacts?api-version=7.1"
ARTIFACT_URL=$(curl -s $AZURE_URL | \
jq --arg os "ubuntu2204" --arg gfx "${{ parameters.gpuTarget }}" '
.value
| map(select(.name | test($os) and test($gfx)))
| max_by(.name | capture("_(?<dropNumber>\\d+)").dropNumber | tonumber)
| .resource.downloadUrl
' | \
tr -d '"')
AZURE_URL="$AZ_API/build/builds/$CK_BUILD_ID/artifacts?artifactName=$ARTIFACT_NAME&api-version=7.1"
ARTIFACT_URL=$(curl -s $AZURE_URL | jq '.resource.downloadUrl' | tr -d '"')
EXIT_CODE=2
fi
@@ -70,8 +57,8 @@ steps:
wget --tries=5 --waitretry=10 --retry-connrefused -nv $ARTIFACT_URL -O $(System.ArtifactsDirectory)/ck.zip
unzip $(System.ArtifactsDirectory)/ck.zip -d $(System.ArtifactsDirectory)
mkdir -p $(Agent.BuildDirectory)/rocm
tar -zxvf $(System.ArtifactsDirectory)/composable_kernel*/*.tar.gz -C $(Agent.BuildDirectory)/rocm
rm -r $(System.ArtifactsDirectory)/ck.zip $(System.ArtifactsDirectory)/composable_kernel*
tar -zxvf $(System.ArtifactsDirectory)/$ARTIFACT_NAME/*.tar.gz -C $(Agent.BuildDirectory)/rocm
rm -r $(System.ArtifactsDirectory)/ck.zip $(System.ArtifactsDirectory)/$ARTIFACT_NAME
if [[ $EXIT_CODE -ne 0 ]]; then
BUILD_COMMIT=$(curl -s $AZ_API/build/builds/$CK_BUILD_ID | jq '.sourceVersion' | tr -d '"')

View File

@@ -23,25 +23,145 @@ variables:
value: rocm-ci_high_build_pool
- name: ULTRA_BUILD_POOL
value: rocm-ci_ultra_build_pool
- name: ON_PREM_BUILD_POOL
value: rocm-ci_build_pool
- name: LARGE_DISK_BUILD_POOL
value: rocm-ci_larger_base_disk_pool
- name: GFX942_TEST_POOL
value: gfx942_test_pool
- name: GFX90A_TEST_POOL
value: gfx90a_test_pool
- name: LATEST_RELEASE_VERSION
value: 6.4.2
value: 6.4.1
- name: REPO_RADEON_VERSION
value: 6.4.2
value: 6.4.1
- name: NEXT_RELEASE_VERSION
value: 7.0.0
- name: LATEST_RELEASE_TAG
value: rocm-6.4.2
value: rocm-6.4.1
- name: DOCKER_SKIP_GFX
value: gfx90a
- name: AMDMIGRAPHX_PIPELINE_ID
value: 113
- name: AMDSMI_PIPELINE_ID
value: 99
- name: AOMP_EXTRAS_PIPELINE_ID
value: 111
- name: AOMP_PIPELINE_ID
value: 115
- name: CLR_PIPELINE_ID
value: 145
- name: COMPOSABLE_KERNEL_PIPELINE_ID
value: 86
- name: FLANG_LEGACY_PIPELINE_ID
value: 77
- name: HALF_PIPELINE_ID
value: 101
- name: HALF560_PIPELINE_ID
value: 68
- name: HALF560_BUILD_ID
value: 621
- name: HIP_PIPELINE_ID
value: 93
- name: HIP_TESTS_PIPELINE_ID
value: 233
- name: HIPBLAS_COMMON_PIPELINE_ID
value: 223
- name: HIPBLAS_PIPELINE_ID
value: 87
- name: HIPBLASLT_PIPELINE_ID
value: 112
- name: HIPCUB_PIPELINE_ID
value: 277
- name: HIPFFT_PIPELINE_ID
value: 121
- name: HIPFORT_PIPELINE_ID
value: 102
- name: HIPIFY_PIPELINE_ID
value: 92
- name: HIPRAND_PIPELINE_ID
value: 275
- name: HIPSOLVER_PIPELINE_ID
value: 84
- name: HIPSPARSE_PIPELINE_ID
value: 83
- name: HIPSPARSELT_PIPELINE_ID
value: 104
- name: HIPTENSOR_PIPELINE_ID
value: 105
- name: LLVM_PROJECT_PIPELINE_ID
value: 2
- name: MIOPEN_PIPELINE_ID
value: 108
- name: MIVISIONX_PIPELINE_ID
value: 80
- name: RCCL_PIPELINE_ID
value: 107
- name: RDC_PIPELINE_ID
value: 100
- name: ROCAL_PIPELINE_ID
value: 151
- name: ROCALUTION_PIPELINE_ID
value: 89
- name: ROCBLAS_PIPELINE_ID
value: 85
- name: ROCDBGAPI_PIPELINE_ID
value: 135
- name: ROCDECODE_PIPELINE_ID
value: 79
- name: ROCFFT_PIPELINE_ID
value: 120
- name: ROCGDB_PIPELINE_ID
value: 134
- name: ROCJPEG_PIPELINE_ID
value: 262
- name: ROCM_BANDWIDTH_TEST_PIPELINE_ID
value: 88
- name: ROCM_CMAKE_PIPELINE_ID
value: 6
- name: ROCM_CORE_PIPELINE_ID
value: 103
- name: ROCM_EXAMPLES_PIPELINE_ID
value: 216
- name: ROCM_SMI_LIB_PIPELINE_ID
value: 96
- name: ROCMINFO_PIPELINE_ID
value: 91
- name: ROCMLIR_PIPELINE_ID
value: 229
- name: ROCMVALIDATIONSUITE_PIPELINE_ID
value: 106
- name: ROCPRIM_PIPELINE_ID
value: 273
- name: ROCPROFILER_COMPUTE_PIPELINE_ID
value: 257
- name: ROCPROFILER_REGISTER_PIPELINE_ID
value: 1
- name: ROCPROFILER_SDK_PIPELINE_ID
value: 246
- name: ROCPROFILER_SYSTEMS_PIPELINE_ID
value: 255
- name: ROCPROFILER_PIPELINE_ID
value: 143
- name: ROCPYDECODE_PIPELINE_ID
value: 239
- name: ROCR_DEBUG_AGENT_PIPELINE_ID
value: 136
- name: ROCR_RUNTIME_PIPELINE_ID
value: 10
- name: ROCRAND_PIPELINE_ID
value: 274
- name: ROCSOLVER_PIPELINE_ID
value: 81
- name: ROCSPARSE_PIPELINE_ID
value: 98
- name: ROCTHRUST_PIPELINE_ID
value: 276
- name: ROCTRACER_PIPELINE_ID
value: 141
- name: ROCWMMA_PIPELINE_ID
value: 109
- name: RPP_PIPELINE_ID
value: 78
- name: TRANSFERBENCH_PIPELINE_ID
value: 265

View File

@@ -1,3 +1,38 @@
summarization
QKV
MLPerf
GovReport
oss
openai
gpt
SGLang
amd
MXFP
subproject
ROCpd
rocpd
STL
XCCs
chiplets
hipRTC
nvRTC
warpSize
Datacenter
GST
IET
LTO
MX
Microscaling
NANOO
ROCprof
affinitization
amdclang
benefitting
demangled
inlined
microscaling
roofline
torchtitan
AAC
ABI
ACE
@@ -6,7 +41,6 @@ ACS
AccVGPR
AccVGPRs
ALU
AllReduce
AMD
AMDGPU
AMDGPUs
@@ -14,7 +48,6 @@ AMDMIGraphX
AMI
AOCC
AOMP
AOT
AOTriton
APBDIS
APIC
@@ -34,7 +67,6 @@ Andrej
Arb
Autocast
BARs
BatchNorm
BLAS
BMC
BabelStream
@@ -45,7 +77,6 @@ Bootloader
CAS
CCD
CDNA
CGUI
CHTML
CIFAR
CLI
@@ -83,13 +114,10 @@ ConnectX
CuPy
da
Dashboarding
Dataloading
DBRX
DDR
DF
DGEMM
DGL
DGLGraph
dGPU
dGPUs
DIMM
@@ -107,7 +135,6 @@ DataFrame
DataLoader
DataParallel
Debian
decompositions
DeepSeek
DeepSpeed
Dependabot
@@ -116,7 +143,6 @@ DevCap
DirectX
Dockerfile
Doxygen
dropless
ELMo
ENDPGM
EPYC
@@ -134,12 +160,10 @@ FX
Filesystem
FindDb
Flang
FlashAttention
FluxBenchmark
Fortran
Fuyu
GALB
GAT
GCC
GCD
GCDs
@@ -167,8 +191,6 @@ GPT
GPU
GPU's
GPUs
Graphbolt
GraphSage
GRBM
GenAI
GenZ
@@ -178,11 +200,9 @@ HBM
HCA
HGX
HIPCC
hipDataType
HIPExtension
HIPIFY
HIPification
hipification
HIPify
HPC
HPCG
@@ -197,7 +217,6 @@ Higgs
Hyperparameters
Huggingface
ICD
ICT
ICV
IDE
IDEs
@@ -232,7 +251,6 @@ KV
KVM
Karpathy's
KiB
Kineto
Keras
Khronos
LAPACK
@@ -245,7 +263,6 @@ LM
LSAN
LSan
LTS
LSTMs
LanguageCrossEntropy
LoRA
MEM
@@ -273,7 +290,6 @@ Makefiles
Matplotlib
Matrox
MaxText
Megablocks
Megatrends
Megatron
Mellanox
@@ -283,8 +299,6 @@ Miniconda
MirroredStrategy
Mixtral
MosaicML
MoEs
Mpops
Multicore
Multithreaded
MyEnvironment
@@ -298,7 +312,6 @@ NIC
NICs
NLI
NLP
NN
NPKit
NPS
NSP
@@ -335,7 +348,6 @@ OpenMPI
OpenSSL
OpenVX
OpenXLA
Optim
Oversubscription
PagedAttention
Pallas
@@ -374,7 +386,6 @@ RDC's
RDMA
RDNA
README
Recomputation
RHEL
RMW
RNN
@@ -407,13 +418,11 @@ Ryzen
SALU
SBIOS
SCA
ScaledGEMM
SDK
SDMA
SDPA
SDRAM
SENDMSG
SGLang
SGPR
SGPRs
SHA
@@ -449,8 +458,6 @@ TCI
TCIU
TCP
TCR
TensorRT
TensorFloat
TF
TFLOPS
TP
@@ -458,8 +465,6 @@ TPS
TPU
TPUs
TSME
Taichi
Taichi's
Tagram
TensileLite
TensorBoard
@@ -539,7 +544,6 @@ allocator
allocators
amdgpu
api
aten
atmi
atomics
autogenerated
@@ -710,7 +714,6 @@ installable
interop
interprocedural
intra
intrinsics
invariants
invocating
ipo
@@ -729,13 +732,11 @@ linearized
linter
linux
llvm
lm
localscratch
logits
lossy
macOS
matchers
megatron
microarchitecture
migraphx
migratable
@@ -807,7 +808,6 @@ quantile
quantizer
quasirandom
queueing
qwen
radeon
rccl
rdc
@@ -816,7 +816,6 @@ reStructuredText
redirections
refactorization
reformats
reinforcememt
repo
repos
representativeness
@@ -824,7 +823,6 @@ req
resampling
rescaling
reusability
RLHF
roadmap
roc
rocAL
@@ -862,7 +860,6 @@ roctracer
rst
runtime
runtimes
ResNet
sL
scalability
scalable
@@ -871,7 +868,6 @@ seealso
sendmsg
seqs
serializers
sglang
shader
sharding
sigmoid
@@ -879,7 +875,6 @@ sm
smi
softmax
spack
spmm
src
stochastically
strided
@@ -888,7 +883,6 @@ subdirectory
subexpression
subfolder
subfolders
submatrix
submodule
submodules
subnet
@@ -913,7 +907,6 @@ torchvision
tqdm
tracebacks
txt
TopK
uarch
uncached
uncacheable
@@ -941,7 +934,6 @@ vectorize
vectorized
vectorizer
vectorizes
verl
virtualize
virtualized
vjxb

File diff suppressed because it is too large

View File

@@ -23,6 +23,9 @@ source software compilers, debuggers, and libraries. ROCm is fully integrated in
> A new open source build platform for ROCm is under development at
> https://github.com/ROCm/TheRock, featuring a unified CMake build with bundled
> dependencies, Windows support, and more.
>
> The instructions below describe the prior process for building from source
> which will be replaced once TheRock is mature enough.
## Getting and Building ROCm from Source

View File

@@ -1,690 +0,0 @@
<!-- Do not edit this file! -->
<!-- This file is autogenerated with -->
<!-- tools/autotag/tag_script.py -->
<!-- Disable lints since this is an auto-generated file. -->
<!-- markdownlint-disable blanks-around-headers -->
<!-- markdownlint-disable no-duplicate-header -->
<!-- markdownlint-disable no-blanks-blockquote -->
<!-- markdownlint-disable ul-indent -->
<!-- markdownlint-disable no-trailing-spaces -->
<!-- markdownlint-disable reference-links-images -->
<!-- markdownlint-disable no-missing-space-atx -->
<!-- spellcheck-disable -->
# ROCm 6.4.2 release notes
The release notes provide a summary of notable changes since the previous ROCm release.
- [Release highlights](#release-highlights)
- [Operating system and hardware support changes](#operating-system-and-hardware-support-changes)
- [ROCm components versioning](#rocm-components)
- [Detailed component changes](#detailed-component-changes)
- [ROCm known issues](#rocm-known-issues)
- [ROCm resolved issues](#rocm-resolved-issues)
- [ROCm upcoming changes](#rocm-upcoming-changes)
```{note}
If you're using AMD Radeon™ PRO or Radeon GPUs in a workstation setting with a display connected, see the [Use ROCm on Radeon GPUs](https://rocm.docs.amd.com/projects/radeon/en/latest/docs/compatibility/native_linux/native_linux_compatibility.html)
documentation to verify compatibility and system requirements.
```
## Release highlights
The following are notable new features and improvements in ROCm 6.4.2. For changes to individual components, see
[Detailed component changes](#detailed-component-changes).
### ROCm Compute Profiler enhancements
[ROCm Compute Profiler](https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/index.html) includes the following changes:
* The ``--roofline-data-type`` option now supports FP8, FP16, BF16, FP32, FP64, I8, I32, and I64 data types. This is dependent on the GPU architecture. For more information, see [Roofline options](https://rocm.docs.amd.com/projects/rocprofiler-compute/en/docs-6.4.2/how-to/profile/mode.html#roofline-options).
* ROCm Compute Profiler now uses [AMD SMI](https://rocm.docs.amd.com/projects/amdsmi/en/latest/index.html) instead of [ROCm SMI](https://rocm.docs.amd.com/projects/rocm_smi_lib/en/latest/index.html). The AMD System Management Interface Library (AMD SMI) is a successor to ROCm SMI. It is a unified system management interface tool that provides a user-space interface for applications to monitor and control GPU applications and gives users the ability to query information about drivers and GPUs on the system. For more information, see [https://github.com/ROCm/amdsmi](https://github.com/ROCm/amdsmi) and the [AMD SMI documentation](https://rocm.docs.amd.com/projects/amdsmi/en/latest/index.html).
* ROCm Compute Profiler has added 8-bit floating point (FP8) metrics support for AMD Instinct MI300 series accelerators. For more information, see [System Speed-of-Light](https://rocm.docs.amd.com/projects/rocprofiler-compute/en/docs-6.4.2/conceptual/system-speed-of-light.html).
### rocSOLVER enhancements
rocSOLVER has improved the performance of eigensolvers and singular value decomposition (SVD). For more information, see [rocSOLVER documentation](https://rocm.docs.amd.com/projects/rocSOLVER/en/docs-6.4.2/index.html).
### ROCm Offline Installer Creator updates
The ROCm Offline Installer Creator 6.4.2 includes the following features and improvements:
* Added support for Oracle Linux 8.10 and 9.6, and SLES 15 SP7.
* Additional package options for the Offline Installer Creator, including `amd-smi`, `rocdecode`, `rocjpeg`, and `rdc`.
* ROCm meta packages are now used for selecting ROCm components and use cases.
* Improved separation of kernel/driver and ROCm prerequisite packages to reduce the size of ROCm-only or driver-only offline installers.
In addition, the option to build an offline installer based on ROCm version 5.7.3 has been removed. To build an offline installer for ROCm 5.7.3, use the Offline Installer Creator from version 6.4.1 or earlier. See [ROCm Offline Installer Creator](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.2/install/rocm-offline-installer.html) for more information.
### ROCm Runfile Installer updates
The ROCm Runfile Installer 6.4.2 adds support for Oracle Linux 8.10 and 9.6 (using the RHEL 8 or 9 .run files), Debian 12 (using the Ubuntu 22.04 .run file), and SLES 15 SP7. It also fixes permission settings issues during ROCm and AMDGPU driver installation. For more information, see [ROCm Runfile Installer](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.2/install/rocm-runfile-installer.html).
### ROCm documentation updates
ROCm documentation continues to be updated to provide clearer and more comprehensive guidance for a wider variety of user needs and use cases.
* [Tutorials for AI developers](https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/) have been expanded with the following four new tutorials:
* Inference tutorial: [AI agent with MCPs using vLLM and PydanticAI](https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/notebooks/inference/build_airbnb_agent_mcp.html)
* GPU development and optimization tutorials:
* [Kernel development and optimization with Triton](https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/notebooks/gpu_dev_optimize/triton_kernel_dev.html)
* [Profiling Llama-4 inference with vLLM](https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/notebooks/gpu_dev_optimize/llama4_profiling_vllm.html)
* [FP8 quantization with AMD Quark for vLLM](https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/notebooks/gpu_dev_optimize/fp8_quantization_quark_vllm.html)
For more information about the changes, see [Changelog for the AI Developer Hub](https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/changelog.html).
* ROCm provides a comprehensive ecosystem for deep learning development. For more details, see [Deep learning frameworks for ROCm](https://rocm.docs.amd.com/en/docs-6.4.2/how-to/deep-learning-rocm.html). As of July 2025, AMD ROCm provides support for the following additional deep learning frameworks:
* Deep Graph Library is an easy-to-use, high-performance, and scalable Python package for deep learning on graphs. DGL is framework agnostic, meaning if a deep graph model is a component in an end-to-end application, the rest of the logic is implemented using PyTorch. It is currently supported on ROCm 6.4.0. For more information, see [DGL compatibility](https://rocm.docs.amd.com/en/docs-6.4.2/compatibility/ml-compatibility/dgl-compatibility.html).
  * Stanford Megatron-LM is a large-scale language model training framework. It's designed to train massive transformer-based language models efficiently using model and data parallelism. It is currently supported on ROCm 6.3.0. For more information, see [Stanford Megatron-LM compatibility](https://rocm.docs.amd.com/en/docs-6.4.2/compatibility/ml-compatibility/stanford-megatron-lm-compatibility.html).
* Volcano Engine Reinforcement Learning for LLMs (verl) is a reinforcement learning framework designed for large language models (LLMs). verl offers a scalable, open-source fine-tuning solution optimized for AMD Instinct GPUs with full ROCm support. It is currently supported on ROCm 6.2.0. For more information, see [verl compatibility](https://rocm.docs.amd.com/en/docs-6.4.2/compatibility/ml-compatibility/verl-compatibility.html).
* Documentation for the new [ROCprof Compute Viewer](https://rocm.docs.amd.com/projects/rocprof-compute-viewer/en/docs-6.4.2/) was added in May 2025. This tool is used to visualize and analyze GPU thread trace data collected using [rocprofv3](https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/latest/index.html). Note that [ROCprof Compute Viewer](https://rocm.docs.amd.com/projects/rocprof-compute-viewer/en/docs-6.4.2/) is in an early access state. Running production workloads is not recommended.
* The AMDGPU installer documentation has been removed to encourage the use of the package manager for ROCm installation. While the package manager is the recommended method, you can still install ROCm using the AMDGPU installer by following the [legacy process](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/install/install-methods/amdgpu-installer-index.html). Make sure to update the command with the intended ROCm version before running it. For more information, see [Installation via native package manager](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.2/install/install-methods/package-manager-index.html).
## Operating system and hardware support changes
ROCm 6.4.2 adds support for SLES 15 SP7. For more information, see [SLES installation](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.2/install/install-methods/package-manager/package-manager-sles.html).
ROCm 6.4.2 marks the end of support (EoS) for RHEL 9.5.
ROCm 6.4.2 adds support for RDNA3 architecture-based [Radeon RX 7700 XT](https://www.amd.com/en/products/graphics/desktops/radeon/7000-series/amd-radeon-rx-7700-xt.html) GPU. This GPU is supported on Ubuntu 24.04.2 and RHEL 9.6.
For details, see the full list of [Supported GPUs
(Linux)](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.2/reference/system-requirements.html#supported-gpus).
See the [Compatibility
matrix](../../docs/compatibility/compatibility-matrix.rst)
for more information about operating system and hardware compatibility.
## ROCm components
The following table lists the versions of ROCm components for ROCm 6.4.2, including any version
changes from 6.4.1 to 6.4.2. Click the component's updated version to go to a list of its changes.
Click {fab}`github` to go to the component's source code on GitHub.
<div class="pst-scrollable-table-container">
<table id="rocm-rn-components" class="table">
<thead>
<tr>
<th>Category</th>
<th>Group</th>
<th>Name</th>
<th>Version</th>
<th></th>
</tr>
</thead>
<colgroup>
<col span="1">
<col span="1">
</colgroup>
<tbody class="rocm-components-libs rocm-components-ml">
<tr>
<th rowspan="9">Libraries</th>
<th rowspan="9">Machine learning and computer vision</th>
<td><a href="https://rocm.docs.amd.com/projects/composable_kernel/en/docs-6.4.2/index.html">Composable Kernel</a></td>
<td>1.1.0</td>
<td><a href="https://github.com/ROCm/composable_kernel"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/AMDMIGraphX/en/docs-6.4.2/index.html">MIGraphX</a></td>
<td>2.12.0</td>
<td><a href="https://github.com/ROCm/AMDMIGraphX"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/MIOpen/en/docs-6.4.2/index.html">MIOpen</a></td>
<td>3.4.0</td>
<td><a href="https://github.com/ROCm/MIOpen"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/MIVisionX/en/docs-6.4.2/index.html">MIVisionX</a></td>
<td>3.2.0</td>
<td><a href="https://github.com/ROCm/MIVisionX"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocAL/en/docs-6.4.2/index.html">rocAL</a></td>
<td>2.2.0</td>
<td><a href="https://github.com/ROCm/rocAL"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocDecode/en/docs-6.4.2/index.html">rocDecode</a></td>
<td>0.10.0</td>
<td><a href="https://github.com/ROCm/rocDecode"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocJPEG/en/docs-6.4.2/index.html">rocJPEG</a></td>
<td>0.8.0</td>
<td><a href="https://github.com/ROCm/rocJPEG"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocPyDecode/en/docs-6.4.2/index.html">rocPyDecode</a></td>
<td>0.3.1</td>
<td><a href="https://github.com/ROCm/rocPyDecode"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rpp/en/docs-6.4.2/index.html">RPP</a></td>
<td>1.9.10</td>
<td><a href="https://github.com/ROCm/rpp"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
</tbody>
<tbody class="rocm-components-libs rocm-components-communication tbody-reverse-zebra">
<tr>
<th rowspan="2"></th>
<th rowspan="2">Communication</th>
<td><a href="https://rocm.docs.amd.com/projects/rccl/en/docs-6.4.2/index.html">RCCL</a></td>
        <td>2.22.3&nbsp;&Rightarrow;&nbsp;<a href="#rccl-2-22-3">2.22.3</a></td>
<td><a href="https://github.com/ROCm/rccl"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocSHMEM/en/docs-6.4.2/index.html">rocSHMEM</a></td>
        <td>2.0.0&nbsp;&Rightarrow;&nbsp;<a href="#rocshmem-2-0-1">2.0.1</a></td>
<td><a href="https://github.com/ROCm/rocSHMEM"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
</tbody>
<tbody class="rocm-components-libs rocm-components-math tbody-reverse-zebra">
<tr>
<th rowspan="16"></th>
<th rowspan="16">Math</th>
<td><a href="https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.4.2/index.html">hipBLAS</a></td>
<td>2.4.0</td>
<td><a href="https://github.com/ROCm/hipBLAS"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/hipBLASLt/en/docs-6.4.2/index.html">hipBLASLt</a></td>
        <td>0.12.1&nbsp;&Rightarrow;&nbsp;<a href="#hipblaslt-0-12-1">0.12.1</a></td>
<td><a href="https://github.com/ROCm/hipBLASLt"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/hipFFT/en/docs-6.4.2/index.html">hipFFT</a></td>
<td>1.0.18</td>
<td><a href="https://github.com/ROCm/hipFFT"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/hipfort/en/docs-6.4.2/index.html">hipfort</a></td>
<td>0.6.0</td>
<td><a href="https://github.com/ROCm/hipfort"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/hipRAND/en/docs-6.4.2/index.html">hipRAND</a></td>
<td>2.12.0</td>
<td><a href="https://github.com/ROCm/hipRAND"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/hipSOLVER/en/docs-6.4.2/index.html">hipSOLVER</a></td>
<td>2.4.0</td>
<td><a href="https://github.com/ROCm/hipSOLVER"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/hipSPARSE/en/docs-6.4.2/index.html">hipSPARSE</a></td>
<td>3.2.0</td>
<td><a href="https://github.com/ROCm/hipSPARSE"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/hipSPARSELt/en/docs-6.4.2/index.html">hipSPARSELt</a></td>
<td>0.2.3</td>
<td><a href="https://github.com/ROCm/hipSPARSELt"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocALUTION/en/docs-6.4.2/index.html">rocALUTION</a></td>
<td>3.2.3</td>
<td><a href="https://github.com/ROCm/rocALUTION"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocBLAS/en/docs-6.4.2/index.html">rocBLAS</a></td>
        <td>4.4.0&nbsp;&Rightarrow;&nbsp;<a href="#rocblas-4-4-1">4.4.1</a></td>
<td><a href="https://github.com/ROCm/rocBLAS"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocFFT/en/docs-6.4.2/index.html">rocFFT</a></td>
<td>1.0.32</td>
<td><a href="https://github.com/ROCm/rocFFT"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocRAND/en/docs-6.4.2/index.html">rocRAND</a></td>
<td>3.3.0</td>
<td><a href="https://github.com/ROCm/rocRAND"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocSOLVER/en/docs-6.4.2/index.html">rocSOLVER</a></td>
        <td>3.28.0&nbsp;&Rightarrow;&nbsp;<a href="#rocsolver-3-28-2">3.28.2</a></td>
<td><a href="https://github.com/ROCm/rocSOLVER"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocSPARSE/en/docs-6.4.2/index.html">rocSPARSE</a></td>
<td>3.4.0</td>
<td><a href="https://github.com/ROCm/rocSPARSE"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocWMMA/en/docs-6.4.2/index.html">rocWMMA</a></td>
<td>1.7.0</td>
<td><a href="https://github.com/ROCm/rocWMMA"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/Tensile/en/docs-6.4.2/src/index.html">Tensile</a></td>
<td>4.43.0</td>
<td><a href="https://github.com/ROCm/Tensile"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
</tbody>
<tbody class="rocm-components-libs rocm-components-primitives tbody-reverse-zebra">
<tr>
<th rowspan="4"></th>
<th rowspan="4">Primitives</th>
<td><a href="https://rocm.docs.amd.com/projects/hipCUB/en/docs-6.4.2/index.html">hipCUB</a></td>
<td>3.4.0</td>
<td><a href="https://github.com/ROCm/hipCUB"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/hipTensor/en/docs-6.4.2/index.html">hipTensor</a></td>
<td>1.5.0</td>
<td><a href="https://github.com/ROCm/hipTensor"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocPRIM/en/docs-6.4.2/index.html">rocPRIM</a></td>
        <td>3.4.0&nbsp;&Rightarrow;&nbsp;<a href="#rocprim-3-4-1">3.4.1</a></td>
<td><a href="https://github.com/ROCm/rocPRIM"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocThrust/en/docs-6.4.2/index.html">rocThrust</a></td>
<td>3.3.0</td>
<td><a href="https://github.com/ROCm/rocThrust"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
</tbody>
<tbody class="rocm-components-tools rocm-components-system tbody-reverse-zebra">
<tr>
<th rowspan="7">Tools</th>
<th rowspan="7">System management</th>
<td><a href="https://rocm.docs.amd.com/projects/amdsmi/en/docs-6.4.2/index.html">AMD SMI</a></td>
<td>25.4.2&nbsp;&Rightarrow;&nbsp;<a href="#amd-smi-25-5-1">25.5.1</a></td>
<td><a href="https://github.com/ROCm/amdsmi"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rdc/en/docs-6.4.2/index.html">ROCm Data Center Tool</a></td>
<td>0.3.0</td>
<td><a href="https://github.com/ROCm/rdc"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocminfo/en/docs-6.4.2/index.html">rocminfo</a></td>
<td>1.0.0</td>
<td><a href="https://github.com/ROCm/rocminfo"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocm_smi_lib/en/docs-6.4.2/index.html">ROCm SMI</a></td>
<td>7.5.0</td>
<td><a href="https://github.com/ROCm/rocm_smi_lib"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/ROCmValidationSuite/en/docs-6.4.2/index.html">ROCm Validation Suite</a></td>
        <td>1.1.0&nbsp;&Rightarrow;&nbsp;<a href="#rocm-validation-suite-1-1-0">1.1.0</a></td>
<td><a href="https://github.com/ROCm/ROCmValidationSuite"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
</tbody>
<tbody class="rocm-components-tools rocm-components-perf">
<tr>
<th rowspan="6"></th>
<th rowspan="6">Performance</th>
<td><a href="https://rocm.docs.amd.com/projects/rocm_bandwidth_test/en/docs-6.4.2/index.html">ROCm Bandwidth
Test</a></td>
<td>1.4.0</td>
<td><a href="https://github.com/ROCm/rocm_bandwidth_test/"><i
class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocprofiler-compute/en/docs-6.4.2/index.html">ROCm Compute Profiler</a></td>
        <td>3.1.0&nbsp;&Rightarrow;&nbsp;<a href="#rocm-compute-profiler-3-1-1">3.1.1</a></td>
<td><a href="https://github.com/ROCm/rocprofiler-compute"><i
class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocprofiler-systems/en/docs-6.4.2/index.html">ROCm Systems Profiler</a></td>
        <td>1.0.1&nbsp;&Rightarrow;&nbsp;<a href="#rocm-systems-profiler-1-0-2">1.0.2</a></td>
<td><a href="https://github.com/ROCm/rocprofiler-systems"><i
class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocprofiler/en/docs-6.4.2/index.html">ROCProfiler</a></td>
<td>2.0.0</td>
<td><a href="https://github.com/ROCm/ROCProfiler/"><i
class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/docs-6.4.2/index.html">ROCprofiler-SDK</a></td>
<td>0.6.0</td>
<td><a href="https://github.com/ROCm/rocprofiler-sdk/"><i
class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr >
<td><a href="https://rocm.docs.amd.com/projects/roctracer/en/docs-6.4.2/index.html">ROCTracer</a></td>
<td>4.1.0</td>
<td><a href="https://github.com/ROCm/ROCTracer/"><i
class="fab fa-github fa-lg"></i></a></td>
</tr>
</tbody>
<tbody class="rocm-components-tools rocm-components-dev">
<tr>
<th rowspan="5"></th>
<th rowspan="5">Development</th>
<td><a href="https://rocm.docs.amd.com/projects/HIPIFY/en/docs-6.4.2/index.html">HIPIFY</a></td>
<td>19.0.0</td>
<td><a href="https://github.com/ROCm/HIPIFY/"><i
class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/ROCdbgapi/en/docs-6.4.2/index.html">ROCdbgapi</a></td>
<td>0.77.2</td>
<td><a href="https://github.com/ROCm/ROCdbgapi/"><i
class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/ROCmCMakeBuildTools/en/docs-6.4.2/index.html">ROCm CMake</a></td>
<td>0.14.0</td>
<td><a href="https://github.com/ROCm/rocm-cmake/"><i
class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/ROCgdb/en/docs-6.4.2/index.html">ROCm Debugger (ROCgdb)</a>
</td>
<td>15.2</td>
<td><a href="https://github.com/ROCm/ROCgdb/"><i
class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/rocr_debug_agent/en/docs-6.4.2/index.html">ROCr Debug Agent</a>
</td>
<td>2.0.4</td>
<td><a href="https://github.com/ROCm/rocr_debug_agent/"><i
class="fab fa-github fa-lg"></i></a></td>
</tr>
</tbody>
<tbody class="rocm-components-compilers tbody-reverse-zebra">
<tr>
<th rowspan="2" colspan="2">Compilers</th>
<td><a href="https://rocm.docs.amd.com/projects/HIPCC/en/docs-6.4.2/index.html">HIPCC</a></td>
<td>1.1.1</td>
<td><a href="https://github.com/ROCm/llvm-project/tree/amd-staging/amd/hipcc"><i
class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/llvm-project/en/docs-6.4.2/index.html">llvm-project</a></td>
<td>19.0.0</td>
<td><a href="https://github.com/ROCm/llvm-project/"><i
class="fab fa-github fa-lg"></i></a></td>
</tr>
</tbody>
<tbody class="rocm-components-runtimes tbody-reverse-zebra">
<tr>
<th rowspan="2" colspan="2">Runtimes</th>
<td><a href="https://rocm.docs.amd.com/projects/HIP/en/docs-6.4.2/index.html">HIP</a></td>
        <td>6.4.1&nbsp;&Rightarrow;&nbsp;<a href="#hip-6-4-2">6.4.2</a></td>
<td><a href="https://github.com/ROCm/HIP/"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
<tr>
<td><a href="https://rocm.docs.amd.com/projects/ROCR-Runtime/en/docs-6.4.2/index.html">ROCr Runtime</a></td>
<td>1.15.0</td>
<td><a href="https://github.com/ROCm/ROCR-Runtime/"><i class="fab fa-github fa-lg"></i></a></td>
</tr>
</tbody>
</table>
</div>
## Detailed component changes
The following sections describe key changes to ROCm components.
```{note}
For a historical overview of ROCm component updates, see the {doc}`ROCm consolidated changelog </release/changelog>`.
```
### **AMD SMI** (25.5.1)
#### Added
- Compute Unit Occupancy information per process.
- Support for getting the GPU Board voltage.
- New firmware PLDM_BUNDLE. `amd-smi firmware` can now show the PLDM Bundle on supported systems.
- `amd-smi ras --afid --cper-file <file_path>` to decode CPER records.
#### Changed
- Padded `asic_serial` in `amdsmi_get_asic_info` with 0s.
- Renamed field `COMPUTE_PARTITION` to `ACCELERATOR_PARTITION` in CLI call `amd-smi --partition`.
#### Resolved issues
- Corrected VRAM memory calculation in `amdsmi_get_gpu_process_list`. Previously, the VRAM memory usage reported by `amdsmi_get_gpu_process_list` was inaccurate and was calculated using KB instead of KiB.
```{note}
See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples, and in-depth descriptions.
```
### **HIP** (6.4.2)
#### Added
* HIP API implementation for `hipEventRecordWithFlags`, which records an event in the specified stream with the specified flags (see the sketch after this list).
* Support for the pointer attribute `HIP_POINTER_ATTRIBUTE_CONTEXT`.
* Support for the flags `hipEventWaitDefault` and `hipEventWaitExternal`.
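The following is a minimal, illustrative sketch of how the new event API might be used. It assumes `hipEventRecordWithFlags` follows the usual `(event, stream, flags)` ordering and that `0` selects the default recording behavior; check the HIP runtime API reference for the exact signature and accepted flag values.
```cpp
// Illustrative sketch only (not from the release notes): record an event on a
// stream with explicit flags, then block until it completes.
#include <hip/hip_runtime.h>
#include <cstdio>

int main()
{
    hipStream_t stream;
    hipEvent_t event;
    (void)hipStreamCreate(&stream);
    (void)hipEventCreate(&event);

    // Assumed (event, stream, flags) ordering; 0 = default recording behavior.
    (void)hipEventRecordWithFlags(event, stream, 0);

    // Wait on the host for the recorded event to complete.
    (void)hipEventSynchronize(event);
    std::printf("event completed\n");

    (void)hipEventDestroy(event);
    (void)hipStreamDestroy(stream);
    return 0;
}
```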
#### Optimized
* Improved the implementation of `hipEventSynchronize`: the HIP runtime now issues internal callbacks as non-blocking operations to improve performance.
#### Resolved issues
* Issue of dependency on `libgcc-s1` during `rocm-dev` installation on Debian Buster. The HIP runtime removed this Debian package dependency and uses `libgcc1` instead for this distro.
* Build issue for `COMGR` dynamic loading on Fedora and other distros. The HIP runtime no longer links against `libamd_comgr.so`.
* Failure in the `hipStreamDestroy` API when the stream type is `hipStreamLegacy`. The API now returns the error code `hipErrorInvalidResourceHandle` in this condition.
* Kernel launch errors such as `shared object initialization failed`, `invalid device function`, or `kernel execution failure`. The HIP runtime now loads `COMGR` properly, taking both the file name and the mapped image into account.
* Memory access fault in some applications. The HIP runtime fixed offset accumulation in memory address calculations.
* Memory leak in virtual memory management (VMM). The HIP runtime now uses the handle size for the allocated memory range instead of the actual physical memory size, which fixes the address clash with VMM.
* Large memory allocation issue. The HIP runtime now checks GPU video RAM and system RAM properly and sets size limits during memory allocation on either the host or the GPU device.
* Support for the `hipDeviceMallocContiguous` flag in `hipExtMallocWithFlags()`, which now enables `HSA_AMD_MEMORY_POOL_CONTIGUOUS_FLAG` in the memory pool allocation on the GPU device (see the sketch after this list).
* Random memory segmentation fault when handling `GraphExec` object release and device synchronization. The HIP runtime now uses its internal device synchronize function in `__hipUnregisterFatBinary`.
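As a rough illustration of the `hipExtMallocWithFlags()` item above, a contiguous device allocation could be requested as follows. This is a sketch under the assumption that the function takes `(ptr, sizeBytes, flags)` and accepts the `hipDeviceMallocContiguous` flag named above; consult the HIP extension API documentation for specifics.
```cpp
// Illustrative sketch only: request a physically contiguous device allocation.
#include <hip/hip_runtime.h>
#include <cstdio>

int main()
{
    void* ptr = nullptr;
    const size_t bytes = 64u * 1024u * 1024u; // 64 MiB, arbitrary example size

    // hipDeviceMallocContiguous is reported to enable
    // HSA_AMD_MEMORY_POOL_CONTIGUOUS_FLAG in the underlying pool allocation.
    hipError_t err = hipExtMallocWithFlags(&ptr, bytes, hipDeviceMallocContiguous);
    if (err != hipSuccess)
    {
        std::printf("allocation failed: %s\n", hipGetErrorString(err));
        return 1;
    }

    std::printf("contiguous allocation at %p\n", ptr);
    (void)hipFree(ptr);
    return 0;
}
```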
### **hipBLASLt** (0.12.1)
#### Added
* Support for gfx1151 on Linux, complementing the previous support in the HIP SDK for Windows.
### **RCCL** (2.22.3)
#### Added
* Added support for the LL128 protocol on gfx942.
### **rocBLAS** (4.4.1)
#### Resolved issues
* rocBLAS might have failed to produce correct results for cherk/zherk on gfx90a/gfx942 with problem sizes k > 500 because the imaginary portion of the C matrix diagonal was not zero. rocBLAS now zeros the imaginary portion.
### **ROCm Compute Profiler** (3.1.1)
#### Added
* 8-bit floating point (FP8) metrics support for AMD Instinct MI300 GPUs.
* Additional data types for roofline: FP8, FP16, BF16, FP32, FP64, I8, I32, I64 (dependent on the GPU architecture).
* Data type selection option ``--roofline-data-type / -R`` for roofline profiling. The default data type is FP32.
#### Changed
* Changed dependency from `rocm-smi` to `amd-smi`.
#### Resolved issues
* Fixed a crash related to Agent ID caused by the new format of the `rocprofv3` output CSV file.
### **ROCm Systems Profiler** (1.0.2)
#### Optimized
* Improved readability of the OpenMP target offload traces by showing them on a single Perfetto track.
#### Resolved issues
* Fixed the file path to the script that merges Perfetto files from multi-process MPI runs. The script has also been renamed from `merge-multiprocess-output.sh` to `rocprof-sys-merge-output.sh`.
### **ROCm Validation Suite** (1.1.0)
#### Added
* NPS2/DPX and NPS4/CPX partition modes support for AMD Instinct MI300X.
### **rocPRIM** (3.4.1)
#### Upcoming changes
* Changes to the template parameters of warp and block algorithms will be made in an upcoming release.
* Due to an upcoming compiler change, the following symbols related to warp size have been marked as deprecated and will be removed in an upcoming major release:
  * `rocprim::device_warp_size()`. This has been replaced by `rocprim::arch::wavefront::min_size()` and `rocprim::arch::wavefront::max_size()` for compile-time constants. Use these when allocating global or shared memory. For run-time constants, use `rocprim::arch::wavefront::size()`. A short migration sketch follows this list.
* `rocprim::warp_size()`
* `ROCPRIM_WAVEFRONT_SIZE`
* The default scan accumulator types for device-level scan algorithms will be changed in an upcoming release, resulting in a breaking change. Previously, the default accumulator type was set to the input type for the inclusive scans and to the initial value type for the exclusive scans. This could lead to unexpected overflow if the input or initial type was smaller than the output type when the accumulator type wasn't explicitly set using the `AccType` template parameter. The new default accumulator types will be set to the type that results when the input or initial value type is applied to the scan operator.
The following is the complete list of affected functions and how their default accumulator types are changing:
* `rocprim::inclusive_scan`
* current default: `class AccType = typename std::iterator_traits<InputIterator>::value_type>`
* future default: `class AccType = rocprim::invoke_result_binary_op_t<typename std::iterator_traits<InputIterator>::value_type, BinaryFunction>`
* `rocprim::deterministic_inclusive_scan`
* current default: `class AccType = typename std::iterator_traits<InputIterator>::value_type>`
* future default: `class AccType = rocprim::invoke_result_binary_op_t<typename std::iterator_traits<InputIterator>::value_type, BinaryFunction>`
* `rocprim::exclusive_scan`
* current default: `class AccType = detail::input_type_t<InitValueType>>`
* future default: `class AccType = rocprim::invoke_result_binary_op_t<rocprim::detail::input_type_t<InitValueType>, BinaryFunction>`
* `rocprim::deterministic_exclusive_scan`
* current default: `class AccType = detail::input_type_t<InitValueType>>`
* future default: `class AccType = rocprim::invoke_result_binary_op_t<rocprim::detail::input_type_t<InitValueType>, BinaryFunction>`
* `rocprim::load_cs` and `rocprim::store_cs` are deprecated and will be removed in an upcoming release. Alternatively, you can use `rocprim::load_nontemporal` and `rocprim::store_nontemporal` to load and store values in specific conditions (like bypassing the cache) for `rocprim::thread_load` and `rocprim::thread_store`.
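The following is the warp-size migration sketch referenced above (an illustrative kernel, not taken from rocPRIM). It assumes the replacement functions are available through the umbrella `rocprim/rocprim.hpp` header, with the compile-time bound used for shared-memory sizing and the run-time query used for lane arithmetic, as described in the deprecation note.
```cpp
// Illustrative migration sketch: size static shared memory with the
// compile-time upper bound and read the actual wavefront size at run time.
#include <rocprim/rocprim.hpp>
#include <hip/hip_runtime.h>

__global__ void wave_broadcast(float* out)
{
    // Before: __shared__ float tile[rocprim::device_warp_size()];
    __shared__ float tile[rocprim::arch::wavefront::max_size()];

    // Before: const unsigned int ws = rocprim::warp_size();
    const unsigned int ws = rocprim::arch::wavefront::size();

    const unsigned int lane = threadIdx.x % ws;
    tile[lane] = static_cast<float>(lane);
    __syncthreads();

    // out must hold one element per launched thread.
    out[blockIdx.x * blockDim.x + threadIdx.x] = tile[lane];
}
```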
### **rocSHMEM** (2.0.1)
#### Resolved issues
* Incorrect output for `rocshmem_ctx_my_pe` and `rocshmem_ctx_n_pes`.
* Multi-team errors by providing team specific buffers in `rocshmem_ctx_wg_team_sync`.
* Missing implementation of `rocshmem_g` for IPC conduit.
### **rocSOLVER** (3.28.2)
#### Added
* Hybrid computation support for existing routines, such as STERF.
* SVD for general matrices based on Cuppen's Divide and Conquer algorithm:
- GESDD (with batched and strided\_batched versions)
#### Optimized
* Reduced the device memory requirements for STEDC, SYEVD/HEEVD, and SYGVD/HEGVD.
* Improved the performance of STEDC and divide and conquer Eigensolvers.
* Improved the performance of SYTRD, the initial step of the Eigensolvers that start with the tridiagonalization of the input matrix.
## ROCm known issues
ROCm known issues are noted on {fab}`github` [GitHub](https://github.com/ROCm/ROCm/labels/Verified%20Issue). For known
issues related to individual components, review the [Detailed component changes](#detailed-component-changes).
## ROCm resolved issues
The following are previously known issues resolved in this release. For resolved issues related to
individual components, review the [Detailed component changes](#detailed-component-changes).
### AMD SMI CLI: CPER entries not dumped continuously when using follow flag
An issue where CPER entries were not streamed continuously as intended when using the `--follow` flag with `amd-smi ras --cper` has been resolved. See [GitHub issue #4768](https://github.com/ROCm/ROCm/issues/4768).
### Instinct MI300X reports incorrect raw GPU timestamps
An issue where the command processor firmware reported incorrect raw GPU timestamps on MI300X accelerators has been resolved. See [GitHub issue #4079](https://github.com/ROCm/ROCm/issues/4079).
### MIOpen generates incorrect results for particular input with FP32 data type
An issue where MIOpen generated incorrect results in the `conv2dbackward` function for a particular input with 32-bit floating point (FP32) data types has been resolved. The issue was specific to FP32 data types with a 2 x 2 kernel size and a 2 x 1 dilation. See [GitHub issue #4606](https://github.com/ROCm/ROCm/issues/4606).
## ROCm upcoming changes
The following changes to the ROCm software stack are anticipated for future releases.
### AMD SMI migration to AMDGPU driver repository
In a future release, [AMD SMI](https://github.com/ROCm/amdsmi) will be relocated from the ROCm organization repository to a new AMDTools repository to better align with its system-level functionality. `amd-smi-lib` will no longer be included in the `rocm-developer-tools` meta-package included with your standard ROCm installation. Instead, it will be packaged with the AMDGPU driver installation.
### ROCm SMI deprecation
[ROCm SMI](https://github.com/ROCm/rocm_smi_lib) will be phased out in an
upcoming ROCm release and will enter maintenance mode. After this transition,
only critical bug fixes will be addressed and no further feature development
will take place.
It's strongly recommended to transition your projects to [AMD
SMI](https://github.com/ROCm/amdsmi), the successor to ROCm SMI. AMD SMI
includes all the features of the ROCm SMI and will continue to receive regular
updates, new functionality, and ongoing support. For more information on AMD
SMI, see the [AMD SMI documentation](https://rocm.docs.amd.com/projects/amdsmi/en/latest/).
### ROCTracer, ROCProfiler, rocprof, and rocprofv2 deprecation
Development and support for ROCTracer, ROCProfiler, `rocprof`, and `rocprofv2` are being phased out in favor of ROCprofiler-SDK in upcoming ROCm releases. Starting with ROCm 6.4, only critical defect fixes will be addressed for older versions of the profiling tools and libraries. All users are encouraged to upgrade to the latest version of the ROCprofiler-SDK library and the (`rocprofv3`) tool to ensure continued support and access to new features. ROCprofiler-SDK is still in beta today and will be production-ready in a future ROCm release.
ROCTracer, ROCProfiler, `rocprof`, and `rocprofv2` are anticipated to reach end of life in a future release, targeted for Q1 2026.
### AMDGPU wavefront size compiler macro deprecation
Access to the wavefront size as a compile-time constant via the `__AMDGCN_WAVEFRONT_SIZE`
and `__AMDGCN_WAVEFRONT_SIZE__` macros or the `constexpr warpSize` variable is deprecated
and will be disabled in a future release.
* The `__AMDGCN_WAVEFRONT_SIZE__` macro and `__AMDGCN_WAVEFRONT_SIZE` alias will be removed in an upcoming release.
It is recommended to remove any use of this macro. For more information, see
[AMDGPU support](https://rocm.docs.amd.com/projects/llvm-project/en/docs-6.4.2/LLVM/clang/html/AMDGPUSupport.html).
* `warpSize` will only be available as a non-`constexpr` variable. Where required,
  the wavefront size should be queried via the `warpSize` variable in device code,
  or via `hipGetDeviceProperties` in host code (see the sketch after this list). Neither of these results in a compile-time constant. For more information, see [warpSize](https://rocm.docs.amd.com/projects/HIP/en/docs-6.4.2/how-to/hip_cpp_language_extensions.html#warpsize).
* For cases where compile-time evaluation of the wavefront size cannot be avoided,
uses of `__AMDGCN_WAVEFRONT_SIZE`, `__AMDGCN_WAVEFRONT_SIZE__`, or `warpSize`
can be replaced with a user-defined macro or `constexpr` variable with the wavefront
size(s) for the target hardware. For example:
```cpp
#if defined(__GFX9__)
#define MY_MACRO_FOR_WAVEFRONT_SIZE 64
#else
#define MY_MACRO_FOR_WAVEFRONT_SIZE 32
#endif
```
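For the host-code path mentioned in the list above, the following is a minimal sketch of querying the wavefront size at runtime through `hipGetDeviceProperties`; it assumes a single device and trims error handling down to a bare status check.
```cpp
// Minimal sketch: query the wavefront size at runtime instead of relying on a
// compile-time constant. hipGetDeviceProperties fills hipDeviceProp_t, whose
// warpSize field holds the wavefront size of the selected device.
#include <hip/hip_runtime.h>
#include <iostream>

int main() {
    int device = 0;                                   // assumes device 0 is the target GPU
    hipDeviceProp_t props;
    if (hipGetDeviceProperties(&props, device) != hipSuccess) {
        std::cerr << "hipGetDeviceProperties failed\n";
        return 1;
    }
    std::cout << "wavefront size of device " << device << ": " << props.warpSize << '\n';
    return 0;
}
```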
### HIPCC Perl scripts deprecation
The HIPCC Perl scripts (`hipcc.pl` and `hipconfig.pl`) will be removed in an upcoming release.
### Changes to ROCm Object Tooling
The ROCm Object Tooling utilities ``roc-obj-ls``, ``roc-obj-extract``, and ``roc-obj`` are
deprecated in ROCm 6.4 and will be removed in a future release. The
``llvm-objdump --offloading`` option now extracts all clang-offload-bundles found
within the input objects or executables into individual code objects. It also supports
the ``--arch-name`` option, which limits extraction to code objects for the specified
target architecture. See [llvm-objdump](https://llvm.org/docs/CommandGuide/llvm-objdump.html)
for more information.
### HIP runtime API changes
A number of changes planned for the HIP runtime API in an upcoming major release
are not backward compatible with prior releases. Most of these changes increase
alignment between HIP and CUDA APIs or behavior. Others clean up header files,
remove namespace collisions, and establish a clear separation between
`hipRTC` and the HIP runtime. For more information, see [HIP 7.0 Is Coming: What You Need to Know to Stay Ahead](https://rocm.blogs.amd.com/ecosystems-and-partners/transition-to-hip-7.0-blog/README.html).
View File
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<manifest>
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
<default revision="refs/tags/rocm-6.4.2"
<default revision="refs/tags/rocm-6.4.1"
remote="rocm-org"
sync-c="true"
sync-j="4" />
View File
@@ -1,149 +0,0 @@
<head>
<meta charset="UTF-8">
<meta name="description" content="ROCm licensing terms">
<meta name="keywords" content="license, licensing terms">
</head>
# ROCm license
```{include} ../../LICENSE
```
:::{note}
The preceding license applies to the [ROCm repository](https://github.com/ROCm/ROCm), which
primarily contains documentation. For licenses related to other ROCm components, refer to the
following section.
:::
## ROCm component licenses
ROCm is released by Advanced Micro Devices, Inc. (AMD) and is licensed per component separately.
The following table is a list of ROCm components with links to their respective license
terms. These components may include third party components subject to
additional licenses. Please review individual repositories for more information.
<!-- spellcheck-disable -->
| Component | License |
|:---------------------|:-------------------------|
| [AMD Compute Language Runtime (CLR)](https://github.com/ROCm/clr) | [MIT](https://github.com/ROCm/clr/blob/amd-staging/LICENSE.txt) |
| [AMD SMI](https://github.com/ROCm/amdsmi) | [MIT](https://github.com/ROCm/amdsmi/blob/amd-staging/LICENSE) |
| [aomp](https://github.com/ROCm/aomp/) | [Apache 2.0](https://github.com/ROCm/aomp/blob/aomp-dev/LICENSE) |
| [aomp-extras](https://github.com/ROCm/aomp-extras/) | [MIT](https://github.com/ROCm/aomp-extras/blob/aomp-dev/LICENSE) |
| [Code Object Manager (Comgr)](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/comgr) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/comgr/LICENSE.txt) |
| [Composable Kernel](https://github.com/ROCm/composable_kernel) | [MIT](https://github.com/ROCm/composable_kernel/blob/develop/LICENSE) |
| [half](https://github.com/ROCm/half/) | [MIT](https://github.com/ROCm/half/blob/rocm/LICENSE.txt) |
| [HIP](https://github.com/ROCm/HIP/) | [MIT](https://github.com/ROCm/HIP/blob/amd-staging/LICENSE.txt) |
| [hipamd](https://github.com/ROCm/clr/tree/amd-staging/hipamd) | [MIT](https://github.com/ROCm/clr/blob/amd-staging/hipamd/LICENSE.txt) |
| [hipBLAS](https://github.com/ROCm/hipBLAS/) | [MIT](https://github.com/ROCm/hipBLAS/blob/develop/LICENSE.md) |
| [hipBLASLt](https://github.com/ROCm/hipBLASLt/) | [MIT](https://github.com/ROCm/hipBLASLt/blob/develop/LICENSE.md) |
| [HIPCC](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/hipcc) | [MIT](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/hipcc/LICENSE.txt) |
| [hipCUB](https://github.com/ROCm/hipCUB/) | [Custom](https://github.com/ROCm/hipCUB/blob/develop/LICENSE.txt) |
| [hipFFT](https://github.com/ROCm/hipFFT/) | [MIT](https://github.com/ROCm/hipFFT/blob/develop/LICENSE.md) |
| [hipfort](https://github.com/ROCm/hipfort/) | [MIT](https://github.com/ROCm/hipfort/blob/develop/LICENSE) |
| [HIPIFY](https://github.com/ROCm/HIPIFY/) | [MIT](https://github.com/ROCm/HIPIFY/blob/amd-staging/LICENSE.txt) |
| [hipRAND](https://github.com/ROCm/hipRAND/) | [MIT](https://github.com/ROCm/hipRAND/blob/develop/LICENSE.txt) |
| [hipSOLVER](https://github.com/ROCm/hipSOLVER/) | [MIT](https://github.com/ROCm/hipSOLVER/blob/develop/LICENSE.md) |
| [hipSPARSE](https://github.com/ROCm/hipSPARSE/) | [MIT](https://github.com/ROCm/hipSPARSE/blob/develop/LICENSE.md) |
| [hipSPARSELt](https://github.com/ROCm/hipSPARSELt/) | [MIT](https://github.com/ROCm/hipSPARSELt/blob/develop/LICENSE.md) |
| [hipTensor](https://github.com/ROCm/hipTensor) | [MIT](https://github.com/ROCm/hipTensor/blob/develop/LICENSE) |
| hsa-amd-aqlprofile | [AMD Software EULA](https://www.amd.com/en/legal/eula/amd-software-eula.html) |
| [llvm-project](https://github.com/ROCm/llvm-project/) | [Apache](https://github.com/ROCm/llvm-project/blob/amd-staging/LICENSE.TXT) |
| [llvm-project/flang](https://github.com/ROCm/llvm-project/tree/amd-staging/flang) | [Apache 2.0](https://github.com/ROCm/llvm-project/blob/amd-staging/flang/LICENSE.TXT) |
| [MIGraphX](https://github.com/ROCm/AMDMIGraphX/) | [MIT](https://github.com/ROCm/AMDMIGraphX/blob/develop/LICENSE) |
| [MIOpen](https://github.com/ROCm/MIOpen/) | [MIT](https://github.com/ROCm/MIOpen/blob/develop/LICENSE.txt) |
| [MIVisionX](https://github.com/ROCm/MIVisionX/) | [MIT](https://github.com/ROCm/MIVisionX/blob/develop/LICENSE.txt) |
| [rocAL](https://github.com/ROCm/rocAL) | [MIT](https://github.com/ROCm/rocAL/blob/develop/LICENSE.txt) |
| [rocALUTION](https://github.com/ROCm/rocALUTION/) | [MIT](https://github.com/ROCm/rocALUTION/blob/develop/LICENSE.md) |
| [rocBLAS](https://github.com/ROCm/rocBLAS/) | [MIT](https://github.com/ROCm/rocBLAS/blob/develop/LICENSE.md) |
| [ROCdbgapi](https://github.com/ROCm/ROCdbgapi/) | [MIT](https://github.com/ROCm/ROCdbgapi/blob/amd-staging/LICENSE.txt) |
| [rocDecode](https://github.com/ROCm/rocDecode) | [MIT](https://github.com/ROCm/rocDecode/blob/develop/LICENSE) |
| [rocFFT](https://github.com/ROCm/rocFFT/) | [MIT](https://github.com/ROCm/rocFFT/blob/develop/LICENSE.md) |
| [ROCgdb](https://github.com/ROCm/ROCgdb/) | [GNU General Public License v3.0](https://github.com/ROCm/ROCgdb/blob/amd-staging/COPYING3) |
| [rocJPEG](https://github.com/ROCm/rocJPEG/) | [MIT](https://github.com/ROCm/rocJPEG/blob/develop/LICENSE) |
| [ROCK-Kernel-Driver](https://github.com/ROCm/ROCK-Kernel-Driver/) | [GPL 2.0 WITH Linux-syscall-note](https://github.com/ROCm/ROCK-Kernel-Driver/blob/master/COPYING) |
| [rocminfo](https://github.com/ROCm/rocminfo/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocminfo/blob/amd-staging/License.txt) |
| [ROCm Bandwidth Test](https://github.com/ROCm/rocm_bandwidth_test/) | [MIT](https://github.com/ROCm/rocm_bandwidth_test/blob/master/LICENSE.txt) |
| [ROCm CMake](https://github.com/ROCm/rocm-cmake/) | [MIT](https://github.com/ROCm/rocm-cmake/blob/develop/LICENSE) |
| [ROCm Communication Collectives Library (RCCL)](https://github.com/ROCm/rccl/) | [Custom](https://github.com/ROCm/rccl/blob/develop/LICENSE.txt) |
| [ROCm-Core](https://github.com/ROCm/rocm-core) | [MIT](https://github.com/ROCm/rocm-core/blob/master/copyright) |
| [ROCm Compute Profiler](https://github.com/ROCm/rocprofiler-compute) | [MIT](https://github.com/ROCm/rocprofiler-compute/blob/amd-staging/LICENSE) |
| [ROCm Data Center (RDC)](https://github.com/ROCm/rdc/) | [MIT](https://github.com/ROCm/rdc/blob/amd-staging/LICENSE) |
| [ROCm-Device-Libs](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/device-libs) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/device-libs/LICENSE.TXT) |
| [ROCm-OpenCL-Runtime](https://github.com/ROCm/clr/tree/amd-staging/opencl) | [MIT](https://github.com/ROCm/clr/blob/amd-staging/opencl/LICENSE.txt) |
| [ROCm Performance Primitives (RPP)](https://github.com/ROCm/rpp) | [MIT](https://github.com/ROCm/rpp/blob/develop/LICENSE) |
| [ROCm SMI Lib](https://github.com/ROCm/rocm_smi_lib/) | [MIT](https://github.com/ROCm/rocm_smi_lib/blob/amd-staging/License.txt) |
| [ROCm Systems Profiler](https://github.com/ROCm/rocprofiler-systems) | [MIT](https://github.com/ROCm/rocprofiler-systems/blob/amd-staging/LICENSE) |
| [ROCm Validation Suite](https://github.com/ROCm/ROCmValidationSuite/) | [MIT](https://github.com/ROCm/ROCmValidationSuite/blob/master/LICENSE) |
| [rocPRIM](https://github.com/ROCm/rocPRIM/) | [MIT](https://github.com/ROCm/rocPRIM/blob/develop/LICENSE.txt) |
| [ROCProfiler](https://github.com/ROCm/rocprofiler/) | [MIT](https://github.com/ROCm/rocprofiler/blob/amd-staging/LICENSE) |
| [ROCprofiler-SDK](https://github.com/ROCm/rocprofiler-sdk) | [MIT](https://github.com/ROCm/rocprofiler-sdk/blob/amd-mainline/LICENSE) |
| [rocPyDecode](https://github.com/ROCm/rocPyDecode) | [MIT](https://github.com/ROCm/rocPyDecode/blob/develop/LICENSE.txt) |
| [rocRAND](https://github.com/ROCm/rocRAND/) | [MIT](https://github.com/ROCm/rocRAND/blob/develop/LICENSE.txt) |
| [ROCr Debug Agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/amd-staging/LICENSE.txt) |
| [ROCR-Runtime](https://github.com/ROCm/ROCR-Runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCR-Runtime/blob/amd-staging/LICENSE.txt) |
| [rocSHMEM](https://github.com/ROCm/rocSHMEM/) | [MIT](https://github.com/ROCm/rocSHMEM/blob/develop/LICENSE.md) |
| [rocSOLVER](https://github.com/ROCm/rocSOLVER/) | [BSD-2-Clause](https://github.com/ROCm/rocSOLVER/blob/develop/LICENSE.md) |
| [rocSPARSE](https://github.com/ROCm/rocSPARSE/) | [MIT](https://github.com/ROCm/rocSPARSE/blob/develop/LICENSE.md) |
| [rocThrust](https://github.com/ROCm/rocThrust/) | [Apache 2.0](https://github.com/ROCm/rocThrust/blob/develop/LICENSE) |
| [ROCTracer](https://github.com/ROCm/roctracer/) | [MIT](https://github.com/ROCm/roctracer/blob/amd-master/LICENSE) |
| [rocWMMA](https://github.com/ROCm/rocWMMA/) | [MIT](https://github.com/ROCm/rocWMMA/blob/develop/LICENSE.md) |
| [Tensile](https://github.com/ROCm/Tensile/) | [MIT](https://github.com/ROCm/Tensile/blob/develop/LICENSE.md) |
| [TransferBench](https://github.com/ROCm/TransferBench) | [MIT](https://github.com/ROCm/TransferBench/blob/develop/LICENSE.md) |
Open-source ROCm components are released via public GitHub
repositories, packages on [https://repo.radeon.com](https://repo.radeon.com), and other distribution channels.
Proprietary products are only available on [https://repo.radeon.com](https://repo.radeon.com).
Proprietary components are organized in a proprietary subdirectory in the package
repositories to distinguish them from open-source packages.
```{note}
The following additional terms and conditions apply to your use of ROCm technical documentation.
```
©2023 - 2025 Advanced Micro Devices, Inc. All rights reserved.
The information presented in this document is for informational purposes only
and may contain technical inaccuracies, omissions, and typographical errors. The
information contained herein is subject to change and may be rendered inaccurate
for many reasons, including but not limited to product and roadmap changes,
component and motherboard version changes, new model and/or product releases,
product differences between differing manufacturers, software changes, BIOS
flashes, firmware upgrades, or the like. Any computer system has risks of
security vulnerabilities that cannot be completely prevented or mitigated. AMD
assumes no obligation to update or otherwise correct or revise this information.
However, AMD reserves the right to revise this information and to make changes
from time to time to the content hereof without obligation of AMD to notify any
person of such revisions or changes.
THIS INFORMATION IS PROVIDED “AS IS.” AMD MAKES NO REPRESENTATIONS OR WARRANTIES
WITH RESPECT TO THE CONTENTS HEREOF AND ASSUMES NO RESPONSIBILITY FOR ANY
INACCURACIES, ERRORS, OR OMISSIONS THAT MAY APPEAR IN THIS INFORMATION. AMD
SPECIFICALLY DISCLAIMS ANY IMPLIED WARRANTIES OF NON-INFRINGEMENT,
MERCHANTABILITY, OR FITNESS FOR ANY PARTICULAR PURPOSE. IN NO EVENT WILL AMD BE
LIABLE TO ANY PERSON FOR ANY RELIANCE, DIRECT, INDIRECT, SPECIAL, OR OTHER
CONSEQUENTIAL DAMAGES ARISING FROM THE USE OF ANY INFORMATION CONTAINED HEREIN,
EVEN IF AMD IS EXPRESSLY ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
AMD, the AMD Arrow logo, ROCm, and combinations thereof are trademarks of
Advanced Micro Devices, Inc. Other product names used in this publication are
for identification purposes only and may be trademarks of their respective
companies.
### Package licensing
:::{attention}
AQL Profiler and AOCC CPU optimization are both provided in binary form, each
subject to the license agreement enclosed in the directory for the binary available
in `/opt/rocm/share/doc/hsa-amd-aqlprofile/EULA`. By using, installing,
copying or distributing AQL Profiler and/or AOCC CPU Optimizations, you agree to
the terms and conditions of this license agreement. If you do not agree to the
terms of this agreement, do not install, copy or use the AQL Profiler and/or the
AOCC CPU Optimizations.
:::
For the rest of the ROCm packages, you can find the licensing information at the
following location: `/opt/rocm/share/doc/<component-name>/` or in the locations
specified in the preceding table.
For example, you can fetch the licensing information of the `amd_comgr`
component (Code Object Manager) from the `/opt/rocm/share/doc/amd_comgr/LICENSE.txt` file.
View File
@@ -1,131 +0,0 @@
ROCm Version,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
,,,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
,"RHEL 9.6, 9.4","RHEL 9.6, 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
,"SLES 15 SP7, SP6",SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
,,,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
,"Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,,,,,,,,,,,,
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
,RDNA4,RDNA4,,,,,,,,,,,,,,,
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
,,,,,,,,,,,,,,,,,
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
:doc:`verl <../compatibility/ml-compatibility/verl-compatibility>` [#verl_compat]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.3.0.post0,N/A,N/A,N/A,N/A,N/A
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>`,N/A,N/A,N/A,85f95ae,85f95ae,85f95ae,85f95ae,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat]_,N/A,N/A,2.4.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>`,N/A,N/A,N/A,0.7.0,0.7.0,0.7.0,0.7.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`Taichi <../compatibility/ml-compatibility/taichi-compatibility>` [#taichi_compat]_,N/A,N/A,N/A,N/A,1.8.0b1,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.2,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
,,,,,,,,,,,,,,,,,
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,
Thrust,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
CUB,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
,,,,,,,,,,,,,,,,,
KMD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,
:doc:`KMD versions <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
,,,,,,,,,,,,,,,,,
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
:doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
:doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
,,,,,,,,,,,,,,,,,
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,
:doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
:doc:`rocSHMEM <rocshmem:index>`,2.0.1,2.0.0,2.0.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
,,,,,,,,,,,,,,,,,
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
:doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.1,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
:doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
:doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
:doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.3,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
:doc:`rocBLAS <rocblas:index>`,4.4.1,4.4.0,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
:doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
:doc:`rocSOLVER <rocsolver:index>`,3.28.2,3.28.0,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
:doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
,,,,,,,,,,,,,,,,,
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,
:doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
:doc:`rocPRIM <rocprim:index>`,3.4.1,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
,,,,,,,,,,,,,,,,,
SUPPORT LIBS,,,,,,,,,,,,,,,,,
`hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
,,,,,,,,,,,,,,,,,
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,
:doc:`AMD SMI <amdsmi:index>`,25.5.1,25.4.2,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.5.0,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
,,,,,,,,,,,,,,,,,
PERFORMANCE TOOLS,,,,,,,,,,,,,,,,,
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.1,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.2,1.0.1,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60402,2.0.60401,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`ROCTracer <roctracer:index>`,4.1.60402,4.1.60401,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
,,,,,,,,,,,,,,,,,
DEVELOPMENT TOOLS,,,,,,,,,,,,,,,,,
:doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,19.0.0,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
,,,,,,,,,,,,,,,,,
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
:doc:`llvm-project <llvm-project:index>`,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
,,,,,,,,,,,,,,,,,
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
:doc:`HIP <hip:index>`,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
View File
@@ -1,248 +0,0 @@
.. meta::
:description: ROCm compatibility matrix
:keywords: GPU, architecture, hardware, compatibility, system, requirements, components, libraries
**************************************************************************************
Compatibility matrix
**************************************************************************************
Use this matrix to view the ROCm compatibility and system requirements across successive major and minor releases.
You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-rocm-compatibility-matrix>`.
Accelerators and GPUs listed in the following table support compute workloads (no display
information or graphics). If you're using ROCm with AMD Radeon or Radeon Pro GPUs for graphics
workloads, see the `Use ROCm on Radeon GPU documentation
<https://rocm.docs.amd.com/projects/radeon/en/latest/docs/compatibility.html>`_ to verify
compatibility and system requirements.
.. |br| raw:: html
<br/>
.. container:: format-big-table
.. csv-table::
:header: "ROCm Version", "6.4.2", "6.4.1", "6.3.0"
:stub-columns: 1
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5
,"RHEL 9.6, 9.4","RHEL 9.6, 9.5, 9.4","RHEL 9.5, 9.4"
,RHEL 8.10,RHEL 8.10,RHEL 8.10
,"SLES 15 SP7, SP6",SLES 15 SP6,"SLES 15 SP6, SP5"
,"Oracle Linux 9, 8 [#mi300x]_","Oracle Linux 9, 8 [#mi300x]_",Oracle Linux 8.10 [#mi300x]_
,Debian 12 [#single-node]_,Debian 12 [#single-node]_,
,Azure Linux 3.0 [#mi300x]_,Azure Linux 3.0 [#mi300x]_,
,.. _architecture-support-compatibility-matrix:,,
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3
,CDNA2,CDNA2,CDNA2
,CDNA,CDNA,CDNA
,RDNA4,RDNA4,
,RDNA3,RDNA3,RDNA3
,RDNA2,RDNA2,RDNA2
,.. _gpu-support-compatibility-matrix:,,
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1201 [#RDNA-OS]_,gfx1201 [#RDNA-OS]_,
,gfx1200 [#RDNA-OS]_,gfx1200 [#RDNA-OS]_,
,gfx1101 [#RDNA-OS]_ [#7700XT-OS]_,gfx1101 [#RDNA-OS]_,
,gfx1100,gfx1100,gfx1100
,gfx1030,gfx1030,gfx1030
,gfx942,gfx942,gfx942
,gfx90a,gfx90a,gfx90a
,gfx908,gfx908,gfx908
,,,
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix:,,
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 2.1, 2.0, 1.13"
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1"
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.31
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>`,N/A,N/A,85f95ae
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>`,N/A,N/A,0.7.0
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.17.3
,,,
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix:,,
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0
,,,
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix:,,
Thrust,2.5.0,2.5.0,2.3.2
CUB,2.5.0,2.5.0,2.3.2
,,,
KMD & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,,
:doc:`KMD versions <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
,,,
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,,
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.11.0
:doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.3.0
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.1.0
:doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.1.0
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.8.0
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.6.0
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.2.0
:doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.1
,,,
COMMUNICATION,.. _commlibs-support-compatibility-matrix:,,
:doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.21.5
:doc:`rocSHMEM <rocshmem:index>`,2.0.1,2.0.0,N/A
,,,
MATH LIBS,.. _mathlibs-support-compatibility-matrix:,,
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.3.0
:doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.1,0.10.0
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.17
:doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.5.0
:doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.11.0
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.3.0
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.1.2
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.2
:doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.3,3.2.1
:doc:`rocBLAS <rocblas:index>`,4.4.1,4.4.0,4.3.0
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.31
:doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.2.0
:doc:`rocSOLVER <rocsolver:index>`,3.28.2,3.28.0,3.27.0
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.3.0
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.6.0
:doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.42.0
,,,
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix:,,
:doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.3.0
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.4.0
:doc:`rocPRIM <rocprim:index>`,3.4.1,3.4.0,3.3.0
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0
,,,
SUPPORT LIBS,,,
`hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43483,6.3.42131
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.2,6.4.1,6.3.0
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_
,,,
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix:,,
:doc:`AMD SMI <amdsmi:index>`,25.5.1,25.4.2,24.7.1
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.5.0,7.4.0
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0
,,,
PERFORMANCE TOOLS,,,
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.1,3.1.0,3.0.0
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.2,1.0.1,0.1.0
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60402,2.0.60401,2.0.60300
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.5.0
:doc:`ROCTracer <roctracer:index>`,4.1.60402,4.1.60401,4.1.60300
,,,
DEVELOPMENT TOOLS,,,
:doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,18.0.0.24455
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.0
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.3
,,,
COMPILERS,.. _compilers-support-compatibility-matrix:,,
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25224,19.0.0.25184,18.0.0.24455
:doc:`llvm-project <llvm-project:index>`,19.0.0.25224,19.0.0.25184,18.0.0.24491
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25224,19.0.0.25184,18.0.0.24491
,,,
RUNTIMES,.. _runtime-support-compatibility-matrix:,,
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43484,6.4.43483,6.3.42131
:doc:`HIP <hip:index>`,6.4.43484,6.4.43483,6.3.42131
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.14.0
.. rubric:: Footnotes
.. [#mi300x] Oracle Linux and Azure Linux are supported only on AMD Instinct MI300X.
.. [#single-node] Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
.. [#RDNA-OS] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
.. [#7700XT-OS] Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
.. [#kfd_support] As of ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The tested user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
.. [#ROCT-rocr] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
.. _OS-kernel-versions:
Operating systems, kernel and Glibc versions
*********************************************
Use this lookup table to confirm which operating system and kernel versions are supported with ROCm.
.. csv-table::
:header: "OS", "Version", "Kernel", "Glibc"
:widths: 40, 20, 30, 20
:stub-columns: 1
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 24.04.2, "6.8 GA, 6.11 HWE", 2.39
,,
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 22.04.5, "5.15 GA, 6.8 HWE", 2.35
,,
`Red Hat Enterprise Linux (RHEL 9) <https://access.redhat.com/articles/3078#RHEL9>`_, 9.6, 5.14+, 2.34
,9.5, 5.14+, 2.34
,9.4, 5.14+, 2.34
,9.3, 5.14+, 2.34
,,
`Red Hat Enterprise Linux (RHEL 8) <https://access.redhat.com/articles/3078#RHEL8>`_, 8.10, 4.18.0+, 2.28
,8.9, 4.18.0, 2.28
,,
`SUSE Linux Enterprise Server (SLES) <https://www.suse.com/support/kb/doc/?id=000019587#SLE15SP4>`_, 15 SP7, 6.11.0+, 2.38
,15 SP6, "6.5.0+, 6.4.0", 2.38
,15 SP5, 5.14.21, 2.31
,,
`Oracle Linux <https://blogs.oracle.com/scoter/post/oracle-linux-and-unbreakable-enterprise-kernel-uek-releases>`_, 9, 5.15.0 (UEK), 2.35
,8, 5.15.0 (UEK), 2.28
,,
`Debian <https://www.debian.org/download>`_,12, 6.1, 2.36
,,
`Azure Linux <https://techcommunity.microsoft.com/blog/linuxandopensourceblog/azure-linux-3-0-now-in-preview-on-azure-kubernetes-service-v1-31/4287229>`_,3.0, 6.6.60, 2.38
,,
.. note::
* See `Red Hat Enterprise Linux Release Dates <https://access.redhat.com/articles/3078>`_ to learn about the specific kernel versions supported on Red Hat Enterprise Linux (RHEL).
* See `List of SUSE Linux Enterprise Server kernel <https://www.suse.com/support/kb/doc/?id=000019587>`_ to learn about the specific kernel version supported on SUSE Linux Enterprise Server (SLES).
..
Footnotes and ref anchors in the historical tables below should be appended with "-past-60" to differentiate them from
the footnote references in the latest compatibility matrix above. It also allows for easy find and replace.
An easy way to work is to download the historical .csv file and open it in Excel. Then, when the content is ready,
delete the columns you don't need to build the current compatibility matrix used in the table above. Find and replace all
instances of "-past-60" to make it ready for the table above.
.. _past-rocm-compatibility-matrix:
Past versions of ROCm compatibility matrix
***************************************************
Expand for a full historical view of:
.. dropdown:: ROCm 6.0 - Present
You can `download the entire .csv <../downloads/compatibility-matrix-historical-6.0.csv>`_ for offline reference.
.. csv-table::
:file: compatibility-matrix-historical-6.0.csv
:header-rows: 1
:stub-columns: 1
.. rubric:: Footnotes
.. [#mi300x-past-60] Oracle Linux and Azure Linux are supported only on AMD Instinct MI300X.
.. [#single-node-past-60] Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
.. [#RDNA-OS-past-60] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
.. [#7700XT-OS-past-60] Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
.. [#mi300_624-past-60] **For ROCm 6.2.4** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
.. [#mi300_622-past-60] **For ROCm 6.2.2** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
.. [#mi300_621-past-60] **For ROCm 6.2.1** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
.. [#mi300_620-past-60] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
.. [#mi300_612-past-60] **For ROCm 6.1.2** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
.. [#mi300_611-past-60] **For ROCm 6.1.1** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
.. [#mi300_610-past-60] **For ROCm 6.1.0** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
.. [#mi300_602-past-60] **For ROCm 6.0.2** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
.. [#mi300_600-past-60] **For ROCm 6.0.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
.. [#verl_compat] verl is only supported on ROCm 6.2.0.
.. [#dgl_compat] DGL is only supported on ROCm 6.4.0.
.. [#taichi_compat] Taichi is only supported on ROCm 6.3.2.
.. [#kfd_support-past-60] As of ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The tested user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
.. [#ROCT-rocr-past-60] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.

View File

@@ -1,255 +0,0 @@
:orphan:
.. meta::
:description: Deep Graph Library (DGL) compatibility
:keywords: GPU, DGL compatibility
.. version-set:: rocm_version latest
********************************************************************************
DGL compatibility
********************************************************************************
Deep Graph Library `(DGL) <https://www.dgl.ai/>`_ is an easy-to-use, high-performance and scalable
Python package for deep learning on graphs. DGL is framework agnostic, meaning
that if a deep graph model is a component in an end-to-end application, the rest of
the logic can be implemented using PyTorch.
* ROCm support for DGL is hosted in the `https://github.com/ROCm/dgl <https://github.com/ROCm/dgl>`_ repository.
* Due to independent compatibility considerations, this location differs from the `https://github.com/dmlc/dgl <https://github.com/dmlc/dgl>`_ upstream repository.
* Use the prebuilt :ref:`Docker images <dgl-docker-compat>` with DGL, PyTorch, and ROCm preinstalled.
* See the :doc:`ROCm DGL installation guide <rocm-install-on-linux:install/3rd-party/dgl-install>`
to install and get started.
Supported devices
================================================================================
- **Officially Supported**: TF32 with AMD Instinct MI300X (through hipBLASLt)
- **Partially Supported**: TF32 with AMD Instinct MI250X
.. _dgl-recommendations:
Use cases and recommendations
================================================================================
DGL can be used for graph learning and for building popular graph models such as
GAT, GCN, and GraphSAGE. These models support a variety of use cases, such as:
- Recommender systems
- Network Optimization and Analysis
- 1D (Temporal) and 2D (Image) Classification
- Drug Discovery
Multiple use cases of DGL have been tested and verified.
A recommended example follows a drug discovery pipeline using the ``SE3Transformer``.
Refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`_,
where you can search for DGL examples and best practices to optimize your training workflows on AMD GPUs.
Coverage includes:
- Single-GPU training/inference
- Multi-GPU training
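As a quick illustration of building a graph model with DGL, the following minimal sketch constructs a small graph and applies a single ``GraphConv`` layer. It assumes the PyTorch backend of DGL and a working ROCm PyTorch installation; the GPU is addressed through the usual ``cuda`` device string, and the graph and feature sizes are illustrative only.

.. code-block:: python

   # Minimal sketch: one GCN layer on a tiny graph (assumes DGL's PyTorch backend).
   import dgl
   import torch
   from dgl.nn import GraphConv

   # A small directed graph with 3 nodes; self-loops are added for GCN.
   g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 0])))
   g = dgl.add_self_loop(g).to("cuda")

   feat = torch.randn(3, 16, device="cuda")   # 16 input features per node
   conv = GraphConv(16, 8).to("cuda")         # GCN layer: 16 -> 8 features
   out = conv(g, feat)
   print(out.shape)                           # torch.Size([3, 8])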
.. _dgl-docker-compat:
Docker image compatibility
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `DGL images <https://hub.docker.com/r/rocm/dgl>`_
with ROCm and PyTorch backends on Docker Hub. The following Docker image tags and associated
inventories were tested on `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_.
Click the |docker-icon| to view the image on Docker Hub.
.. list-table:: DGL Docker image components
:header-rows: 1
:class: docker-image-compatibility
* - Docker
- DGL
- PyTorch
- Ubuntu
- Python
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-8ce2c3bcfaa137ab94a75f9e2ea711894748980f57417739138402a542dd5564"><i class="fab fa-docker fa-lg"></i></a>
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`_
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`_
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-cf1683283b8eeda867b690229c8091c5bbf1edb9f52e8fb3da437c49a612ebe4"><i class="fab fa-docker fa-lg"></i></a>
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`_
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-4834f178c3614e2d09e89e32041db8984c456d45dfd20286e377ca8635686554"><i class="fab fa-docker fa-lg"></i></a>
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`_
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
- 22.04
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-88740a2c8ab4084b42b10c3c6ba984cab33dd3a044f479c6d7618e2b2cb05e69"><i class="fab fa-docker fa-lg"></i></a>
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`_
- `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`_
- 22.04
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
Key ROCm libraries for DGL
================================================================================
DGL on ROCm depends on specific libraries that affect its features and performance.
Using the DGL Docker container, or building it with the provided Dockerfile or a ROCm base image, is recommended.
If you prefer to build it yourself, ensure the following dependencies are installed:
.. list-table::
:header-rows: 1
* - ROCm library
- Version
- Purpose
* - `Composable Kernel <https://github.com/ROCm/composable_kernel>`_
- :version-ref:`"Composable Kernel" rocm_version`
- Enables faster execution of core operations like matrix multiplication
(GEMM), convolutions and transformations.
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`_
- :version-ref:`hipBLAS rocm_version`
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
matrix and vector operations.
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
- :version-ref:`hipBLASLt rocm_version`
- hipBLASLt is an extension of the hipBLAS library, providing additional
features like epilogues fused into the matrix multiplication kernel or
use of integer tensor cores.
* - `hipCUB <https://github.com/ROCm/hipCUB>`_
- :version-ref:`hipCUB rocm_version`
- Provides a C++ template library for parallel algorithms for reduction,
scan, sort and select.
* - `hipFFT <https://github.com/ROCm/hipFFT>`_
- :version-ref:`hipFFT rocm_version`
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
* - `hipRAND <https://github.com/ROCm/hipRAND>`_
- :version-ref:`hipRAND rocm_version`
- Provides fast random number generation for GPUs.
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
- :version-ref:`hipSOLVER rocm_version`
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and
singular value decompositions (SVD).
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_
- :version-ref:`hipSPARSE rocm_version`
- Accelerates operations on sparse matrices, such as sparse matrix-vector
or matrix-matrix products.
* - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`_
- :version-ref:`hipSPARSELt rocm_version`
- Accelerates operations on sparse matrices, such as sparse matrix-vector
or matrix-matrix products.
* - `hipTensor <https://github.com/ROCm/hipTensor>`_
- :version-ref:`hipTensor rocm_version`
- Optimizes for high-performance tensor operations, such as contractions.
* - `MIOpen <https://github.com/ROCm/MIOpen>`_
- :version-ref:`MIOpen rocm_version`
- Optimizes deep learning primitives such as convolutions, pooling,
normalization, and activation functions.
* - `MIGraphX <https://github.com/ROCm/AMDMIGraphX>`_
- :version-ref:`MIGraphX rocm_version`
- Adds graph-level optimizations, ONNX model and mixed-precision support,
and enables Ahead-of-Time (AOT) compilation.
* - `MIVisionX <https://github.com/ROCm/MIVisionX>`_
- :version-ref:`MIVisionX rocm_version`
- Optimizes acceleration for computer vision and AI workloads like
preprocessing, augmentation, and inferencing.
* - `rocAL <https://github.com/ROCm/rocAL>`_
- :version-ref:`rocAL rocm_version`
- Accelerates the data pipeline by offloading intensive preprocessing and
augmentation tasks. rocAL is part of MIVisionX.
* - `RCCL <https://github.com/ROCm/rccl>`_
- :version-ref:`RCCL rocm_version`
- Optimizes for multi-GPU communication for operations like AllReduce and
Broadcast.
* - `rocDecode <https://github.com/ROCm/rocDecode>`_
- :version-ref:`rocDecode rocm_version`
- Provides hardware-accelerated data decoding capabilities, particularly
for image, video, and other dataset formats.
* - `rocJPEG <https://github.com/ROCm/rocJPEG>`_
- :version-ref:`rocJPEG rocm_version`
- Provides hardware-accelerated JPEG image decoding and encoding.
* - `RPP <https://github.com/ROCm/RPP>`_
- :version-ref:`RPP rocm_version`
- Speeds up data augmentation, transformation, and other preprocessing steps.
* - `rocThrust <https://github.com/ROCm/rocThrust>`_
- :version-ref:`rocThrust rocm_version`
- Provides a C++ template library for parallel algorithms like sorting,
reduction, and scanning.
* - `rocWMMA <https://github.com/ROCm/rocWMMA>`_
- :version-ref:`rocWMMA rocm_version`
- Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
multiplication (GEMM) and accumulation operations with mixed precision
support.
Supported features
================================================================================
Many functions and methods available in upstream DGL are also supported in DGL on ROCm.
Instead of listing them all, support is grouped into the following categories to provide a general overview.
* DGL Base
* DGL Backend
* DGL Data
* DGL Dataloading
* DGL DGLGraph
* DGL Function
* DGL Ops
* DGL Sampling
* DGL Transforms
* DGL Utils
* DGL Distributed
* DGL Geometry
* DGL Mpops
* DGL NN
* DGL Optim
* DGL Sparse
Unsupported features
================================================================================
* Graphbolt
* Partial TF32 support (MI250X only)
* Kineto/ROCTracer integration
Unsupported functions
================================================================================
* ``more_nnz``
* ``format``
* ``multiprocess_sparse_adam_state_dict``
* ``record_stream_ndarray``
* ``half_spmm``
* ``segment_mm``
* ``gather_mm_idx_b``
* ``pgexplainer``
* ``sample_labors_prob``
* ``sample_labors_noprob``

View File

@@ -1,314 +0,0 @@
:orphan:
.. meta::
:description: JAX compatibility
:keywords: GPU, JAX compatibility
.. version-set:: rocm_version latest
*******************************************************************************
JAX compatibility
*******************************************************************************
JAX provides a NumPy-like API, which combines automatic differentiation and the
Accelerated Linear Algebra (XLA) compiler to achieve high-performance machine
learning at scale.
JAX uses composable transformations of Python and NumPy through just-in-time
(JIT) compilation, automatic vectorization, and parallelization. To learn about
JAX, including profiling and optimizations, see the official `JAX documentation
<https://jax.readthedocs.io/en/latest/notebooks/quickstart.html>`_.
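As a minimal sketch of these composable transformations, the following example JIT-compiles a gradient function and vectorizes a loss over a batch. The loss function and array shapes are illustrative only; the calls shown (``jax.jit``, ``jax.grad``, ``jax.vmap``) are standard JAX APIs that work unchanged on the ROCm backend.

.. code-block:: python

   # Minimal sketch of jit/grad/vmap composition; the loss function is illustrative.
   import jax
   import jax.numpy as jnp

   def loss(w, x):
       # Simple quadratic loss so the gradient is easy to check by hand.
       return jnp.sum((x @ w) ** 2)

   grad_loss = jax.jit(jax.grad(loss))               # JIT-compiled gradient via XLA
   batched_loss = jax.vmap(loss, in_axes=(None, 0))  # vectorize over the batch axis of x

   w = jnp.ones((4,))
   x = jnp.ones((8, 4))
   print(grad_loss(w, x[0]))    # gradient w.r.t. w for a single sample
   print(batched_loss(w, x))    # per-sample losses for the whole batch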
ROCm support for JAX is upstreamed, and users can build the official source code
with ROCm support:
- ROCm JAX release:
- Offers AMD-validated and community :ref:`Docker images <jax-docker-compat>`
with ROCm and JAX preinstalled.
- ROCm JAX repository: `ROCm/jax <https://github.com/ROCm/jax>`_
- See the :doc:`ROCm JAX installation guide <rocm-install-on-linux:install/3rd-party/jax-install>`
to get started.
- Official JAX release:
- Official JAX repository: `jax-ml/jax <https://github.com/jax-ml/jax>`_
- See the `AMD GPU (Linux) installation section
<https://jax.readthedocs.io/en/latest/installation.html#amd-gpu-linux>`_ in
the JAX documentation.
.. note::
AMD releases official `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax>`_
quarterly alongside new ROCm releases. These images undergo full AMD testing.
`Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
follow upstream JAX releases and use the latest available ROCm version.
Use cases and recommendations
================================================================================
* The `nanoGPT in JAX <https://rocm.blogs.amd.com/artificial-intelligence/nanoGPT-JAX/README.html>`_
blog explores the implementation and training of a Generative Pre-trained
Transformer (GPT) model in JAX, inspired by Andrej Karpathy's PyTorch-based
nanoGPT. Comparing how essential GPT components, such as self-attention
mechanisms and optimizers, are realized in PyTorch and JAX also highlights
JAX's unique features.
* The `Optimize GPT Training: Enabling Mixed Precision Training in JAX using
ROCm on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-mixed-precision/README.html>`_
blog post provides a comprehensive guide on enhancing the training efficiency
of GPT models by implementing mixed precision techniques in JAX, specifically
tailored for AMD GPUs utilizing the ROCm platform.
* The `Supercharging JAX with Triton Kernels on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-triton/README.html>`_
blog demonstrates how to develop a custom fused dropout-activation kernel for
matrices using Triton, integrate it with JAX, and benchmark its performance
using ROCm.
* The `Distributed fine-tuning with JAX on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/distributed-sft-jax/README.html>`_
blog post outlines the process of fine-tuning a Bidirectional Encoder Representations
from Transformers (BERT)-based large language model (LLM) using JAX for a text
classification task. It discusses techniques for parallelizing the
fine-tuning across multiple AMD GPUs and assesses the model's performance on a
holdout dataset. During fine-tuning, a BERT-base-cased transformer model
and the General Language Understanding Evaluation (GLUE) benchmark dataset were
used on a multi-GPU setup.
* The `MI300X workload optimization guide <https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/workload.html>`_
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
accelerator using ROCm. The page is aimed at helping users achieve optimal
performance for deep learning and other high-performance computing tasks on
the MI300X GPU.
For more use cases and recommendations, see `ROCm JAX blog posts <https://rocm.blogs.amd.com/blog/tag/jax.html>`_.
.. _jax-docker-compat:
Docker image compatibility
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes ready-made `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax>`_
with ROCm backends on Docker Hub. The following Docker image tags and
associated inventories represent the latest JAX version from the official Docker Hub and are validated for
`ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`_. Click the |docker-icon|
icon to view the image on Docker Hub.
.. list-table:: JAX Docker image components
:header-rows: 1
* - Docker image
- JAX
- Linux
- Python
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.4.2-jax0.4.35-py3.12/images/sha256-8918fa806a172c1a10eb2f57131eb31b5d7c8fa1656b8729fe7d3d736112de83"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
- Ubuntu 24.04
- `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.4.2-jax0.4.35-py3.10/images/sha256-a394be13c67b7fc602216abee51233afd4b6cb7adaa57ca97e688fba82f9ad79"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
- Ubuntu 22.04
- `3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
AMD publishes `Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
with ROCm backends on Docker Hub. The following Docker image tags and
associated inventories are tested for `ROCm 6.3.2 <https://repo.radeon.com/rocm/apt/6.3.2/>`_.
.. list-table:: JAX community Docker image components
:header-rows: 1
* - Docker image
- JAX
- Linux
- Python
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.12.8/images/sha256-25dfaa0183e274bd0a3554a309af3249c6f16a1793226cb5373f418e39d3146a"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
- Ubuntu 22.04
- `3.12.8 <https://www.python.org/downloads/release/python-3128/>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.11.11/images/sha256-ff9baeca9067d13e6c279c911e5a9e5beed0817d24fafd424367cc3d5bd381d7"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
- Ubuntu 22.04
- `3.11.11 <https://www.python.org/downloads/release/python-31111/>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.10.16/images/sha256-8bab484be1713655f74da51a191ed824bb9d03db1104fd63530a1ac3c37cf7b1"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
- Ubuntu 22.04
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
.. _key_rocm_libraries:
Key ROCm libraries for JAX
================================================================================
The following ROCm libraries represent potential targets that could be utilized
by JAX on ROCm for various computational tasks. The actual libraries used will
depend on the specific implementation and operations performed.
.. list-table::
:header-rows: 1
* - ROCm library
- Version
- Purpose
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`_
- :version-ref:`hipBLAS rocm_version`
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
matrix and vector operations.
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
- :version-ref:`hipBLASLt rocm_version`
- hipBLASLt is an extension of hipBLAS, providing additional
features like epilogues fused into the matrix multiplication kernel or
use of integer tensor cores.
* - `hipCUB <https://github.com/ROCm/hipCUB>`_
- :version-ref:`hipCUB rocm_version`
- Provides a C++ template library for parallel algorithms for reduction,
scan, sort and select.
* - `hipFFT <https://github.com/ROCm/hipFFT>`_
- :version-ref:`hipFFT rocm_version`
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
* - `hipRAND <https://github.com/ROCm/hipRAND>`_
- :version-ref:`hipRAND rocm_version`
- Provides fast random number generation for GPUs.
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
- :version-ref:`hipSOLVER rocm_version`
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and
singular value decompositions (SVD).
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_
- :version-ref:`hipSPARSE rocm_version`
- Accelerates operations on sparse matrices, such as sparse matrix-vector
or matrix-matrix products.
* - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`_
- :version-ref:`hipSPARSELt rocm_version`
- Accelerates operations on sparse matrices, such as sparse matrix-vector
or matrix-matrix products.
* - `MIOpen <https://github.com/ROCm/MIOpen>`_
- :version-ref:`MIOpen rocm_version`
- Optimized for deep learning primitives such as convolutions, pooling,
normalization, and activation functions.
* - `RCCL <https://github.com/ROCm/rccl>`_
- :version-ref:`RCCL rocm_version`
- Optimized for multi-GPU communication for operations like all-reduce,
broadcast, and scatter.
* - `rocThrust <https://github.com/ROCm/rocThrust>`_
- :version-ref:`rocThrust rocm_version`
- Provides a C++ template library for parallel algorithms like sorting,
reduction, and scanning.
.. note::
This table shows ROCm libraries that could potentially be utilized by JAX. Not
all libraries may be used in every configuration, and the actual library usage
will depend on the specific operations and implementation details.
Supported data types and modules
===============================================================================
The following sections list the supported public JAX API data types and modules.
Supported data types
--------------------------------------------------------------------------------
ROCm supports all the JAX data types in the `jax.dtypes <https://docs.jax.dev/en/latest/jax.dtypes.html>`_
module, as well as `jax.numpy.dtype <https://docs.jax.dev/en/latest/_autosummary/jax.numpy.dtype.html>`_
and the `default dtypes <https://docs.jax.dev/en/latest/default_dtypes.html>`_.
The ROCm-supported data types in JAX are collected in the following table.
.. list-table::
:header-rows: 1
* - Data type
- Description
* - ``bfloat16``
- 16-bit bfloat (brain floating point).
* - ``bool``
- Boolean.
* - ``complex128``
- 128-bit complex.
* - ``complex64``
- 64-bit complex.
* - ``float16``
- 16-bit (half precision) floating-point.
* - ``float32``
- 32-bit (single precision) floating-point.
* - ``float64``
- 64-bit (double precision) floating-point.
* - ``half``
- 16-bit (half precision) floating-point.
* - ``int16``
- Signed 16-bit integer.
* - ``int32``
- Signed 32-bit integer.
* - ``int64``
- Signed 64-bit integer.
* - ``int8``
- Signed 8-bit integer.
* - ``uint16``
- Unsigned 16-bit (word) integer.
* - ``uint32``
- Unsigned 32-bit (dword) integer.
* - ``uint64``
- Unsigned 64-bit (qword) integer.
* - ``uint8``
- Unsigned 8-bit (byte) integer.
.. note::
JAX data type support is affected by the :ref:`key_rocm_libraries` and is
collected on the :doc:`ROCm data types and precision support <rocm:reference/precision-support>`
page.
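A minimal sketch of selecting data types in JAX follows; the shapes and values are illustrative only, and 64-bit types require enabling x64 mode, which is standard JAX behavior rather than anything specific to ROCm.

.. code-block:: python

   # Minimal sketch of requesting specific dtypes in JAX.
   import jax
   import jax.numpy as jnp

   jax.config.update("jax_enable_x64", True)    # allow float64/int64
   a = jnp.ones((2, 2), dtype=jnp.bfloat16)
   b = jnp.arange(4, dtype=jnp.int32)
   c = jnp.zeros(3, dtype=jnp.float64)
   print(a.dtype, b.dtype, c.dtype)             # bfloat16 int32 float64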
Supported modules
--------------------------------------------------------------------------------
For a complete and up-to-date list of JAX public modules (for example, ``jax.numpy``,
``jax.scipy``, ``jax.lax``), their descriptions, and usage, please refer directly to the
`official JAX API documentation <https://jax.readthedocs.io/en/latest/jax.html>`_.
.. note::
Since version 0.1.56, JAX has full support for ROCm, and the
:ref:`Known issues and important notes <jax_comp_known_issues>` section
contains details about limitations specific to the ROCm backend. The list of
JAX API modules is maintained by the JAX project and is subject to change.
Refer to the official JAX documentation for the most up-to-date information.

View File

@@ -1,93 +0,0 @@
:orphan:
.. meta::
:description: Megablocks compatibility
:keywords: GPU, megablocks, compatibility
.. version-set:: rocm_version latest
********************************************************************************
Megablocks compatibility
********************************************************************************
Megablocks is a lightweight library for mixture-of-experts (MoE) training.
The core of the system is efficient "dropless-MoE" and standard MoE layers.
Megablocks is integrated with `https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`_,
where data and pipeline parallel training of MoEs is supported.
* ROCm support for Megablocks is hosted in the official `https://github.com/ROCm/megablocks <https://github.com/ROCm/megablocks>`_ repository.
* Due to independent compatibility considerations, this location differs from the `https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`_ upstream repository.
* Use the prebuilt :ref:`Docker image <megablocks-docker-compat>` with ROCm, PyTorch, and Megablocks preinstalled.
* See the :doc:`ROCm Megablocks installation guide <rocm-install-on-linux:install/3rd-party/megablocks-install>` to install and get started.
.. note::
Megablocks is supported on ROCm 6.3.0.
Supported devices
================================================================================
- **Officially Supported**: AMD Instinct MI300X
- **Partially Supported** (functionality or performance limitations): AMD Instinct MI250X, MI210
Supported models and features
================================================================================
This section summarizes the Megablocks features supported by ROCm.
* Distributed Pre-training
* Activation Checkpointing and Recomputation
* Distributed Optimizer
* Mixture-of-Experts
* dropless-Mixture-of-Experts
.. _megablocks-recommendations:
Use cases and recommendations
================================================================================
The `ROCm Megablocks blog posts <https://rocm.blogs.amd.com/artificial-intelligence/megablocks/README.html>`_
describe how to leverage the ROCm platform for pre-training using the Megablocks framework.
They show how to preprocess datasets and how to begin pre-training on AMD GPUs through:
* Single-GPU pre-training
* Multi-GPU pre-training
.. _megablocks-docker-compat:
Docker image compatibility
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `ROCm Megablocks images <https://hub.docker.com/r/rocm/megablocks/tags>`_
with ROCm and PyTorch backends on Docker Hub. The following Docker image tags and associated
inventories represent the latest Megablocks version from the official Docker Hub.
The Docker images have been validated for `ROCm 6.3.0 <https://repo.radeon.com/rocm/apt/6.3/>`_.
Click |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- ROCm
- Megablocks
- PyTorch
- Ubuntu
- Python
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/megablocks/megablocks-0.7.0_rocm6.3.0_ubuntu24.04_py3.12_pytorch2.4.0/images/sha256-372ff89b96599019b8f5f9db469c84add2529b713456781fa62eb9a148659ab4"><i class="fab fa-docker fa-lg"></i> rocm/megablocks</a>
- `6.3.0 <https://repo.radeon.com/rocm/apt/6.3/>`_
- `0.7.0 <https://github.com/databricks/megablocks/releases/tag/v0.7.0>`_
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_

View File

@@ -1,535 +0,0 @@
:orphan:
.. meta::
:description: PyTorch compatibility
:keywords: GPU, PyTorch compatibility
.. version-set:: rocm_version latest
********************************************************************************
PyTorch compatibility
********************************************************************************
`PyTorch <https://pytorch.org/>`__ is an open-source tensor library designed for
deep learning. PyTorch on ROCm provides mixed-precision and large-scale training
using `MIOpen <https://github.com/ROCm/MIOpen>`__ and
`RCCL <https://github.com/ROCm/rccl>`__ libraries.
ROCm support for PyTorch is upstreamed into the official PyTorch repository. Due
to independent compatibility considerations, this results in two distinct
release cycles for PyTorch on ROCm:
- ROCm PyTorch release:
- Provides the latest version of ROCm but might not necessarily support the
latest stable PyTorch version.
- Offers :ref:`Docker images <pytorch-docker-compat>` with ROCm and PyTorch
preinstalled.
- ROCm PyTorch repository: `<https://github.com/ROCm/pytorch>`__
- See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>`
to get started.
- Official PyTorch release:
- Provides the latest stable version of PyTorch but might not necessarily
support the latest ROCm version.
- Official PyTorch repository: `<https://github.com/pytorch/pytorch>`__
- See the `Nightly and latest stable version installation guide <https://pytorch.org/get-started/locally/>`__
or `Previous versions <https://pytorch.org/get-started/previous-versions/>`__
to get started.
PyTorch includes tooling that generates HIP source code from the CUDA backend.
This approach allows PyTorch to support ROCm without requiring manual code
modifications. For more information, see :doc:`HIPIFY <hipify:index>`.
ROCm development is aligned with the stable release of PyTorch, while upstream
PyTorch testing uses the stable release of ROCm to maintain consistency.
.. _pytorch-recommendations:
Use cases and recommendations
================================================================================
* :doc:`Using ROCm for AI: training a model </how-to/rocm-for-ai/training/benchmark-docker/pytorch-training>`
describes how to leverage the ROCm platform for training AI models. It covers the
steps, tools, and best practices for optimizing training workflows on AMD GPUs
using PyTorch features.
* :doc:`Single-GPU fine-tuning and inference </how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference>`
describes and demonstrates how to use the ROCm platform for the fine-tuning
and inference of machine learning models, particularly large language models
(LLMs), on systems with a single GPU. This topic provides a detailed guide for
setting up, optimizing, and executing fine-tuning and inference workflows in
such environments.
* :doc:`Multi-GPU fine-tuning and inference optimization </how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference>`
describes and demonstrates the fine-tuning and inference of machine learning
models on systems with multiple GPUs.
* The :doc:`Instinct MI300X workload optimization guide </how-to/rocm-for-ai/inference-optimization/workload>`
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
accelerator using ROCm. This guide helps users achieve optimal performance for
deep learning and other high-performance computing tasks on the MI300X
accelerator.
* The :doc:`Inception with PyTorch documentation </conceptual/ai-pytorch-inception>`
describes how PyTorch integrates with ROCm for AI workloads. It outlines the
use of PyTorch on the ROCm platform and focuses on efficiently leveraging AMD
GPU hardware for training and inference tasks in AI applications.
For more use cases and recommendations, see `ROCm PyTorch blog posts <https://rocm.blogs.amd.com/blog/tag/pytorch.html>`__.
.. _pytorch-docker-compat:
Docker image compatibility
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `PyTorch images <https://hub.docker.com/r/rocm/pytorch>`__
with ROCm backends on Docker Hub. The following Docker image tags and associated
inventories were tested on `ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__.
Click |docker-icon| to view the image on Docker Hub.
.. list-table:: PyTorch Docker image components
:header-rows: 1
:class: docker-image-compatibility
* - Docker
- PyTorch
- Ubuntu
- Python
- Apex
- torchvision
- TensorBoard
- MAGMA
- UCX
- OMPI
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-6a287591500b4048a9556c1ecc92bc411fd3d552f6c8233bc399f18eb803e8d6"><i class="fab fa-docker fa-lg"></i></a>
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`__
- 24.04
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`__
- `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.6.0/images/sha256-06b967629ba6657709f04169832cd769a11e6b491e8b1394c361d42d7a0c8b43"><i class="fab fa-docker fa-lg"></i></a>
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`__
- 22.04
- `3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`__
- `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.5.1/images/sha256-62022414217ef6de33ac5b1341e57db8a48e8573fa2ace12d48aa5edd4b99ef0"><i class="fab fa-docker fa-lg"></i></a>
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
- 24.04
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.11_pytorch_release_2.5.1/images/sha256-469a7f74fc149aff31797e011ee41978f6a190adc69fa423b3c6a718a77bd985"><i class="fab fa-docker fa-lg"></i></a>
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
- 22.04
- `3.11 <https://www.python.org/downloads/release/python-31113/>`__
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.5.1/images/sha256-37f41a1cd94019688669a1b20d33ea74156e0c129ef6b8270076ef214a6a1a2c"><i class="fab fa-docker fa-lg"></i></a>
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
- 22.04
- `3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-60824ba83dc1b9d94164925af1f81c0235c105dd555091ec04c57e05177ead1b"><i class="fab fa-docker fa-lg"></i></a>
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
- 24.04
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`__
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-fe944fe083312f901be6891ab4d3ffebf2eaf2cf4f5f0f435ef0b76ec714fabd"><i class="fab fa-docker fa-lg"></i></a>
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
- 22.04
- `3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`__
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`__
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.3.0/images/sha256-1d59251c47170c5b8960d1172a4dbe52f5793d8966edd778f168eaf32d56661a"><i class="fab fa-docker fa-lg"></i></a>
- `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`__
- 24.04
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`__
- `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`__
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13>`__
- `master <https://bitbucket.org/icl/magma/src/master/>`__
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
Key ROCm libraries for PyTorch
================================================================================
PyTorch functionality on ROCm is determined by its underlying library
dependencies. These ROCm components affect the capabilities, performance, and
feature set available to developers.
.. list-table::
:header-rows: 1
* - ROCm library
- Version
- Purpose
- Used in
* - `Composable Kernel <https://github.com/ROCm/composable_kernel>`__
- :version-ref:`"Composable Kernel" rocm_version`
- Enables faster execution of core operations like matrix multiplication
(GEMM), convolutions and transformations.
- Speeds up ``torch.permute``, ``torch.view``, ``torch.matmul``,
``torch.mm``, ``torch.bmm``, ``torch.nn.Conv2d``, ``torch.nn.Conv3d``
and ``torch.nn.MultiheadAttention``.
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`__
- :version-ref:`hipBLAS rocm_version`
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
matrix and vector operations.
- Supports operations such as matrix multiplication, matrix-vector
products, and tensor contractions. Utilized in both dense and batched
linear algebra operations.
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`__
- :version-ref:`hipBLASLt rocm_version`
- hipBLASLt is an extension of the hipBLAS library, providing additional
features like epilogues fused into the matrix multiplication kernel or
use of integer tensor cores.
- Accelerates operations such as ``torch.matmul``, ``torch.mm``, and the
matrix multiplications used in convolutional and linear layers.
* - `hipCUB <https://github.com/ROCm/hipCUB>`__
- :version-ref:`hipCUB rocm_version`
- Provides a C++ template library for parallel algorithms for reduction,
scan, sort and select.
- Supports operations such as ``torch.sum``, ``torch.cumsum``, and
``torch.sort``. Irregular shapes often involve scanning, sorting, and
filtering, which hipCUB handles efficiently.
* - `hipFFT <https://github.com/ROCm/hipFFT>`__
- :version-ref:`hipFFT rocm_version`
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
- Used in functions like the ``torch.fft`` module.
* - `hipRAND <https://github.com/ROCm/hipRAND>`__
- :version-ref:`hipRAND rocm_version`
- Provides fast random number generation for GPUs.
- The ``torch.rand``, ``torch.randn``, and stochastic layers like
``torch.nn.Dropout`` rely on hipRAND.
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`__
- :version-ref:`hipSOLVER rocm_version`
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and
singular value decompositions (SVD).
- Supports functions like ``torch.linalg.solve``,
``torch.linalg.eig``, and ``torch.linalg.svd``.
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`__
- :version-ref:`hipSPARSE rocm_version`
- Accelerates operations on sparse matrices, such as sparse matrix-vector
or matrix-matrix products.
- Sparse tensor operations ``torch.sparse``.
* - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`__
- :version-ref:`hipSPARSELt rocm_version`
- Accelerates operations on sparse matrices, such as sparse matrix-vector
or matrix-matrix products.
- Sparse tensor operations ``torch.sparse``.
* - `hipTensor <https://github.com/ROCm/hipTensor>`__
- :version-ref:`hipTensor rocm_version`
- Optimizes for high-performance tensor operations, such as contractions.
- Accelerates tensor algebra, especially in deep learning and scientific
computing.
* - `MIOpen <https://github.com/ROCm/MIOpen>`__
- :version-ref:`MIOpen rocm_version`
- Optimizes deep learning primitives such as convolutions, pooling,
normalization, and activation functions.
- Speeds up convolutional neural networks (CNNs), recurrent neural
networks (RNNs), and other layers. Used in operations like
``torch.nn.Conv2d``, ``torch.nn.ReLU``, and ``torch.nn.LSTM``.
* - `MIGraphX <https://github.com/ROCm/AMDMIGraphX>`__
- :version-ref:`MIGraphX rocm_version`
- Adds graph-level optimizations, ONNX model and mixed-precision support,
and enables Ahead-of-Time (AOT) compilation.
- Speeds up inference and executes ONNX models for compatibility with
other frameworks, accelerating layers such as ``torch.nn.Conv2d``,
``torch.nn.ReLU``, and ``torch.nn.LSTM``.
* - `MIVisionX <https://github.com/ROCm/MIVisionX>`__
- :version-ref:`MIVisionX rocm_version`
- Optimizes acceleration for computer vision and AI workloads like
preprocessing, augmentation, and inferencing.
- Faster data preprocessing and augmentation pipelines for datasets like
ImageNet or COCO and easy to integrate into PyTorch's ``torch.utils.data``
and ``torchvision`` workflows.
* - `rocAL <https://github.com/ROCm/rocAL>`__
- :version-ref:`rocAL rocm_version`
- Accelerates the data pipeline by offloading intensive preprocessing and
augmentation tasks. rocAL is part of MIVisionX.
- Easy to integrate into PyTorch's ``torch.utils.data`` and
``torchvision`` data load workloads.
* - `RCCL <https://github.com/ROCm/rccl>`__
- :version-ref:`RCCL rocm_version`
- Optimizes for multi-GPU communication for operations like AllReduce and
Broadcast.
- Distributed data parallel training (``torch.nn.parallel.DistributedDataParallel``).
Handles communication in multi-GPU setups.
* - `rocDecode <https://github.com/ROCm/rocDecode>`__
- :version-ref:`rocDecode rocm_version`
- Provides hardware-accelerated data decoding capabilities, particularly
for image, video, and other dataset formats.
- Can be integrated in ``torch.utils.data``, ``torchvision.transforms``
and ``torch.distributed``.
* - `rocJPEG <https://github.com/ROCm/rocJPEG>`__
- :version-ref:`rocJPEG rocm_version`
- Provides hardware-accelerated JPEG image decoding and encoding.
- GPU accelerated ``torchvision.io.decode_jpeg`` and
``torchvision.io.encode_jpeg`` and can be integrated in
``torch.utils.data`` and ``torchvision``.
* - `RPP <https://github.com/ROCm/RPP>`__
- :version-ref:`RPP rocm_version`
- Speeds up data augmentation, transformation, and other preprocessing steps.
- Easy to integrate into PyTorch's ``torch.utils.data`` and
``torchvision`` data load workloads to speed up data processing.
* - `rocThrust <https://github.com/ROCm/rocThrust>`__
- :version-ref:`rocThrust rocm_version`
- Provides a C++ template library for parallel algorithms like sorting,
reduction, and scanning.
- Utilized in backend operations for tensor computations requiring
parallel processing.
* - `rocWMMA <https://github.com/ROCm/rocWMMA>`__
- :version-ref:`rocWMMA rocm_version`
- Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
multiplication (GEMM) and accumulation operations with mixed precision
support.
- Linear layers (``torch.nn.Linear``), convolutional layers
(``torch.nn.Conv2d``), attention layers, general tensor operations that
involve matrix products, such as ``torch.matmul``, ``torch.bmm``, and
more.
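These libraries are dispatched transparently by the ROCm PyTorch build, so no ROCm-specific calls appear in user code. The following minimal sketch shows ordinary PyTorch operations that are typically backed by the libraries above; which library handles each operation is an implementation detail and may vary by configuration, and the model and shapes are illustrative only.

.. code-block:: python

   # Minimal sketch: ordinary PyTorch ops that typically route to ROCm libraries
   # (MIOpen for Conv2d, hipBLASLt/rocBLAS for the GEMM in Linear, hipRAND for Dropout).
   import torch
   import torch.nn as nn

   model = nn.Sequential(
       nn.Conv2d(3, 8, kernel_size=3, padding=1),  # convolution (MIOpen)
       nn.ReLU(),
       nn.Flatten(),
       nn.Dropout(p=0.1),                          # RNG-backed dropout (hipRAND)
       nn.Linear(8 * 32 * 32, 10),                 # GEMM (hipBLASLt/rocBLAS)
   ).to("cuda")

   x = torch.randn(4, 3, 32, 32, device="cuda")
   out = model(x)
   print(out.shape)                                # torch.Size([4, 10])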
Supported modules and data types
================================================================================
The following sections outline the supported data types, modules, and domain libraries available in PyTorch on ROCm.
Supported data types
--------------------------------------------------------------------------------
The tensor data type is specified using the ``dtype`` attribute or argument.
PyTorch supports many data types for different use cases.
The following table lists `torch.Tensor <https://pytorch.org/docs/stable/tensors.html>`__
single data types:
.. list-table::
:header-rows: 1
* - Data type
- Description
* - ``torch.float8_e4m3fn``
- 8-bit floating point, e4m3
* - ``torch.float8_e5m2``
- 8-bit floating point, e5m2
* - ``torch.float16`` or ``torch.half``
- 16-bit floating point
* - ``torch.bfloat16``
- 16-bit floating point
* - ``torch.float32`` or ``torch.float``
- 32-bit floating point
* - ``torch.float64`` or ``torch.double``
- 64-bit floating point
* - ``torch.complex32`` or ``torch.chalf``
- 32-bit complex numbers
* - ``torch.complex64`` or ``torch.cfloat``
- 64-bit complex numbers
* - ``torch.complex128`` or ``torch.cdouble``
- 128-bit complex numbers
* - ``torch.uint8``
- 8-bit integer (unsigned)
* - ``torch.uint16``
- 16-bit integer (unsigned);
Not natively supported in ROCm
* - ``torch.uint32``
- 32-bit integer (unsigned);
Not natively supported in ROCm
* - ``torch.uint64``
- 64-bit integer (unsigned);
Not natively supported in ROCm
* - ``torch.int8``
- 8-bit integer (signed)
* - ``torch.int16`` or ``torch.short``
- 16-bit integer (signed)
* - ``torch.int32`` or ``torch.int``
- 32-bit integer (signed)
* - ``torch.int64`` or ``torch.long``
- 64-bit integer (signed)
* - ``torch.bool``
- Boolean
* - ``torch.quint8``
- Quantized 8-bit integer (unsigned)
* - ``torch.qint8``
- Quantized 8-bit integer (signed)
* - ``torch.qint32``
- Quantized 32-bit integer (signed)
* - ``torch.quint4x2``
- Quantized 4-bit integer (unsigned)
.. note::
Unsigned types, except ``uint8``, have limited support in eager mode. They
primarily exist to assist usage with ``torch.compile``.
See :doc:`ROCm precision support <rocm:reference/precision-support>` for the
native hardware support of data types.
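As a minimal illustration of selecting a data type with ``dtype`` (a sketch
that assumes a ROCm-capable GPU is visible to PyTorch; shapes and values are
arbitrary):

.. code-block:: python

   import torch

   # PyTorch on ROCm reuses the "cuda" device string.
   x = torch.randn(4, 4, dtype=torch.bfloat16, device="cuda")
   y = torch.ones(4, 4, dtype=torch.bfloat16, device="cuda")
   z = torch.matmul(x, y)   # runs on the GPU in bfloat16
   print(z.dtype, z.device)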
Supported modules
--------------------------------------------------------------------------------
For a complete and up-to-date list of PyTorch core modules (for example, ``torch``,
``torch.nn``, ``torch.cuda``, ``torch.backends.cuda`` and
``torch.backends.cudnn``), their descriptions, and usage, please refer directly
to the `official PyTorch documentation <https://pytorch.org/docs/stable/index.html>`_.
Core PyTorch functionality on ROCm includes tensor operations, neural network
layers, automatic differentiation, distributed training, mixed-precision
training, compilation features, and domain-specific libraries for audio, vision,
text processing, and more.
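For example, a minimal sketch combining mixed-precision autocast with
``torch.compile``, two of the features mentioned above (assuming PyTorch 2.x
with a ROCm-capable GPU; the model is arbitrary):

.. code-block:: python

   import torch

   model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.ReLU()).cuda()
   compiled = torch.compile(model)

   x = torch.randn(8, 64, device="cuda")
   with torch.autocast(device_type="cuda", dtype=torch.float16):
       y = compiled(x)
   print(y.dtype)  # float16 inside the autocast region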
Supported domain libraries
--------------------------------------------------------------------------------
PyTorch offers specialized `domain libraries <https://pytorch.org/domains/>`_ with
GPU acceleration that build on its core features to support specific application
areas. The table below lists the PyTorch domain libraries that are compatible
with ROCm.
.. list-table::
:header-rows: 1
* - Library
- Description
* - `torchaudio <https://docs.pytorch.org/audio/stable/index.html>`_
- Audio and signal processing library for PyTorch. Provides utilities for
audio I/O, signal and data processing functions, datasets, model
implementations, and application components for audio and speech
processing tasks.
**Note:** To ensure GPU acceleration with ``torchaudio.transforms``,
you need to explicitly move the audio data (waveform tensor) to the GPU using
``.to('cuda')``, as shown in the sketch after this table.
* - `torchtune <https://docs.pytorch.org/torchtune/stable/index.html>`_
- PyTorch-native library designed for fine-tuning large language models
(LLMs). Supports the full fine-tuning workflow and offers
compatibility with popular production inference systems.
**Note:** Only the official release exists.
* - `torchvision <https://docs.pytorch.org/vision/stable/index.html>`_
- Computer vision library that is part of the PyTorch project. Provides
popular datasets, model architectures, and common image transformations
for computer vision applications.
* - `torchtext <https://docs.pytorch.org/text/stable/index.html>`_
- Text processing library for PyTorch. Provides data processing utilities
and popular datasets for natural language processing, including
tokenization, vocabulary management, and text embeddings.
**Note:** ``torchtext`` does not implement ROCm-specific kernels.
ROCm acceleration is provided through the underlying PyTorch framework
and ROCm library integration. Only the official release exists.
* - `torchdata <https://docs.pytorch.org/data/beta/index.html>`_
- Beta library of common modular data loading primitives for easily
constructing flexible and performant data pipelines, with features still
in the prototype stage.
* - `torchrec <https://docs.pytorch.org/torchrec/>`_
- PyTorch domain library for common sparsity and parallelism primitives
needed for large-scale recommender systems, enabling authors to train
models with large embedding tables shared across many GPUs.
**Note:** ``torchrec`` does not implement ROCm-specific kernels. ROCm
acceleration is provided through the underlying PyTorch framework and
ROCm library integration.
* - `torchserve <https://docs.pytorch.org/serve/>`_
- Performant, flexible and easy-to-use tool for serving PyTorch models in
production, providing features for model management, batch processing,
and scalable deployment.
**Note:** `torchserve <https://docs.pytorch.org/serve/>`_ is no longer
actively maintained. The last official release shipped with PyTorch 2.4.
* - `torchrl <https://docs.pytorch.org/rl/stable/index.html>`_
- Open-source, Python-first Reinforcement Learning library for PyTorch
with a focus on high modularity and good runtime performance, providing
low- and high-level RL abstractions and reusable functionals for cost
functions, returns, and data processing.
**Note:** Only the official release exists.
* - `tensordict <https://docs.pytorch.org/tensordict/stable/index.html>`_
- Dictionary-like class that simplifies operations on batches of tensors,
enhancing code readability, compactness, and modularity by abstracting
tailored operations and reducing errors through automatic operation
dispatching.
**Note:** Only the official release exists.
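As referenced in the ``torchaudio`` note above, the following is a minimal
sketch of moving audio data to the GPU before applying a transform
(``sample.wav`` is a placeholder file name):

.. code-block:: python

   import torchaudio

   waveform, sample_rate = torchaudio.load("sample.wav")

   # Both the transform and the waveform must live on the GPU
   # for the computation to be GPU-accelerated.
   transform = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate).to("cuda")
   mel = transform(waveform.to("cuda"))
   print(mel.shape, mel.device)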


@@ -1,100 +0,0 @@
:orphan:
.. meta::
:description: Stanford Megatron-LM compatibility
:keywords: Stanford, Megatron-LM, compatibility
.. version-set:: rocm_version latest
********************************************************************************
Stanford Megatron-LM compatibility
********************************************************************************
Stanford Megatron-LM is a large-scale language model training framework originally developed by NVIDIA (`https://github.com/NVIDIA/Megatron-LM <https://github.com/NVIDIA/Megatron-LM>`_). It is
designed to train massive transformer-based language models efficiently using model and data parallelism.
* ROCm support for Stanford Megatron-LM is hosted in the official `https://github.com/ROCm/Stanford-Megatron-LM <https://github.com/ROCm/Stanford-Megatron-LM>`_ repository.
* Due to independent compatibility considerations, this location differs from the `https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`_ upstream repository.
* Use the prebuilt :ref:`Docker image <megatron-lm-docker-compat>` with ROCm, PyTorch, and Megatron-LM preinstalled.
* See the :doc:`ROCm Stanford Megatron-LM installation guide <rocm-install-on-linux:install/3rd-party/stanford-megatron-lm-install>` to install and get started.
.. note::
Stanford Megatron-LM is supported on ROCm 6.3.0.
Supported devices
================================================================================
- **Officially Supported**: AMD Instinct MI300X
- **Partially Supported** (functionality or performance limitations): AMD Instinct MI250X, MI210X
Supported models and features
================================================================================
This section details the models and features supported by Stanford Megatron-LM on ROCm.
Models:
* BERT
* GPT
* T5
* ICT
Features:
* Distributed Pre-training
* Activation Checkpointing and Recomputation
* Distributed Optimizer
* Mixture-of-Experts
.. _megatron-lm-recommendations:
Use cases and recommendations
================================================================================
See the `Efficient MoE training on AMD ROCm: How-to use Megablocks on AMD GPUs blog <https://rocm.blogs.amd.com/artificial-intelligence/megablocks/README.html>`_ post
to learn how to leverage the ROCm platform for pre-training with the Stanford Megatron-LM framework, including pre-processing datasets on AMD GPUs.
Coverage includes:
* Single-GPU pre-training
* Multi-GPU pre-training
.. _megatron-lm-docker-compat:
Docker image compatibility
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes `Stanford Megatron-LM images <https://hub.docker.com/r/rocm/megatron-lm>`_
with ROCm and PyTorch backends on Docker Hub. The following Docker image tags and associated
inventories represent the latest Megatron-LM version from the official Docker Hub.
The Docker images have been validated for `ROCm 6.3.0 <https://repo.radeon.com/rocm/apt/6.3/>`_.
Click |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- Stanford Megatron-LM
- PyTorch
- Ubuntu
- Python
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/stanford-megatron-lm/stanford-megatron-lm85f95ae_rocm6.3.0_ubuntu24.04_py3.12_pytorch2.4.0/images/sha256-070556f078be10888a1421a2cb4f48c29f28b02bfeddae02588d1f7fc02a96a6"><i class="fab fa-docker fa-lg"></i></a>
- `85f95ae <https://github.com/stanford-futuredata/Megatron-LM/commit/85f95aef3b648075fe6f291c86714fdcbd9cd1f5>`_
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
- 24.04
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_


@@ -1,76 +0,0 @@
:orphan:
.. meta::
:description: Taichi compatibility
:keywords: GPU, Taichi compatibility
.. version-set:: rocm_version latest
*******************************************************************************
Taichi compatibility
*******************************************************************************
`Taichi <https://www.taichi-lang.org/>`_ is an open-source, imperative, and parallel
programming language designed for high-performance numerical computation.
Embedded in Python, it leverages just-in-time (JIT) compilation frameworks such as LLVM to accelerate
compute-intensive Python code by compiling it to native GPU or CPU instructions.
Taichi is widely used across various domains, including real-time physical simulation,
numerical computing, augmented reality, artificial intelligence, computer vision, robotics,
visual effects in film and gaming, and general-purpose computing.
* ROCm support for Taichi is hosted in the official `https://github.com/ROCm/taichi <https://github.com/ROCm/taichi>`_ repository.
* Due to independent compatibility considerations, this location differs from the `https://github.com/taichi-dev <https://github.com/taichi-dev>`_ upstream repository.
* Use the prebuilt :ref:`Docker image <taichi-docker-compat>` with ROCm, PyTorch, and Taichi preinstalled.
* See the :doc:`ROCm Taichi installation guide <rocm-install-on-linux:install/3rd-party/taichi-install>` to install and get started.
.. note::
Taichi is supported on ROCm 6.3.2.
Supported devices and features
===============================================================================
All Taichi GPU features are supported through the ROCm software stack on AMD Instinct MI250X and MI210X series GPUs, with the exception of Taichi's GPU rendering system, CGUI.
AMD Instinct MI300X series GPUs will be supported by November.
.. _taichi-recommendations:
Use cases and recommendations
================================================================================
To fully leverage Taichi's performance capabilities in compute-intensive tasks, it is best to adhere to specific coding patterns and utilize Taichi decorators.
A collection of example use cases is available in the `https://github.com/ROCm/taichi_examples <https://github.com/ROCm/taichi_examples>`_ repository,
providing practical insights and foundational knowledge for working with the Taichi programming language.
You can also refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`_ to search for Taichi examples and best practices to optimize your workflows on AMD GPUs.
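As a minimal sketch of the decorator pattern (assuming a working ROCm Taichi
installation; the field size and values are arbitrary):

.. code-block:: python

   import taichi as ti

   # ti.gpu selects an available GPU backend at runtime.
   ti.init(arch=ti.gpu)

   n = 1_000_000
   x = ti.field(dtype=ti.f32, shape=n)

   @ti.kernel
   def fill_and_scale(scale: ti.f32):
       # The outermost for-loop in a @ti.kernel is parallelized on the GPU.
       for i in range(n):
           x[i] = i * scale

   fill_and_scale(0.5)
   print(x[10])  # 5.0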
.. _taichi-docker-compat:
Docker image compatibility
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes ready-made `ROCm Taichi Docker images <https://hub.docker.com/r/rocm/taichi/tags>`_
with ROCm backends on Docker Hub. The following Docker image tags and associated inventories
represent the latest Taichi version from the official Docker Hub.
The Docker images have been validated for `ROCm 6.3.2 <https://rocm.docs.amd.com/en/docs-6.3.2/about/release-notes.html>`_.
Click |docker-icon| to view the image on Docker Hub.
.. list-table::
:header-rows: 1
:class: docker-image-compatibility
* - Docker image
- ROCm
- Taichi
- Ubuntu
- Python
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/taichi/taichi-1.8.0b1_rocm6.3.2_ubuntu22.04_py3.10.12/images/sha256-e016964a751e6a92199032d23e70fa3a564fff8555afe85cd718f8aa63f11fc6"><i class="fab fa-docker fa-lg"></i> rocm/taichi</a>
- `6.3.2 <https://repo.radeon.com/rocm/apt/6.3.2/>`_
- `1.8.0b1 <https://github.com/taichi-dev/taichi>`_
- 22.04
- `3.10.12 <https://www.python.org/downloads/release/python-31012/>`_


@@ -1,504 +0,0 @@
:orphan:
.. meta::
:description: TensorFlow compatibility
:keywords: GPU, TensorFlow compatibility
.. version-set:: rocm_version latest
*******************************************************************************
TensorFlow compatibility
*******************************************************************************
`TensorFlow <https://www.tensorflow.org/>`__ is an open-source library for
solving machine learning, deep learning, and AI problems. It can solve many
problems across different sectors and industries but primarily focuses on
neural network training and inference. It is one of the most popular and
in-demand frameworks and is very active in open-source contribution and
development.
The `official TensorFlow repository <http://github.com/tensorflow/tensorflow>`__
includes full ROCm support. AMD maintains a TensorFlow `ROCm repository
<http://github.com/rocm/tensorflow-upstream>`__ in order to quickly add bug
fixes, updates, and support for the latest ROCm versions.
- ROCm TensorFlow release:
- Offers :ref:`Docker images <tensorflow-docker-compat>` with
ROCm and TensorFlow pre-installed.
- ROCm TensorFlow repository: `<https://github.com/ROCm/tensorflow-upstream>`__
- See the :doc:`ROCm TensorFlow installation guide <rocm-install-on-linux:install/3rd-party/tensorflow-install>`
to get started.
- Official TensorFlow release:
- Official TensorFlow repository: `<https://github.com/tensorflow/tensorflow>`__
- See the `TensorFlow API versions <https://www.tensorflow.org/versions>`__ list.
.. note::
The official TensorFlow documentation does not cover ROCm support. Use the
ROCm documentation for installation instructions for TensorFlow on ROCm.
See :doc:`rocm-install-on-linux:install/3rd-party/tensorflow-install`.
.. _tensorflow-docker-compat:
Docker image compatibility
===============================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes ready-made `TensorFlow images
<https://hub.docker.com/r/rocm/tensorflow>`__ with ROCm backends on
Docker Hub. The following Docker image tags and associated inventories are
validated for `ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__. Click
the |docker-icon| icon to view the image on Docker Hub.
.. list-table:: TensorFlow Docker image components
:header-rows: 1
* - Docker image
- TensorFlow
- Ubuntu
- Python
- TensorBoard
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.18-dev/images/sha256-96754ce2d30f729e19b497279915b5212ba33d5e408e7e5dd3f2304d87e3441e"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- 24.04
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.18-dev/images/sha256-fa741508d383858e86985a9efac85174529127408102558ae2e3a4ac894eea1e"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
- 22.04
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.17-dev/images/sha256-3a0aef09f2a8833c2b64b85874dd9449ffc2ad257351857338ff5b706c03a418"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- 24.04
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.17-dev/images/sha256-bc7341a41ebe7ab261aa100732874507c452421ef733e408ac4f05ed453b0bc5"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
- 22.04
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.16-dev/images/sha256-4841a8df7c340dab79bf9362dad687797649a00d594e0832eb83ea6880a40d3b"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.16.2-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- 24.04
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`__
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.16-dev/images/sha256-883fa95aba960c58a3e46fceaa18f03ede2c7df89b8e9fd603ab2d47e0852897"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.16.2-cp310-cp310-manylinux_2_28_x86_64.whl>`__
- 22.04
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`__
Critical ROCm libraries for TensorFlow
===============================================================================
TensorFlow depends on multiple components, and the supported features of those
components can affect the feature set available in TensorFlow on ROCm. The versions
in the following table refer to the first TensorFlow version where the ROCm
library was introduced as a dependency. The versions described
are available in ROCm :version:`rocm_version`.
.. list-table::
:widths: 25, 10, 35, 30
:header-rows: 1
* - ROCm library
- Version
- Purpose
- Used in
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`__
- :version-ref:`hipBLAS rocm_version`
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
matrix and vector operations.
- Accelerates operations like ``tf.matmul``, ``tf.linalg.matmul``, and
other matrix multiplications commonly used in neural network layers.
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`__
- :version-ref:`hipBLASLt rocm_version`
- Extends hipBLAS with additional optimizations like fused kernels and
integer tensor cores.
- Optimizes matrix multiplications and linear algebra operations used in
layers like dense, convolutional, and RNNs in TensorFlow.
* - `hipCUB <https://github.com/ROCm/hipCUB>`__
- :version-ref:`hipCUB rocm_version`
- Provides a C++ template library for parallel algorithms for reduction,
scan, sort and select.
- Supports operations like ``tf.reduce_sum``, ``tf.cumsum``, ``tf.sort``
and other tensor operations in TensorFlow, especially those involving
scanning, sorting, and filtering.
* - `hipFFT <https://github.com/ROCm/hipFFT>`__
- :version-ref:`hipFFT rocm_version`
- Accelerates Fast Fourier Transforms (FFT) for signal processing tasks.
- Used for operations like signal processing, image filtering, and
certain types of neural networks requiring FFT-based transformations.
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`__
- :version-ref:`hipSOLVER rocm_version`
- Provides GPU-accelerated direct linear solvers for dense and sparse
systems.
- Optimizes linear algebra functions such as solving systems of linear
equations, often used in optimization and training tasks.
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`__
- :version-ref:`hipSPARSE rocm_version`
- Optimizes sparse matrix operations for efficient computations on sparse
data.
- Accelerates sparse matrix operations in models with sparse weight
matrices or activations, commonly used in neural networks.
* - `MIOpen <https://github.com/ROCm/MIOpen>`__
- :version-ref:`MIOpen rocm_version`
- Provides optimized deep learning primitives such as convolutions,
pooling,
normalization, and activation functions.
- Speeds up convolutional neural networks (CNNs) and other layers. Used
in TensorFlow for layers like ``tf.nn.conv2d``, ``tf.nn.relu``, and
``tf.nn.lstm_cell``.
* - `RCCL <https://github.com/ROCm/rccl>`__
- :version-ref:`RCCL rocm_version`
- Optimizes for multi-GPU communication for operations like AllReduce and
Broadcast.
- Distributed data parallel training (``tf.distribute.MirroredStrategy``).
Handles communication in multi-GPU setups.
* - `rocThrust <https://github.com/ROCm/rocThrust>`__
- :version-ref:`rocThrust rocm_version`
- Provides a C++ template library for parallel algorithms like sorting,
reduction, and scanning.
- Reduction operations like ``tf.reduce_sum``, ``tf.cumsum`` (computing the
cumulative sum of elements along a given axis), or ``tf.unique`` (finding
unique elements in a tensor) can use rocThrust.
Supported and unsupported features
===============================================================================
The following section maps supported data types and GPU-accelerated TensorFlow
features to their minimum supported ROCm and TensorFlow versions.
Data types
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The data type of a tensor is specified using the ``dtype`` attribute or
argument, and TensorFlow supports a wide range of data types for different use
cases.
The basic, single data types of `tf.dtypes <https://www.tensorflow.org/api_docs/python/tf/dtypes>`__
are as follows:
.. list-table::
:header-rows: 1
* - Data type
- Description
- Since TensorFlow
- Since ROCm
* - ``bfloat16``
- 16-bit bfloat (brain floating point).
- 1.0.0
- 1.7
* - ``bool``
- Boolean.
- 1.0.0
- 1.7
* - ``complex128``
- 128-bit complex.
- 1.0.0
- 1.7
* - ``complex64``
- 64-bit complex.
- 1.0.0
- 1.7
* - ``double``
- 64-bit (double precision) floating-point.
- 1.0.0
- 1.7
* - ``float16``
- 16-bit (half precision) floating-point.
- 1.0.0
- 1.7
* - ``float32``
- 32-bit (single precision) floating-point.
- 1.0.0
- 1.7
* - ``float64``
- 64-bit (double precision) floating-point.
- 1.0.0
- 1.7
* - ``half``
- 16-bit (half precision) floating-point.
- 2.0.0
- 2.0
* - ``int16``
- Signed 16-bit integer.
- 1.0.0
- 1.7
* - ``int32``
- Signed 32-bit integer.
- 1.0.0
- 1.7
* - ``int64``
- Signed 64-bit integer.
- 1.0.0
- 1.7
* - ``int8``
- Signed 8-bit integer.
- 1.0.0
- 1.7
* - ``qint16``
- Signed quantized 16-bit integer.
- 1.0.0
- 1.7
* - ``qint32``
- Signed quantized 32-bit integer.
- 1.0.0
- 1.7
* - ``qint8``
- Signed quantized 8-bit integer.
- 1.0.0
- 1.7
* - ``quint16``
- Unsigned quantized 16-bit integer.
- 1.0.0
- 1.7
* - ``quint8``
- Unsigned quantized 8-bit integer.
- 1.0.0
- 1.7
* - ``resource``
- Handle to a mutable, dynamically allocated resource.
- 1.0.0
- 1.7
* - ``string``
- Variable-length string, represented as byte array.
- 1.0.0
- 1.7
* - ``uint16``
- Unsigned 16-bit (word) integer.
- 1.0.0
- 1.7
* - ``uint32``
- Unsigned 32-bit (dword) integer.
- 1.5.0
- 1.7
* - ``uint64``
- Unsigned 64-bit (qword) integer.
- 1.5.0
- 1.7
* - ``uint8``
- Unsigned 8-bit (byte) integer.
- 1.0.0
- 1.7
* - ``variant``
- Data of arbitrary type (known at runtime).
- 1.4.0
- 1.7
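As a minimal illustration of selecting a data type and running on the GPU
(a sketch that assumes a ROCm-visible GPU; shapes are arbitrary):

.. code-block:: python

   import tensorflow as tf

   with tf.device("/GPU:0"):
       a = tf.random.normal([4, 4], dtype=tf.float16)
       b = tf.ones([4, 4], dtype=tf.float16)
       c = tf.matmul(a, b)
   print(c.dtype, c.device)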
Features
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This table provides an overview of key features in TensorFlow and their
availability in ROCm.
.. list-table::
:header-rows: 1
* - Module
- Description
- Since TensorFlow
- Since ROCm
* - ``tf.linalg`` (Linear Algebra)
- Operations for matrix and tensor computations, such as
``tf.linalg.matmul`` (matrix multiplication), ``tf.linalg.inv``
(matrix inversion) and ``tf.linalg.cholesky`` (Cholesky decomposition).
These leverage GPUs for high-performance linear algebra operations.
- 1.4
- 1.8.2
* - ``tf.nn`` (Neural Network Operations)
- GPU-accelerated building blocks for deep learning models, such as 2D
convolutions with ``tf.nn.conv2d``, max pooling operations with
``tf.nn.max_pool``, activation functions like ``tf.nn.relu`` or softmax
for output layers with ``tf.nn.softmax``.
- 1.0
- 1.8.2
* - ``tf.image`` (Image Processing)
- GPU-accelerated functions for image preprocessing and augmentations,
such as resize images with ``tf.image.resize``, flip images horizontally
with ``tf.image.flip_left_right`` and adjust image brightness randomly
with ``tf.image.random_brightness``.
- 1.1
- 1.8.2
* - ``tf.keras`` (High-Level API)
- GPU acceleration for Keras layers and models, including dense layers
(``tf.keras.layers.Dense``), convolutional layers
(``tf.keras.layers.Conv2D``) and recurrent layers
(``tf.keras.layers.LSTM``).
- 1.4
- 1.8.2
* - ``tf.math`` (Mathematical Operations)
- GPU-accelerated mathematical operations, such as sum across dimensions
with ``tf.math.reduce_sum``, elementwise exponentiation with
``tf.math.exp`` and sigmoid activation (``tf.math.sigmoid``).
- 1.5
- 1.8.2
* - ``tf.signal`` (Signal Processing)
- Functions for spectral analysis and signal transformations.
- 1.13
- 2.1
* - ``tf.data`` (Data Input Pipeline)
- GPU-accelerated data preprocessing for efficient input pipelines:
prefetching with ``tf.data.experimental.AUTOTUNE`` and GPU-enabled
transformations like map and batch.
- 1.4
- 1.8.2
* - ``tf.distribute`` (Distributed Training)
- Enables scaling computations across multiple devices on a single
machine or across multiple machines.
- 1.13
- 2.1
* - ``tf.random`` (Random Number Generation)
- GPU-accelerated random number generation.
- 1.12
- 1.9.2
* - ``tf.TensorArray`` (Dynamic Array Operations)
- Enables dynamic tensor manipulation on GPUs.
- 1.0
- 1.8.2
* - ``tf.sparse`` (Sparse Tensor Operations)
- GPU-accelerated sparse matrix manipulations.
- 1.9
- 1.9.0
* - ``tf.experimental.numpy``
- GPU-accelerated NumPy-like API for numerical computations.
- 2.4
- 4.1.1
* - ``tf.RaggedTensor``
- Handling of variable-length sequences and ragged tensors with GPU
support.
- 1.13
- 2.1
* - ``tf.function`` with XLA (Accelerated Linear Algebra)
- Enables GPU-accelerated compilation and optimization of functions via XLA.
- 1.14
- 2.4
* - ``tf.quantization``
- Quantized operations for inference, accelerated on GPUs.
- 1.12
- 1.9.2
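To illustrate the ``tf.data`` pipeline pattern from the table above, a minimal
sketch using synthetic data (shapes and batch size are arbitrary):

.. code-block:: python

   import tensorflow as tf

   ds = (
       tf.data.Dataset.from_tensor_slices(tf.random.normal([1024, 32]))
       .map(lambda x: x * 2.0, num_parallel_calls=tf.data.AUTOTUNE)
       .batch(64)
       .prefetch(tf.data.AUTOTUNE)
   )
   for batch in ds.take(1):
       print(batch.shape)  # (64, 32)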
Distributed library features
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Enables developers to scale computations across multiple devices on a single machine or
across multiple machines.
.. list-table::
:header-rows: 1
* - Feature
- Description
- Since TensorFlow
- Since ROCm
* - ``MultiWorkerMirroredStrategy``
- Synchronous training across multiple workers using mirrored variables.
- 2.0
- 3.0
* - ``MirroredStrategy``
- Synchronous training across multiple GPUs on one machine.
- 1.5
- 2.5
* - ``TPUStrategy``
- Efficiently trains models on Google TPUs.
- 1.9
- ❌
* - ``ParameterServerStrategy``
- Asynchronous training using parameter servers for variable management.
- 2.1
- 4.0
* - ``CentralStorageStrategy``
- Keeps variables on a single device and performs computation on multiple
devices.
- 2.3
- 4.1
* - ``CollectiveAllReduceStrategy``
- Synchronous training across multiple devices and hosts.
- 1.14
- 3.5
* - Distribution Strategies API
- High-level API to simplify distributed training configuration and
execution.
- 1.10
- 3.0
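To illustrate ``MirroredStrategy`` from the table above, a minimal sketch for a
multi-GPU ROCm node (the model and shapes are arbitrary):

.. code-block:: python

   import tensorflow as tf

   strategy = tf.distribute.MirroredStrategy()
   print("Replicas in sync:", strategy.num_replicas_in_sync)

   with strategy.scope():
       model = tf.keras.Sequential([
           tf.keras.Input(shape=(32,)),
           tf.keras.layers.Dense(64, activation="relu"),
           tf.keras.layers.Dense(1),
       ])
       model.compile(optimizer="adam", loss="mse")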
Unsupported TensorFlow features
===============================================================================
The following are GPU-accelerated TensorFlow features not currently supported by
ROCm.
.. list-table::
:header-rows: 1
* - Feature
- Description
- Since TensorFlow
* - Mixed Precision with TF32
- Mixed precision with TF32 is used for matrix multiplications,
convolutions, and other linear algebra operations, particularly in
deep learning workloads like CNNs and transformers.
- 2.4
* - ``tf.distribute.TPUStrategy``
- Efficiently trains models on Google TPUs.
- 1.9
Use cases and recommendations
===============================================================================
* The `Training a Neural Collaborative Filtering (NCF) Recommender on an AMD
GPU <https://rocm.blogs.amd.com/artificial-intelligence/ncf/README.html>`__
blog post discusses training an NCF recommender system using TensorFlow. It
explains how NCF improves traditional collaborative filtering methods by
leveraging neural networks to model non-linear user-item interactions. The
post outlines the implementation using the recommenders library, focusing on
the use of implicit data (for example, user interactions like viewing or
purchasing) and how it addresses challenges like the lack of negative values.
* The `Creating a PyTorch/TensorFlow code environment on AMD GPUs
<https://rocm.blogs.amd.com/software-tools-optimization/pytorch-tensorflow-env/README.html>`__
blog post provides instructions for creating a machine learning environment
for PyTorch and TensorFlow on AMD GPUs using ROCm. It covers steps like
installing the libraries, cloning code repositories, installing dependencies,
and troubleshooting potential issues with CUDA-based code. Additionally, it
explains how to HIPify code (port CUDA code to HIP) and manage Docker images
for a better experience on AMD GPUs. This guide aims to help data scientists
and ML practitioners adapt their code for AMD GPUs.
For more use cases and recommendations, see the `ROCm Tensorflow blog posts <https://rocm.blogs.amd.com/blog/tag/tensorflow.html>`__.


@@ -1,86 +0,0 @@
:orphan:
.. meta::
:description: verl compatibility
:keywords: GPU, verl compatibility
.. version-set:: rocm_version latest
*******************************************************************************
verl compatibility
*******************************************************************************
Volcano Engine Reinforcement Learning for LLMs (verl) is a reinforcement learning framework designed for large language models (LLMs).
verl offers a scalable, open-source fine-tuning solution optimized for AMD Instinct GPUs with full ROCm support.
* See the `verl documentation <https://verl.readthedocs.io/en/latest/>`_ for more information about verl.
* The official verl GitHub repository is `https://github.com/volcengine/verl <https://github.com/volcengine/verl>`_.
* Use the AMD-validated :ref:`Docker images <verl-docker-compat>` with ROCm and verl preinstalled.
* See the :doc:`ROCm verl installation guide <rocm-install-on-linux:install/3rd-party/verl-install>` to install and get started.
.. note::
verl is supported on ROCm 6.2.0.
.. _verl-recommendations:
Use cases and recommendations
================================================================================
The benefits of verl in large-scale reinforcement learning from human feedback (RLHF) are discussed in the `Reinforcement Learning from Human Feedback on AMD GPUs with verl and ROCm Integration <https://rocm.blogs.amd.com/artificial-intelligence/verl-large-scale/README.html>`_ blog.
.. _verl-supported_features:
Supported features
===============================================================================
The following table shows verl on ROCm support for GPU-accelerated modules.
.. list-table::
:header-rows: 1
* - Module
- Description
- verl version
- ROCm version
* - ``FSDP``
- Training engine
- 0.3.0.post0
- 6.2.0
* - ``vllm``
- Inference engine
- 0.3.0.post0
- 6.2.0
.. _verl-docker-compat:
Docker image compatibility
================================================================================
.. |docker-icon| raw:: html
<i class="fab fa-docker"></i>
AMD validates and publishes ready-made `ROCm verl Docker images <https://hub.docker.com/r/rocm/verl/tags>`_
with ROCm backends on Docker Hub. The following Docker image tags and associated inventories represent the available verl versions from the official Docker Hub.
.. list-table::
:header-rows: 1
* - Docker image
- ROCm
- verl
- Ubuntu
- PyTorch
- Python
- vllm
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/verl/verl-0.3.0.post0_rocm6.2_vllm0.6.3/images/sha256-cbe423803fd7850448b22444176bee06f4dcf22cd3c94c27732752d3a39b04b2"><i class="fab fa-docker fa-lg"></i> rocm/verl</a>
- `6.2.0 <https://repo.radeon.com/rocm/apt/6.2/>`_
- `0.3.0post0 <https://github.com/volcengine/verl/releases/tag/v0.3.0.post0>`_
- 20.04
- `2.5.0 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
- `3.9.19 <https://www.python.org/downloads/release/python-3919/>`_
- `0.6.3 <https://github.com/vllm-project/vllm/releases/tag/v0.6.3>`_

File diff suppressed because it is too large.


@@ -1,407 +0,0 @@
.. meta::
:description: Using CMake
:keywords: CMake, dependencies, HIP, C++, AMD, ROCm
*********************************
Using CMake
*********************************
Most components in ROCm support CMake. Projects depending on header-only or
library components typically require CMake 3.5 or higher, whereas those wanting
to make use of the CMake HIP language support require CMake 3.21 or higher.
Finding dependencies
====================
.. note::
For a complete
reference on how to deal with dependencies in CMake, refer to the CMake docs
on `find_package
<https://cmake.org/cmake/help/latest/command/find_package.html>`_ and the
`Using Dependencies Guide
<https://cmake.org/cmake/help/latest/guide/using-dependencies/index.html>`_
to get an overview of CMake related facilities.
In short, CMake supports finding dependencies in two ways:
* In Module mode, it consults a file ``Find<PackageName>.cmake`` which tries to find the component
in typical install locations and layouts. CMake ships a few dozen such scripts, but users and projects
may ship them as well.
* In Config mode, it locates a file named ``<packagename>-config.cmake`` or
``<PackageName>Config.cmake`` which describes the installed component in all regards needed to
consume it.
ROCm predominantly relies on Config mode, one notable exception being the Module
driving the compilation of HIP programs on NVIDIA runtimes. As such, when
dependencies are not found in standard system locations, one either has to
instruct CMake to search for package config files in additional folders using
the ``CMAKE_PREFIX_PATH`` variable (a semi-colon separated list of file system
paths), or using ``<PackageName>_ROOT`` variable on a project-specific basis.
There are nearly a dozen ways to set these variables. One may be more convenient
than another depending on your workflow. Conceptually the simplest is adding
it to your CMake configuration command on the command line via
``-D CMAKE_PREFIX_PATH=....`` . AMD-packaged ROCm installs can typically be
added to the config-file search paths as follows:
* Windows: ``-D CMAKE_PREFIX_PATH=${env:HIP_PATH}``
* Linux: ``-D CMAKE_PREFIX_PATH=/opt/rocm``
ROCm provides the respective *config-file* packages, and this enables
``find_package`` to be used directly. ROCm does not require any Find module as
the *config-file* packages are shipped with the upstream projects, such as
rocPRIM and other ROCm libraries.
For a complete guide on where and how ROCm may be installed on a system, refer
to the installation guides for
`Linux <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html>`_
and
`Windows <https://rocm.docs.amd.com/projects/install-on-windows/en/latest/index.html>`_.
Using HIP in CMake
==================
ROCm components providing a C/C++ interface support consumption via any
C/C++ toolchain that CMake knows how to drive. ROCm also supports the CMake HIP
language features, allowing users to program using the HIP single-source
programming model. When a program (or translation-unit) uses the HIP API without
compiling any GPU device code, HIP can be treated in CMake as a simple C/C++
library.
Using the HIP single-source programming model
---------------------------------------------
Source code written in the HIP dialect of C++ typically uses the `.hip`
extension. When the HIP CMake language is enabled, it will automatically
associate such source files with the HIP toolchain being used.
.. code-block:: cmake
cmake_minimum_required(VERSION 3.21) # HIP language support requires 3.21
cmake_policy(VERSION 3.21.3...3.27)
project(MyProj LANGUAGES HIP)
add_executable(MyApp Main.hip)
Should you have existing CUDA code that falls within the source-compatible subset
of HIP, you can tell CMake that despite their `.cu` extension, they are HIP
sources. Do note that this mostly facilitates compiling kernel-only source files,
as host-side CUDA API calls won't compile in this fashion.
.. code-block:: cmake
add_library(MyLib MyLib.cu)
set_source_files_properties(MyLib.cu PROPERTIES LANGUAGE HIP)
CMake itself hosts only part of the HIP language support, such as defining
HIP-specific properties, while the other half ships with the HIP
implementation, such as ROCm. CMake will search for a file
`hip-lang-config.cmake` describing how the properties defined by CMake
translate to toolchain invocations. If one installs ROCm using non-standard
methods or layouts and CMake can't locate this file or detect parts of the SDK,
there's a catch-all, last-resort variable consulted to locate this file,
``-D CMAKE_HIP_COMPILER_ROCM_ROOT:PATH=``, which should be set to the root of the
ROCm installation.
.. note::
Imported targets defined by `hip-lang-config.cmake` are for internal use
only.
If the user doesn't provide a semicolon-delimited list of device architectures
via ``CMAKE_HIP_ARCHITECTURES``, CMake selects a sensible default. However, if
you know which devices you wish to target, it is advisable to set this variable
explicitly.
Consuming ROCm C/C++ libraries
------------------------------
Libraries such as rocBLAS, rocFFT, MIOpen, etc. behave as C/C++ libraries.
Illustrated in the example below is a C++ application using MIOpen from CMake.
It calls ``find_package(miopen)``, which provides the ``MIOpen`` imported
target. This can be linked with ``target_link_libraries``:
.. code-block:: cmake
cmake_minimum_required(VERSION 3.5) # find_package(miopen) requires 3.5
cmake_policy(VERSION 3.5...3.27)
project(MyProj LANGUAGES CXX)
find_package(miopen)
add_library(MyLib ...)
target_link_libraries(MyLib PUBLIC MIOpen)
.. note::
Most libraries are designed as host-only API, so using a GPU device
compiler is not necessary for downstream projects unless they use GPU device
code.
Consuming the HIP API in C++ code
---------------------------------
Consuming the HIP API without compiling single-source GPU device code can be
done using any C++ compiler. The ``find_package(hip)`` provides the
``hip::host`` imported target to use HIP in this scenario.
.. code-block:: cmake
cmake_minimum_required(VERSION 3.5) # find_package(hip) requires 3.5
cmake_policy(VERSION 3.5...3.27)
project(MyProj LANGUAGES CXX)
find_package(hip REQUIRED)
add_executable(MyApp ...)
target_link_libraries(MyApp PRIVATE hip::host)
When mixing such ``CXX`` sources with ``HIP`` sources holding device code, link
only to `hip::host`. If HIP sources don't have `.hip` as their extension, use
`set_source_files_properties(<hip_sources>... PROPERTIES LANGUAGE HIP)` on them.
Linking to `hip::host` will set all the necessary flags for the ``CXX`` sources
while ``HIP`` sources inherit all flags from the built-in language support.
Having HIP sources in a target will turn the |LINK_LANG|_ into ``HIP``.
.. |LINK_LANG| replace:: ``LINKER_LANGUAGE``
.. _LINK_LANG: https://cmake.org/cmake/help/latest/prop_tgt/LINKER_LANGUAGE.html
Compiling device code in C++ language mode
------------------------------------------
.. attention::
The workflow detailed here is considered legacy and is shown for
understanding's sake. It pre-dates the existence of HIP language support in
CMake. If source code has HIP device code in it, it is a HIP source file
and should be compiled as such. Only resort to the method below if your
HIP-enabled CMake code path can't mandate CMake version 3.21.
If code uses the HIP API and compiles GPU device code, it requires using a
device compiler. The compiler for CMake can be set using either the
``CMAKE_C_COMPILER`` and ``CMAKE_CXX_COMPILER`` variable or using the ``CC``
and ``CXX`` environment variables. This can be set when configuring CMake or
put into a CMake toolchain file. The device compiler must be set to a
compiler that supports AMD GPU targets, which is usually Clang.
The ``find_package(hip)`` provides the ``hip::device`` imported target to add
all the flags necessary for device compilation.
.. code-block:: cmake
cmake_minimum_required(VERSION 3.8) # cxx_std_11 requires 3.8
cmake_policy(VERSION 3.8...3.27)
project(MyProj LANGUAGES CXX)
find_package(hip REQUIRED)
add_library(MyLib ...)
target_link_libraries(MyLib PRIVATE hip::device)
target_compile_features(MyLib PRIVATE cxx_std_11)
.. note::
Compiling for the GPU device requires at least C++11.
This project can then be configured with the following CMake commands:
* Windows: ``cmake -D CMAKE_CXX_COMPILER:PATH=${env:HIP_PATH}\bin\clang++.exe``
* Linux: ``cmake -D CMAKE_CXX_COMPILER:PATH=/opt/rocm/bin/amdclang++``
These use the device compiler provided by the binary packages of the
`ROCm HIP SDK <https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html>`_ and
`repo.radeon.com <https://repo.radeon.com>`_, respectively.
When using the ``CXX`` language support to compile HIP device code, select the
target GPU architectures by setting the ``GPU_TARGETS`` variable
(``CMAKE_HIP_ARCHITECTURES`` only exists when the HIP language is enabled). By
default, this is set to some subset of the currently supported architectures of
AMD ROCm. It can be set with the CMake option ``-D GPU_TARGETS="gfx1032;gfx1035"``.
ROCm CMake packages
-------------------
+-----------+----------+--------------------------------------------------------+
| Component | Package | Targets |
+===========+==========+========================================================+
| HIP | hip | ``hip::host``, ``hip::device`` |
+-----------+----------+--------------------------------------------------------+
| rocPRIM | rocprim | ``roc::rocprim`` |
+-----------+----------+--------------------------------------------------------+
| rocThrust | rocthrust| ``roc::rocthrust`` |
+-----------+----------+--------------------------------------------------------+
| hipCUB | hipcub | ``hip::hipcub`` |
+-----------+----------+--------------------------------------------------------+
| rocRAND | rocrand | ``roc::rocrand`` |
+-----------+----------+--------------------------------------------------------+
| rocBLAS | rocblas | ``roc::rocblas`` |
+-----------+----------+--------------------------------------------------------+
| rocSOLVER | rocsolver| ``roc::rocsolver`` |
+-----------+----------+--------------------------------------------------------+
| hipBLAS | hipblas | ``roc::hipblas`` |
+-----------+----------+--------------------------------------------------------+
| rocFFT | rocfft | ``roc::rocfft`` |
+-----------+----------+--------------------------------------------------------+
| hipFFT | hipfft | ``hip::hipfft`` |
+-----------+----------+--------------------------------------------------------+
| rocSPARSE | rocsparse| ``roc::rocsparse`` |
+-----------+----------+--------------------------------------------------------+
| hipSPARSE | hipsparse| ``roc::hipsparse`` |
+-----------+----------+--------------------------------------------------------+
| rocALUTION|rocalution| ``roc::rocalution`` |
+-----------+----------+--------------------------------------------------------+
| RCCL | rccl | ``rccl`` |
+-----------+----------+--------------------------------------------------------+
| MIOpen | miopen | ``MIOpen`` |
+-----------+----------+--------------------------------------------------------+
| MIGraphX | migraphx | ``migraphx::migraphx``, ``migraphx::migraphx_c``, |
| | | ``migraphx::migraphx_cpu``, ``migraphx::migraphx_gpu``,|
| | | ``migraphx::migraphx_onnx``, ``migraphx::migraphx_tf`` |
+-----------+----------+--------------------------------------------------------+
Using CMake presets
===================
Depending on how specific users like to be when compiling code, CMake command
lines can grow to unwieldy lengths. This is the primary reason why projects tend
to bake script snippets into their build definitions controlling compiler
warning levels, changing CMake defaults (``CMAKE_BUILD_TYPE`` or
``BUILD_SHARED_LIBS`` just to name a few) and all sorts of anti-patterns, all in
the name of convenience.
Load on the command-line interface (CLI) starts immediately by selecting a
toolchain, the set of utilities used to compile programs. To ease some of the
toolchain-related pains, CMake does consult the ``CC`` and ``CXX`` environment
variables when setting a default ``CMAKE_C[XX]_COMPILER`` respectively, but that
is just the tip of the iceberg. There's a fair number of variables related to
just the toolchain itself (typically supplied using
`toolchain files <https://cmake.org/cmake/help/latest/manual/cmake-toolchains.7.html>`_
), and then we still haven't talked about user preference or project-specific
options.
IDEs supporting CMake (Visual Studio, Visual Studio Code, CLion, etc.) all came
up with their own way to register command-line fragments for different purposes in
a setup-and-forget fashion for quick assembly using graphical front-ends. This is
all nice, but configurations aren't portable, nor can they be reused in
Continuous Integration (CI) pipelines. CMake has condensed existing practice
into a portable JSON format that works in all IDEs and can be invoked from any
command line. This is
`CMake Presets <https://cmake.org/cmake/help/latest/manual/cmake-presets.7.html>`_.
There are two types of preset files: one supplied by the project, called
``CMakePresets.json`` which is meant to be committed to version control,
typically used to drive CI; and one meant for the user to provide, called
``CMakeUserPresets.json``, typically used to house user preferences and to adapt
the build to the user's environment. These JSON files are allowed to include
other JSON files, and the user presets always implicitly include the non-user
variant.
Using HIP with presets
----------------------
Following is an example ``CMakeUserPresets.json`` file which actually compiles
the `amd/rocm-examples <https://github.com/amd/rocm-examples>`_ suite of sample
applications on a typical ROCm installation:
.. code-block:: json
{
"version": 3,
"cmakeMinimumRequired": {
"major": 3,
"minor": 21,
"patch": 0
},
"configurePresets": [
{
"name": "layout",
"hidden": true,
"binaryDir": "${sourceDir}/build/${presetName}",
"installDir": "${sourceDir}/install/${presetName}"
},
{
"name": "generator-ninja-multi-config",
"hidden": true,
"generator": "Ninja Multi-Config"
},
{
"name": "toolchain-makefiles-c/c++-amdclang",
"hidden": true,
"cacheVariables": {
"CMAKE_C_COMPILER": "/opt/rocm/bin/amdclang",
"CMAKE_CXX_COMPILER": "/opt/rocm/bin/amdclang++",
"CMAKE_HIP_COMPILER": "/opt/rocm/bin/amdclang++"
}
},
{
"name": "clang-strict-iso-high-warn",
"hidden": true,
"cacheVariables": {
"CMAKE_C_FLAGS": "-Wall -Wextra -pedantic",
"CMAKE_CXX_FLAGS": "-Wall -Wextra -pedantic",
"CMAKE_HIP_FLAGS": "-Wall -Wextra -pedantic"
}
},
{
"name": "ninja-mc-rocm",
"displayName": "Ninja Multi-Config ROCm",
"inherits": [
"layout",
"generator-ninja-multi-config",
"toolchain-makefiles-c/c++-amdclang",
"clang-strict-iso-high-warn"
]
}
],
"buildPresets": [
{
"name": "ninja-mc-rocm-debug",
"displayName": "Debug",
"configuration": "Debug",
"configurePreset": "ninja-mc-rocm"
},
{
"name": "ninja-mc-rocm-release",
"displayName": "Release",
"configuration": "Release",
"configurePreset": "ninja-mc-rocm"
},
{
"name": "ninja-mc-rocm-debug-verbose",
"displayName": "Debug (verbose)",
"configuration": "Debug",
"configurePreset": "ninja-mc-rocm",
"verbose": true
},
{
"name": "ninja-mc-rocm-release-verbose",
"displayName": "Release (verbose)",
"configuration": "Release",
"configurePreset": "ninja-mc-rocm",
"verbose": true
}
],
"testPresets": [
{
"name": "ninja-mc-rocm-debug",
"displayName": "Debug",
"configuration": "Debug",
"configurePreset": "ninja-mc-rocm",
"execution": {
"jobs": 0
}
},
{
"name": "ninja-mc-rocm-release",
"displayName": "Release",
"configuration": "Release",
"configurePreset": "ninja-mc-rocm",
"execution": {
"jobs": 0
}
}
]
}
.. note::
Getting presets to work reliably on Windows requires some CMake improvements
and/or support from compiler vendors. (Refer to
`Add support to the Visual Studio generators <https://gitlab.kitware.com/cmake/cmake/-/issues/24245>`_
and `Sourcing environment scripts <https://gitlab.kitware.com/cmake/cmake/-/issues/21619>`_
.)


@@ -1,14 +0,0 @@
<head>
<meta charset="UTF-8">
<meta name="description" content="AMD ROCm documentation">
<meta name="keywords" content="documentation, guides, installation, compatibility, support,
reference, ROCm, AMD">
</head>
# Using compiler features
The following topics describe using specific features of the compilation tools:
* [ROCm compiler infrastructure](https://rocm.docs.amd.com/projects/llvm-project/en/latest/index.html)
* [Using AddressSanitizer](https://rocm.docs.amd.com/projects/llvm-project/en/latest/conceptual/using-gpu-sanitizer.html)
* [OpenMP support](https://rocm.docs.amd.com/projects/llvm-project/en/latest/conceptual/openmp.html)


@@ -1,172 +0,0 @@
<head>
<meta charset="UTF-8">
<meta name="description" content="ROCm Linux Filesystem Hierarchy Standard reorganization">
<meta name="keywords" content="FHS, Linux Filesystem Hierarchy Standard, directory structure,
AMD, ROCm">
</head>
# ROCm Linux Filesystem Hierarchy Standard reorganization
## Introduction
The ROCm Software has adopted the Linux Filesystem Hierarchy Standard (FHS) [https://refspecs.linuxfoundation.org/FHS_3.0/fhs/index.html](https://refspecs.linuxfoundation.org/FHS_3.0/fhs/index.html) in order to ensure ROCm is consistent with standard open source conventions. The following sections specify how current and future releases of ROCm adhere to FHS, how the previous ROCm file system is supported, and how improved versioning specifications are applied to ROCm.
## Adopting the FHS
In order to standardize the ROCm directory structure and directory content layout, ROCm has adopted the [FHS](https://refspecs.linuxfoundation.org/FHS_3.0/fhs/index.html), adhering to open source conventions for Linux-based distributions. FHS ensures internal consistency within the ROCm stack, as well as external consistency with other systems and distributions. The proposed ROCm file structure is outlined below:
```none
/opt/rocm-<ver>
| -- bin
| -- all public binaries
| -- lib
| -- lib<soname>.so->lib<soname>.so.major->lib<soname>.so.major.minor.patch
(public libraries to link with applications)
| -- <component>
| -- architecture dependent libraries and binaries used internally by components
| -- cmake
| -- <component>
| --<component>-config.cmake
| -- libexec
| -- <component>
| -- non ISA/architecture independent executables used internally by components
| -- include
| -- <component>
| -- public header files
| -- share
| -- html
| -- <component>
| -- html documentation
| -- info
| -- <component>
| -- info files
| -- man
| -- <component>
| -- man pages
| -- doc
| -- <component>
| -- license files
| -- <component>
| -- samples
| -- architecture independent misc files
```
## Changes from earlier ROCm versions
The following table provides a brief overview of the new ROCm FHS layout, compared to the layout of earlier ROCm versions. Note that `/opt/` is used to denote the default ROCm installation path and should be replaced if the ROCm distribution is installed in a non-standard location.
```none
______________________________________________________
| New ROCm Layout | Previous ROCm Layout |
|_____________________________|________________________|
| /opt/rocm-<ver> | /opt/rocm-<ver> |
| | -- bin | | -- bin |
| | -- lib | | -- lib |
| | -- cmake | | -- include |
| | -- libexec | | -- <component_1> |
| | -- include | | -- bin |
| | -- <component_1> | | -- cmake |
| | -- share | | -- doc |
| | -- html | | -- lib |
| | -- info | | -- include |
| | -- man | | -- samples |
| | -- doc | | -- <component_n> |
| | -- <component_1> | | -- bin |
| | -- samples | | -- cmake |
| | -- .. | | -- doc |
| | -- <component_n> | | -- lib |
| | -- samples | | -- include |
| | -- .. | | -- samples |
|______________________________________________________|
```
## ROCm FHS reorganization: backward compatibility
The FHS file organization for ROCm was first introduced in the ROCm 5.2 release. Backward compatibility was implemented to make sure users could still run their ROCm applications while transitioning to the new FHS. ROCm has moved header files and libraries to their new locations as indicated in the above structure, and included symbolic links and wrapper header files in their old locations for backward compatibility. The following sections detail the ROCm backward compatibility implementation for wrapper header files, executable files, library files, and CMake config files.
### Wrapper header files
Wrapper header files are placed in the old location (
`/opt/rocm-<ver>/<component>/include`) with a warning message to include files
from the new location (`/opt/rocm-<ver>/include`) as shown in the example below.
```cpp
#pragma message "This file is deprecated. Use file from include path /opt/rocm-ver/include/ and prefix with hip."
#include <hip/hip_runtime.h>
```
* Starting with the ROCm 5.2 release, backward-compatibility wrapper header files announce their deprecation with a `#pragma` message issued as a `#warning`.
* Starting from ROCm 6.0 (tentatively), backward compatibility for wrapper header files will be removed, and the `#pragma` message will be issued as an `#error`.
### Executable files
Executable files are available in the `/opt/rocm-<ver>/bin` folder. For backward
compatibility, the old executable location (`/opt/rocm-<ver>/<component>/bin`) has a
soft link to the executable at the new location. Soft links will be removed in a
future release, tentatively ROCm v6.0.
```bash
$ ls -l /opt/rocm/hip/bin/
lrwxrwxrwx 1 root root 24 Jan 1 23:32 hipcc -> ../../bin/hipcc
```
### Library files
Library files are available in the `/opt/rocm-<ver>/lib` folder. For backward
compatibility, the old library location (`/opt/rocm-<ver>/<component>/lib`) has a
soft link to the library at the new location. Soft links will be removed in a
future release, tentatively ROCm v6.0.
```shell
$ ls -l /opt/rocm/hip/lib/
drwxr-xr-x 4 root root 4096 Jan 1 10:45 cmake
lrwxrwxrwx 1 root root 24 Jan 1 23:32 libamdhip64.so -> ../../lib/libamdhip64.so
```
### CMake config files
All CMake configuration files are available in the
`/opt/rocm-<ver>/lib/cmake/<component>` folder. For backward compatibility, the
old CMake locations (`/opt/rocm-<ver>/<component>/lib/cmake`) contain a soft
link to the new CMake config. Soft links will be removed in a future release,
tentatively ROCm v6.0.
```shell
$ ls -l /opt/rocm/hip/lib/cmake/hip/
lrwxrwxrwx 1 root root 42 Jan 1 23:32 hip-config.cmake -> ../../../../lib/cmake/hip/hip-config.cmake
```
## Changes required in applications using ROCm
Applications using ROCm are advised to use the new file paths. As the old files
will be deprecated in a future release. Applications have to make sure to include
correct header file and use correct search paths.
1. `#include <header_file.h>` needs to be changed to
`#include <component/header_file.h>`.
For example, `#include <hip.h>` needs to be changed
to `#include <hip/hip.h>`.
2. Any variable in CMake or Makefiles pointing to a component folder needs to
be changed.
For example, `VAR1=/opt/rocm/hip` needs to be changed to `VAR1=/opt/rocm`, and
`VAR2=/opt/rocm/hsa` needs to be changed to `VAR2=/opt/rocm`.
3. Any reference to `/opt/rocm/<component>/bin` or `/opt/rocm/<component>/lib`
needs to be changed to `/opt/rocm/bin` and `/opt/rocm/lib/`, respectively.
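As a minimal sketch of what these changes look like in application code, assuming the HIP runtime as the component and illustrative file names and flags (substitute the components your application actually uses):
```cpp
// new_paths_example.cpp -- illustrative only: include style for the new FHS layout.
// The old layout relied on per-component include directories such as
// -I/opt/rocm/hip/include; the new layout uses the shared /opt/rocm/include
// directory with the component name as a prefix inside the #include itself.
#include <hip/hip_runtime.h>  // was: #include <hip_runtime.h>

int main() {
    int device_count = 0;
    hipGetDeviceCount(&device_count);  // any HIP runtime call; confirms headers and libraries resolve
    return device_count > 0 ? 0 : 1;
}
```
With the new layout, the build line only needs the top-level directories, for example `hipcc new_paths_example.cpp -o new_paths_example`, or with a generic compiler, `g++ new_paths_example.cpp -D__HIP_PLATFORM_AMD__ -I/opt/rocm/include -L/opt/rocm/lib -lamdhip64` (flags shown for illustration).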
## Changes in versioning specifications
To better manage ROCm dependency specifications and allow smoother ROCm releases while avoiding dependency conflicts, ROCm software adheres to the following scheme when numbering and incrementing ROCm file versions:
rocm-\<ver\>, where \<ver\> = \<x.y.z\>
x.y.z denote: MAJOR.MINOR.PATCH
z: PATCH - increment z when implementing backward compatible bug fixes.
y: MINOR - increment y when implementing minor changes that add functionality but are still backward compatible.
x: MAJOR - increment x when implementing major changes that are not backward compatible.
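The following sketch is illustrative only and not part of the specification; it parses an `x.y.z` string and applies the rule that only a MAJOR change may break compatibility (the version values are made up):
```cpp
// rocm_version_sketch.cpp -- illustrative MAJOR.MINOR.PATCH comparison.
#include <cstdio>

struct Version { int major = 0, minor = 0, patch = 0; };

static bool parse(const char* text, Version& v) {
    return std::sscanf(text, "%d.%d.%d", &v.major, &v.minor, &v.patch) == 3;
}

int main() {
    Version built, installed;
    parse("5.2.0", built);      // version an application was built against (sample value)
    parse("5.4.3", installed);  // version found on the system (sample value)
    // Same MAJOR: only backward-compatible additions (MINOR) and fixes (PATCH).
    // Different MAJOR: changes may not be backward compatible.
    bool ok = installed.major == built.major &&
              (installed.minor > built.minor ||
               (installed.minor == built.minor && installed.patch >= built.patch));
    std::printf("rocm-%d.%d.%d satisfies rocm-%d.%d.%d: %s\n",
                installed.major, installed.minor, installed.patch,
                built.major, built.minor, built.patch, ok ? "yes" : "no");
    return 0;
}
```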
View File
@@ -1,72 +0,0 @@
<head>
<meta charset="UTF-8">
<meta name="description" content="GPU architecture">
<meta name="keywords" content="GPU architecture, architecture support, MI200, MI250, RDNA,
MI100, AMD Instinct">
</head>
(gpu-arch-documentation)=
# GPU architecture documentation
:::::{grid} 1 1 2 2
:gutter: 1
:::{grid-item-card}
**AMD Instinct MI300 series**
Review hardware aspects of the AMD Instinct™ MI300 series of GPU accelerators and the CDNA™ 3
architecture.
* [AMD Instinct™ MI300 microarchitecture](./gpu-arch/mi300.md)
* [AMD Instinct MI300/CDNA3 ISA](https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf)
* [White paper](https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf)
* [Performance counters](./gpu-arch/mi300-mi200-performance-counters.rst)
:::
:::{grid-item-card}
**AMD Instinct MI200 series**
Review hardware aspects of the AMD Instinct™ MI200 series of GPU accelerators and the CDNA™ 2
architecture.
* [AMD Instinct™ MI250 microarchitecture](./gpu-arch/mi250.md)
* [AMD Instinct MI200/CDNA2 ISA](https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf)
* [White paper](https://www.amd.com/content/dam/amd/en/documents/instinct-business-docs/white-papers/amd-cdna2-white-paper.pdf)
* [Performance counters](./gpu-arch/mi300-mi200-performance-counters.rst)
:::
:::{grid-item-card}
**AMD Instinct MI100**
Review hardware aspects of the AMD Instinct™ MI100 series of GPU accelerators and the CDNA™ 1
architecture.
* [AMD Instinct™ MI100 microarchitecture](./gpu-arch/mi100.md)
* [AMD Instinct MI100/CDNA1 ISA](https://www.amd.com/system/files/TechDocs/instinct-mi100-cdna1-shader-instruction-set-architecture%C2%A0.pdf)
* [White paper](https://www.amd.com/content/dam/amd/en/documents/instinct-business-docs/white-papers/amd-cdna-white-paper.pdf)
:::
:::{grid-item-card}
**RDNA**
* [AMD RDNA4 ISA](https://www.amd.com/content/dam/amd/en/documents/radeon-tech-docs/instruction-set-architectures/rdna4-instruction-set-architecture.pdf)
* [AMD RDNA3 ISA](https://www.amd.com/system/files/TechDocs/rdna3-shader-instruction-set-architecture-feb-2023_0.pdf)
* [AMD RDNA2 ISA](https://www.amd.com/system/files/TechDocs/rdna2-shader-instruction-set-architecture.pdf)
* [AMD RDNA ISA](https://www.amd.com/system/files/TechDocs/rdna-shader-instruction-set-architecture.pdf)
:::
:::{grid-item-card}
**Older architectures**
* [AMD Instinct MI50/Vega 7nm ISA](https://www.amd.com/system/files/TechDocs/vega-7nm-shader-instruction-set-architecture.pdf)
* [AMD Instinct MI25/Vega ISA](https://www.amd.com/system/files/TechDocs/vega-shader-instruction-set-architecture.pdf)
* [AMD GCN3 ISA](https://www.amd.com/system/files/TechDocs/gcn3-instruction-set-architecture.pdf)
* [AMD Vega Architecture White Paper](https://en.wikichip.org/w/images/a/a1/vega-whitepaper.pdf)
:::
:::::
View File
@@ -1,95 +0,0 @@
---
myst:
html_meta:
"description lang=en": "Learn about the AMD Instinct MI100 series architecture."
"keywords": "Instinct, MI100, microarchitecture, AMD, ROCm"
---
# AMD Instinct™ MI100 microarchitecture
The following image shows the node-level architecture of a system that
comprises two AMD EPYC™ processors and (up to) eight AMD Instinct™ accelerators.
The two EPYC processors are connected to each other with the AMD Infinity™
fabric, which provides high-bandwidth (up to 18 GT/sec), coherent links such
that each processor can access the available node memory as a single
shared-memory domain in a non-uniform memory architecture (NUMA) fashion. In a
2P, or dual-socket, configuration, three AMD Infinity™ fabric links are
available to connect the processors, plus one PCIe Gen 4 x16 link per processor
to attach additional I/O devices such as the host adapters for the network
fabric.
![Node-level system architecture with two AMD EPYC™ processors and eight AMD Instinct™ accelerators](../../data/conceptual/gpu-arch/image004.png "Node-level system architecture with two AMD EPYC™ processors and eight AMD Instinct™ accelerators.")
In a typical node configuration, each processor can host up to four AMD
Instinct™ accelerators that are attached using PCIe Gen 4 links at 16 GT/sec,
which corresponds to a peak bidirectional link bandwidth of 32 GB/sec. Each hive
of four accelerators can participate in a fully connected, coherent AMD
Instinct™ fabric that connects the four accelerators using 23 GT/sec AMD
Infinity fabric links that run at a higher frequency than the inter-processor
links. This inter-GPU link can be established in certified server systems if the
GPUs are mounted in neighboring PCIe slots by installing the AMD Infinity
Fabric™ bridge for the AMD Instinct™ accelerators.
## Microarchitecture
The microarchitecture of the AMD Instinct accelerators is based on the AMD CDNA
architecture, which targets compute applications such as high-performance
computing (HPC) and AI & machine learning (ML) that run on everything from
individual servers to the world's largest exascale supercomputers. The overall
system architecture is designed for extreme scalability and compute performance.
![Structure of the AMD Instinct accelerator (MI100 generation)](../../data/conceptual/gpu-arch/image005.png "Structure of the AMD Instinct accelerator (MI100 generation)")
The above image shows the AMD Instinct accelerator with its PCIe Gen 4 x16
link (16 GT/sec, at the bottom) that connects the GPU to (one of) the host
processor(s). It also shows the three AMD Infinity Fabric ports that provide
high-speed links (23 GT/sec, also at the bottom) to the other GPUs of the local
hive.
On the left and right of the floor plan, the High Bandwidth Memory (HBM)
attaches via the GPU memory controller. The MI100 generation of the AMD
Instinct accelerator offers four stacks of HBM generation 2 (HBM2) for a total
of 32 GB with a 4,096-bit-wide memory interface. The peak memory bandwidth of the
attached HBM2 is 1.228 TB/sec at a memory clock frequency of 1.2 GHz.
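Those numbers are consistent with a quick back-of-the-envelope calculation; the sketch below assumes HBM2 transfers data on both clock edges (a property of HBM2 signaling, not stated in the text above):
```cpp
// hbm2_bandwidth_sketch.cpp -- illustrative check of the quoted peak bandwidth.
#include <cstdio>

int main() {
    const double interface_bits      = 4096;  // memory interface width from the text
    const double memory_clock_ghz    = 1.2;   // memory clock frequency from the text
    const double transfers_per_clock = 2.0;   // assumption: double data rate signaling
    double tb_per_sec = (interface_bits / 8.0) * transfers_per_clock * memory_clock_ghz / 1000.0;
    std::printf("Peak HBM2 bandwidth: %.2f TB/s\n", tb_per_sec);  // ~1.23 TB/s
    return 0;
}
```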
The execution units of the GPU are depicted in the above image as Compute
Units (CU). There are a total of 120 compute units that are physically organized
into eight Shader Engines (SE) with fifteen compute units per shader engine.
Each compute unit is further sub-divided into four SIMD units that process SIMD
instructions of 16 data elements per instruction. This enables the CU to process
64 data elements (a so-called 'wavefront') at a peak clock frequency of 1.5 GHz.
Therefore, the theoretical maximum FP64 peak performance is 11.5 TFLOPS
(`4 [SIMD units] x 16 [elements per instruction] x 120 [CU] x 1.5 [GHz]`).
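The same arithmetic written out, using only the values quoted in the paragraph above:
```cpp
// mi100_fp64_peak_sketch.cpp -- reproduces the 11.5 TFLOPS FP64 vector peak.
#include <cstdio>

int main() {
    const double simd_units_per_cu        = 4;    // SIMD units per compute unit
    const double elements_per_instruction = 16;   // FP64 elements per SIMD instruction
    const double compute_units            = 120;  // compute units on the MI100
    const double peak_clock_ghz           = 1.5;  // peak engine clock
    double tflops = simd_units_per_cu * elements_per_instruction *
                    compute_units * peak_clock_ghz / 1000.0;  // GFLOPS -> TFLOPS
    std::printf("FP64 vector peak: %.1f TFLOPS\n", tflops);   // prints 11.5
    return 0;
}
```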
![Block diagram of an MI100 compute unit with detailed SIMD view of the AMD CDNA architecture](../../data/conceptual/gpu-arch/image006.png "An MI100 compute unit with detailed SIMD view of the AMD CDNA architecture")
The preceding image shows the block diagram of a single CU of an AMD Instinct™
MI100 accelerator and summarizes how instructions flow through the execution
engines. The CU fetches the instructions via a 32KB instruction cache and moves
them forward to execution via a dispatcher. The CU can handle up to ten
wavefronts at a time and feed their instructions into the execution unit. The
execution unit contains 256 vector general-purpose registers (VGPR) and 800
scalar general-purpose registers (SGPR). The VGPR and SGPR are dynamically
allocated to the executing wavefronts. A wavefront can access a maximum of 102
scalar registers. Excess scalar-register usage will cause register spilling and
thus may affect execution performance.
A wavefront can occupy any number of VGPRs from 0 to 256, directly affecting
occupancy; that is, the number of concurrently active wavefronts in the CU. For
instance, with 119 VGPRs used, only two wavefronts can be active in the CU at
the same time. With the instruction latency of four cycles per SIMD instruction,
the occupancy should be as high as possible such that the compute unit can
improve execution efficiency by scheduling instructions from multiple
wavefronts.
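A small sketch of that register/occupancy trade-off; the 256-register budget and the 119-VGPR case come from the text above, while the other register counts are arbitrary sample points:
```cpp
// vgpr_occupancy_sketch.cpp -- wavefronts limited by VGPR usage.
#include <cstdio>

int main() {
    const int vgpr_budget = 256;  // registers a wavefront can draw from (see text above)
    const int samples[] = {32, 64, 119, 128, 256};
    for (int vgprs_per_wave : samples) {
        int active_waves = vgpr_budget / vgprs_per_wave;  // whole wavefronts only
        std::printf("%3d VGPRs per wave -> %d concurrently active wavefront(s)\n",
                    vgprs_per_wave, active_waves);
    }
    return 0;
}
```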
:::{table} Peak-performance capabilities of MI100 for different data types.
:name: mi100-perf
| Computation and Data Type | FLOPS/CLOCK/CU | Peak TFLOPS |
| :------------------------ | :------------: | ----------: |
| Vector FP64 | 64 | 11.5 |
| Matrix FP32 | 256 | 46.1 |
| Vector FP32 | 128 | 23.1 |
| Matrix FP16 | 1024 | 184.6 |
| Matrix BF16 | 512 | 92.3 |
:::
View File
@@ -1,134 +0,0 @@
---
myst:
html_meta:
"description lang=en": "Learn about the AMD Instinct MI250 series architecture."
"keywords": "Instinct, MI250, microarchitecture, AMD, ROCm"
---
# AMD Instinct™ MI250 microarchitecture
The microarchitecture of the AMD Instinct MI250 accelerators is based on the
AMD CDNA 2 architecture that targets compute applications such as HPC,
artificial intelligence (AI), and machine learning (ML) and that run on
everything from individual servers to the world's largest exascale
supercomputers. The overall system architecture is designed for extreme
scalability and compute performance.
The following image shows the components of a single Graphics Compute Die (GCD) of the CDNA 2 architecture. On the top and the bottom are AMD Infinity Fabric™
interfaces and their physical links that are used to connect the GPU die to the
other system-level components of the node (see also Section 2.2). Both
interfaces can drive four AMD Infinity Fabric links. One of the AMD Infinity
Fabric links of the controller at the bottom can be configured as a PCIe link.
Each of the AMD Infinity Fabric links between GPUs can run at up to 25 GT/sec,
which correlates to a peak transfer bandwidth of 50 GB/sec for a 16-wide link (
two bytes per transaction). Section 2.2 has more details on the number of AMD
Infinity Fabric links and the resulting transfer rates between the system-level
components.
To the left and the right are memory controllers that attach the High Bandwidth
Memory (HBM) modules to the GCD. AMD Instinct MI250 GPUs use HBM2e, which offers
a peak memory bandwidth of 1.6 TB/sec per GCD.
The execution units of the GPU are depicted in the following image as Compute
Units (CU). The MI250 GCD has 104 active CUs. Each compute unit is further
subdivided into four SIMD units that process SIMD instructions of 16 data
elements per instruction (for the FP64 data type). This enables the CU to
process 64 work items (a so-called “wavefront”) at a peak clock frequency of 1.7
GHz. Therefore, the theoretical maximum FP64 peak performance per GCD is 22.6
TFLOPS for vector instructions. This equates to 45.3 TFLOPS for vector instructions for both GCDs together. The MI250 compute units also provide specialized
execution units (also called matrix cores), which are geared toward executing
matrix operations like matrix-matrix multiplications. For FP64, the peak
performance of these units amounts to 90.5 TFLOPS.
![Structure of a single GCD in the AMD Instinct MI250 accelerator.](../../data/conceptual/gpu-arch/image001.png "Structure of a single GCD in the AMD Instinct MI250 accelerator.")
```{list-table} Peak-performance capabilities of the MI250 OAM for different data types.
:header-rows: 1
:name: mi250-perf-table
*
- Computation and Data Type
- FLOPS/CLOCK/CU
- Peak TFLOPS
*
- Matrix FP64
- 256
- 90.5
*
- Vector FP64
- 128
- 45.3
*
- Matrix FP32
- 256
- 90.5
*
- Packed FP32
- 256
- 90.5
*
- Vector FP32
- 128
- 45.3
*
- Matrix FP16
- 1024
- 362.1
*
- Matrix BF16
- 1024
- 362.1
*
- Matrix INT8
- 1024
- 362.1
```
The above table summarizes the aggregated peak performance of the AMD
Instinct MI250 OCP Open Accelerator Modules (OAM, OCP is short for Open Compute
Platform) and its two GCDs for different data types and execution units. The
middle column lists the peak performance (number of data elements processed in a
single instruction) of a single compute unit if a SIMD (or matrix) instruction
is being retired in each clock cycle. The third column lists the theoretical
peak performance of the OAM module. The theoretical aggregated peak memory
bandwidth of the GPU is 3.2 TB/sec (1.6 TB/sec per GCD).
![Dual-GCD architecture of the AMD Instinct MI250 accelerators](../../data/conceptual/gpu-arch/image002.png "Dual-GCD architecture of the AMD Instinct MI250 accelerators")
The following image shows the block diagram of an OAM package that consists
of two GCDs, each of which constitutes one GPU device in the system. The two
GCDs in the package are connected via four AMD Infinity Fabric links running at
a theoretical peak rate of 25 GT/sec, giving 200 GB/sec peak transfer bandwidth
between the two GCDs of an OAM, or a bidirectional peak transfer bandwidth of
400 GB/sec for the same.
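Those figures follow directly from the per-link numbers given earlier on this page; as a quick sketch:
```cpp
// mi250_gcd_link_bw_sketch.cpp -- reproduces the 200/400 GB/s figures above.
#include <cstdio>

int main() {
    const double transfer_rate_gt_s = 25.0;  // per-link rate between the two GCDs
    const double bytes_per_transfer = 2.0;   // 16-wide link, two bytes per transaction
    const double links              = 4.0;   // Infinity Fabric links inside the OAM
    double per_link      = transfer_rate_gt_s * bytes_per_transfer;  // 50 GB/s
    double one_direction = per_link * links;                         // 200 GB/s
    double bidirectional = one_direction * 2.0;                      // 400 GB/s
    std::printf("per link %.0f GB/s, GCD<->GCD %.0f GB/s, bidirectional %.0f GB/s\n",
                per_link, one_direction, bidirectional);
    return 0;
}
```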
## Node-level architecture
The following image shows the node-level architecture of a system that is
based on the AMD Instinct MI250 accelerator. The MI250 OAMs attach to the host
system via PCIe Gen 4 x16 links (yellow lines). Each GCD maintains its own PCIe
x16 link to the host part of the system. Depending on the server platform, the
GCD can attach to the AMD EPYC processor directly or via an optional PCIe
switch. Note that some platforms may offer an x8 interface to the GCDs, which reduces
the available host-to-GPU bandwidth.
![Block diagram of AMD Instinct MI250 Accelerators with 3rd Generation AMD EPYC processor](../../data/conceptual/gpu-arch/image003.png "Block diagram of AMD Instinct MI250 Accelerators with 3rd Generation AMD EPYC processor")
The preceding image shows the node-level architecture of a system with AMD
EPYC processors in a dual-socket configuration and four AMD Instinct MI250
accelerators. The MI250 OAMs attach to the host processor system via PCIe Gen 4
x16 links (yellow lines). Depending on the system design, a PCIe switch may
exist to make more PCIe lanes available for additional components like network
interfaces and/or storage devices. Each GCD maintains its own PCIe x16 link to
the host part of the system or to the PCIe switch. Please note, some platforms
may offer an x8 interface to the GCDs, which will reduce the available
host-to-GPU bandwidth.
Between the OAMs and their respective GCDs, a peer-to-peer (P2P) network allows
for direct data exchange between the GPU dies via AMD Infinity Fabric links (
black, green, and red lines). Each of these 16-wide links connects to one of the
two GPU dies in the MI250 OAM and operates at 25 GT/sec, which corresponds to a
theoretical peak transfer rate of 50 GB/sec per link (or 100 GB/sec
bidirectional peak transfer bandwidth). The GCD pairs 2 and 6 as well as GCDs 0
and 4 connect via two XGMI links, which is indicated by the thicker red line in
the preceding image.
View File
@@ -1,757 +0,0 @@
.. meta::
:description: MI300 and MI200 series performance counters and metrics
:keywords: MI300, MI200, performance counters, command processor counters
***************************************************************************************************
MI300 and MI200 series performance counters and metrics
***************************************************************************************************
This document lists and describes the hardware performance counters and derived metrics available
for the AMD Instinct™ MI300 and MI200 series GPUs. You can also access this information using the
:doc:`ROCprofiler-SDK <rocprofiler-sdk:how-to/using-rocprofv3>`.
MI300 and MI200 series performance counters
===============================================================
Series performance counters include the following categories:
* :ref:`command-processor-counters`
* :ref:`graphics-register-bus-manager-counters`
* :ref:`spi-counters`
* :ref:`compute-unit-counters`
* :ref:`l1i-and-sl1d-cache-counters`
* :ref:`vector-l1-cache-subsystem-counters`
* :ref:`l2-cache-access-counters`
The following sections provide additional details for each category.
.. note::
Preliminary validation of all MI300 and MI200 series performance counters is in progress. Those with
an asterisk (*) require further evaluation.
.. _command-processor-counters:
Command processor counters
---------------------------------------------------------------------------------------------------------------
Command processor counters are further classified into command processor-fetcher and command
processor-compute.
Command processor-fetcher counters
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. csv-table::
:header: "Hardware counter", "Unit", "Definition"
"``CPF_CMP_UTCL1_STALL_ON_TRANSLATION``", "Cycles", "Number of cycles one of the compute unified translation caches (L1) is stalled waiting on translation"
"``CPF_CPF_STAT_BUSY``", "Cycles", "Number of cycles command processor-fetcher is busy"
"``CPF_CPF_STAT_IDLE``", "Cycles", "Number of cycles command processor-fetcher is idle"
"``CPF_CPF_STAT_STALL``", "Cycles", "Number of cycles command processor-fetcher is stalled"
"``CPF_CPF_TCIU_BUSY``", "Cycles", "Number of cycles command processor-fetcher texture cache interface unit interface is busy"
"``CPF_CPF_TCIU_IDLE``", "Cycles", "Number of cycles command processor-fetcher texture cache interface unit interface is idle"
"``CPF_CPF_TCIU_STALL``", "Cycles", "Number of cycles command processor-fetcher texture cache interface unit interface is stalled waiting on free tags"
The texture cache interface unit is the interface between the command processor and the memory
system.
Command processor-compute counters
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. csv-table::
:header: "Hardware counter", "Unit", "Definition"
"``CPC_ME1_BUSY_FOR_PACKET_DECODE``", "Cycles", "Number of cycles command processor-compute micro engine is busy decoding packets"
"``CPC_UTCL1_STALL_ON_TRANSLATION``", "Cycles", "Number of cycles one of the unified translation caches (L1) is stalled waiting on translation"
"``CPC_CPC_STAT_BUSY``", "Cycles", "Number of cycles command processor-compute is busy"
"``CPC_CPC_STAT_IDLE``", "Cycles", "Number of cycles command processor-compute is idle"
"``CPC_CPC_STAT_STALL``", "Cycles", "Number of cycles command processor-compute is stalled"
"``CPC_CPC_TCIU_BUSY``", "Cycles", "Number of cycles command processor-compute texture cache interface unit interface is busy"
"``CPC_CPC_TCIU_IDLE``", "Cycles", "Number of cycles command processor-compute texture cache interface unit interface is idle"
"``CPC_CPC_UTCL2IU_BUSY``", "Cycles", "Number of cycles command processor-compute unified translation cache (L2) interface is busy"
"``CPC_CPC_UTCL2IU_IDLE``", "Cycles", "Number of cycles command processor-compute unified translation cache (L2) interface is idle"
"``CPC_CPC_UTCL2IU_STALL``", "Cycles", "Number of cycles command processor-compute unified translation cache (L2) interface is stalled"
"``CPC_ME1_DC0_SPI_BUSY``", "Cycles", "Number of cycles command processor-compute micro engine processor is busy"
The micro engine runs packet-processing firmware on the command processor-compute counter.
.. _graphics-register-bus-manager-counters:
Graphics register bus manager counters
---------------------------------------------------------------------------------------------------------------
.. csv-table::
:header: "Hardware counter", "Unit", "Definition"
"``GRBM_COUNT``", "Cycles","Number of free-running GPU cycles"
"``GRBM_GUI_ACTIVE``", "Cycles", "Number of GPU active cycles"
"``GRBM_CP_BUSY``", "Cycles", "Number of cycles any of the command processor blocks are busy"
"``GRBM_SPI_BUSY``", "Cycles", "Number of cycles any of the shader processor input is busy in the shader engines"
"``GRBM_TA_BUSY``", "Cycles", "Number of cycles any of the texture addressing unit is busy in the shader engines"
"``GRBM_TC_BUSY``", "Cycles", "Number of cycles any of the texture cache blocks are busy"
"``GRBM_CPC_BUSY``", "Cycles", "Number of cycles the command processor-compute is busy"
"``GRBM_CPF_BUSY``", "Cycles", "Number of cycles the command processor-fetcher is busy"
"``GRBM_UTCL2_BUSY``", "Cycles", "Number of cycles the unified translation cache (Level 2 [L2]) block is busy"
"``GRBM_EA_BUSY``", "Cycles", "Number of cycles the efficiency arbiter block is busy"
Texture cache blocks include:
* Texture cache arbiter
* Texture cache per pipe, also known as vector Level 1 (L1) cache
* Texture cache per channel, also known as L2 cache
* Texture cache interface
.. _spi-counters:
Shader processor input counters
---------------------------------------------------------------------------------------------------------------
.. csv-table::
:header: "Hardware counter", "Unit", "Definition"
"``SPI_CSN_BUSY``", "Cycles", "Number of cycles with outstanding waves"
"``SPI_CSN_WINDOW_VALID``", "Cycles", "Number of cycles enabled by ``perfcounter_start`` event"
"``SPI_CSN_NUM_THREADGROUPS``", "Workgroups", "Number of dispatched workgroups"
"``SPI_CSN_WAVE``", "Wavefronts", "Number of dispatched wavefronts"
"``SPI_RA_REQ_NO_ALLOC``", "Cycles", "Number of arbiter cycles with requests but no allocation"
"``SPI_RA_REQ_NO_ALLOC_CSN``", "Cycles", "Number of arbiter cycles with compute shader (n\ :sup:`th` pipe) requests but no compute shader (n\ :sup:`th` pipe) allocation"
"``SPI_RA_RES_STALL_CSN``", "Cycles", "Number of arbiter stall cycles due to shortage of compute shader (n\ :sup:`th` pipe) pipeline slots"
"``SPI_RA_TMP_STALL_CSN``", "Cycles", "Number of stall cycles due to shortage of temp space"
"``SPI_RA_WAVE_SIMD_FULL_CSN``", "SIMD-cycles", "Accumulated number of single instruction, multiple data (SIMD) per cycle affected by shortage of wave slots for compute shader (n\ :sup:`th` pipe) wave dispatch"
"``SPI_RA_VGPR_SIMD_FULL_CSN``", "SIMD-cycles", "Accumulated number of SIMDs per cycle affected by shortage of vector general-purpose register (VGPR) slots for compute shader (n\ :sup:`th` pipe) wave dispatch"
"``SPI_RA_SGPR_SIMD_FULL_CSN``", "SIMD-cycles", "Accumulated number of SIMDs per cycle affected by shortage of scalar general-purpose register (SGPR) slots for compute shader (n\ :sup:`th` pipe) wave dispatch"
"``SPI_RA_LDS_CU_FULL_CSN``", "CU", "Number of compute units affected by shortage of local data share (LDS) space for compute shader (n\ :sup:`th` pipe) wave dispatch"
"``SPI_RA_BAR_CU_FULL_CSN``", "CU", "Number of compute units with compute shader (n\ :sup:`th` pipe) waves waiting at a BARRIER"
"``SPI_RA_BULKY_CU_FULL_CSN``", "CU", "Number of compute units with compute shader (n\ :sup:`th` pipe) waves waiting for BULKY resource"
"``SPI_RA_TGLIM_CU_FULL_CSN``", "Cycles", "Number of compute shader (n\ :sup:`th` pipe) wave stall cycles due to restriction of ``tg_limit`` for thread group size"
"``SPI_RA_WVLIM_STALL_CSN``", "Cycles", "Number of cycles compute shader (n\ :sup:`th` pipe) is stalled due to ``WAVE_LIMIT``"
"``SPI_VWC_CSC_WR``", "Qcycles", "Number of quad-cycles taken to initialize VGPRs when launching waves"
"``SPI_SWC_CSC_WR``", "Qcycles", "Number of quad-cycles taken to initialize SGPRs when launching waves"
.. _compute-unit-counters:
Compute unit counters
---------------------------------------------------------------------------------------------------------------
The compute unit counters are further classified into instruction mix, matrix fused multiply-add (FMA)
operation counters, level counters, wavefront counters, wavefront cycle counters, and LDS counters.
Instruction mix
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. csv-table::
:header: "Hardware counter", "Unit", "Definition"
"``SQ_INSTS``", "Instr", "Number of instructions issued"
"``SQ_INSTS_VALU``", "Instr", "Number of vector arithmetic logic unit (VALU) instructions including matrix FMA issued"
"``SQ_INSTS_VALU_ADD_F16``", "Instr", "Number of VALU half-precision floating-point (F16) ``ADD`` or ``SUB`` instructions issued"
"``SQ_INSTS_VALU_MUL_F16``", "Instr", "Number of VALU F16 Multiply instructions issued"
"``SQ_INSTS_VALU_FMA_F16``", "Instr", "Number of VALU F16 FMA or multiply-add instructions issued"
"``SQ_INSTS_VALU_TRANS_F16``", "Instr", "Number of VALU F16 Transcendental instructions issued"
"``SQ_INSTS_VALU_ADD_F32``", "Instr", "Number of VALU full-precision floating-point (F32) ``ADD`` or ``SUB`` instructions issued"
"``SQ_INSTS_VALU_MUL_F32``", "Instr", "Number of VALU F32 Multiply instructions issued"
"``SQ_INSTS_VALU_FMA_F32``", "Instr", "Number of VALU F32 FMAor multiply-add instructions issued"
"``SQ_INSTS_VALU_TRANS_F32``", "Instr", "Number of VALU F32 Transcendental instructions issued"
"``SQ_INSTS_VALU_ADD_F64``", "Instr", "Number of VALU F64 ``ADD`` or ``SUB`` instructions issued"
"``SQ_INSTS_VALU_MUL_F64``", "Instr", "Number of VALU F64 Multiply instructions issued"
"``SQ_INSTS_VALU_FMA_F64``", "Instr", "Number of VALU F64 FMA or multiply-add instructions issued"
"``SQ_INSTS_VALU_TRANS_F64``", "Instr", "Number of VALU F64 Transcendental instructions issued"
"``SQ_INSTS_VALU_INT32``", "Instr", "Number of VALU 32-bit integer instructions (signed or unsigned) issued"
"``SQ_INSTS_VALU_INT64``", "Instr", "Number of VALU 64-bit integer instructions (signed or unsigned) issued"
"``SQ_INSTS_VALU_CVT``", "Instr", "Number of VALU Conversion instructions issued"
"``SQ_INSTS_VALU_MFMA_I8``", "Instr", "Number of 8-bit Integer matrix FMA instructions issued"
"``SQ_INSTS_VALU_MFMA_F16``", "Instr", "Number of F16 matrix FMA instructions issued"
"``SQ_INSTS_VALU_MFMA_F32``", "Instr", "Number of F32 matrix FMA instructions issued"
"``SQ_INSTS_VALU_MFMA_F64``", "Instr", "Number of F64 matrix FMA instructions issued"
"``SQ_INSTS_MFMA``", "Instr", "Number of matrix FMA instructions issued"
"``SQ_INSTS_VMEM_WR``", "Instr", "Number of vector memory write instructions (including flat) issued"
"``SQ_INSTS_VMEM_RD``", "Instr", "Number of vector memory read instructions (including flat) issued"
"``SQ_INSTS_VMEM``", "Instr", "Number of vector memory instructions issued, including both flat and buffer instructions"
"``SQ_INSTS_SALU``", "Instr", "Number of scalar arithmetic logic unit (SALU) instructions issued"
"``SQ_INSTS_SMEM``", "Instr", "Number of scalar memory instructions issued"
"``SQ_INSTS_SMEM_NORM``", "Instr", "Number of scalar memory instructions normalized to match ``smem_level`` issued"
"``SQ_INSTS_FLAT``", "Instr", "Number of flat instructions issued"
"``SQ_INSTS_FLAT_LDS_ONLY``", "Instr", "**MI200 series only** Number of FLAT instructions that read/write only from/to LDS issued. Works only if ``EARLY_TA_DONE`` is enabled."
"``SQ_INSTS_LDS``", "Instr", "Number of LDS instructions issued **(MI200: includes flat; MI300: does not include flat)**"
"``SQ_INSTS_GDS``", "Instr", "Number of global data share instructions issued"
"``SQ_INSTS_EXP_GDS``", "Instr", "Number of EXP and global data share instructions excluding skipped export instructions issued"
"``SQ_INSTS_BRANCH``", "Instr", "Number of branch instructions issued"
"``SQ_INSTS_SENDMSG``", "Instr", "Number of ``SENDMSG`` instructions including ``s_endpgm`` issued"
"``SQ_INSTS_VSKIPPED``", "Instr", "Number of vector instructions skipped"
Flat instructions allow read, write, and atomic access to a generic memory address pointer that can
resolve to any of the following physical memories:
* Global Memory
* Scratch ("private")
* LDS ("shared")
* Invalid - ``MEM_VIOL`` TrapStatus
Matrix fused multiply-add operation counters
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. csv-table::
:header: "Hardware counter", "Unit", "Definition"
"``SQ_INSTS_VALU_MFMA_MOPS_I8``", "IOP", "Number of 8-bit integer matrix FMA ops in the unit of 512"
"``SQ_INSTS_VALU_MFMA_MOPS_F16``", "FLOP", "Number of F16 floating matrix FMA ops in the unit of 512"
"``SQ_INSTS_VALU_MFMA_MOPS_BF16``", "FLOP", "Number of BF16 floating matrix FMA ops in the unit of 512"
"``SQ_INSTS_VALU_MFMA_MOPS_F32``", "FLOP", "Number of F32 floating matrix FMA ops in the unit of 512"
"``SQ_INSTS_VALU_MFMA_MOPS_F64``", "FLOP", "Number of F64 floating matrix FMA ops in the unit of 512"
Level counters
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. note::
All level counters must be followed by ``SQ_ACCUM_PREV_HIRES`` counter to measure average latency.
.. csv-table::
:header: "Hardware counter", "Unit", "Definition"
"``SQ_ACCUM_PREV``", "Count", "Accumulated counter sample value where accumulation takes place once every four cycles"
"``SQ_ACCUM_PREV_HIRES``", "Count", "Accumulated counter sample value where accumulation takes place once every cycle"
"``SQ_LEVEL_WAVES``", "Waves", "Number of inflight waves"
"``SQ_INST_LEVEL_VMEM``", "Instr", "Number of inflight vector memory (including flat) instructions"
"``SQ_INST_LEVEL_SMEM``", "Instr", "Number of inflight scalar memory instructions"
"``SQ_INST_LEVEL_LDS``", "Instr", "Number of inflight LDS (including flat) instructions"
"``SQ_IFETCH_LEVEL``", "Instr", "Number of inflight instruction fetch requests from the cache"
Use the following formulae to calculate latencies:
* Vector memory latency = ``SQ_ACCUM_PREV_HIRES`` divided by ``SQ_INSTS_VMEM``
* Wave latency = ``SQ_ACCUM_PREV_HIRES`` divided by ``SQ_WAVE``
* LDS latency = ``SQ_ACCUM_PREV_HIRES`` divided by ``SQ_INSTS_LDS``
* Scalar memory latency = ``SQ_ACCUM_PREV_HIRES`` divided by ``SQ_INSTS_SMEM_NORM``
* Instruction fetch latency = ``SQ_ACCUM_PREV_HIRES`` divided by ``SQ_IFETCH``
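As an illustrative sketch only, each ratio reduces to a simple division once the counters have been collected. The values below are made up, and each latency assumes ``SQ_ACCUM_PREV_HIRES`` was accumulated while following the corresponding level counter:

.. code-block:: cpp

   // latency_sketch.cpp -- illustrative only: average latency from level counters.
   #include <cstdio>

   int main() {
       // Made-up sample values; real values come from a profiling run.
       double accum_hires_vmem = 1.2e7;  // SQ_ACCUM_PREV_HIRES following SQ_INST_LEVEL_VMEM
       double sq_insts_vmem    = 4.0e4;  // SQ_INSTS_VMEM
       double accum_hires_wave = 6.4e7;  // SQ_ACCUM_PREV_HIRES following SQ_LEVEL_WAVES
       double sq_waves         = 2.0e3;  // SQ_WAVES

       double vmem_latency = accum_hires_vmem / sq_insts_vmem;  // cycles per VMEM instruction
       double wave_latency = accum_hires_wave / sq_waves;       // cycles per wave
       std::printf("avg VMEM latency: %.0f cycles, avg wave latency: %.0f cycles\n",
                   vmem_latency, wave_latency);
       return 0;
   }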
Wavefront counters
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. csv-table::
:header: "Hardware counter", "Unit", "Definition"
"``SQ_WAVES``", "Waves", "Number of wavefronts dispatched to sequencers, including both new and restored wavefronts"
"``SQ_WAVES_SAVED``", "Waves", "Number of context-saved waves"
"``SQ_WAVES_RESTORED``", "Waves", "Number of context-restored waves sent to sequencers"
"``SQ_WAVES_EQ_64``", "Waves", "Number of wavefronts with exactly 64 active threads sent to sequencers"
"``SQ_WAVES_LT_64``", "Waves", "Number of wavefronts with less than 64 active threads sent to sequencers"
"``SQ_WAVES_LT_48``", "Waves", "Number of wavefronts with less than 48 active threads sent to sequencers"
"``SQ_WAVES_LT_32``", "Waves", "Number of wavefronts with less than 32 active threads sent to sequencers"
"``SQ_WAVES_LT_16``", "Waves", "Number of wavefronts with less than 16 active threads sent to sequencers"
Wavefront cycle counters
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. csv-table::
:header: "Hardware counter", "Unit", "Definition"
"``SQ_CYCLES``", "Cycles", "Clock cycles"
"``SQ_BUSY_CYCLES``", "Cycles", "Number of cycles while sequencers reports it to be busy"
"``SQ_BUSY_CU_CYCLES``", "Qcycles", "Number of quad-cycles each compute unit is busy"
"``SQ_VALU_MFMA_BUSY_CYCLES``", "Cycles", "Number of cycles the matrix FMA arithmetic logic unit (ALU) is busy"
"``SQ_WAVE_CYCLES``", "Qcycles", "Number of quad-cycles spent by waves in the compute units"
"``SQ_WAIT_ANY``", "Qcycles", "Number of quad-cycles spent waiting for anything"
"``SQ_WAIT_INST_ANY``", "Qcycles", "Number of quad-cycles spent waiting for any instruction to be issued"
"``SQ_ACTIVE_INST_ANY``", "Qcycles", "Number of quad-cycles spent by each wave to work on an instruction"
"``SQ_ACTIVE_INST_VMEM``", "Qcycles", "Number of quad-cycles spent by the sequencer instruction arbiter to work on a vector memory instruction"
"``SQ_ACTIVE_INST_LDS``", "Qcycles", "Number of quad-cycles spent by the sequencer instruction arbiter to work on an LDS instruction"
"``SQ_ACTIVE_INST_VALU``", "Qcycles", "Number of quad-cycles spent by the sequencer instruction arbiter to work on a VALU instruction"
"``SQ_ACTIVE_INST_SCA``", "Qcycles", "Number of quad-cycles spent by the sequencer instruction arbiter to work on a SALU or scalar memory instruction"
"``SQ_ACTIVE_INST_EXP_GDS``", "Qcycles", "Number of quad-cycles spent by the sequencer instruction arbiter to work on an ``EXPORT`` or ``GDS`` instruction"
"``SQ_ACTIVE_INST_MISC``", "Qcycles", "Number of quad-cycles spent by the sequencer instruction arbiter to work on a ``BRANCH`` or ``SENDMSG`` instruction"
"``SQ_ACTIVE_INST_FLAT``", "Qcycles", "Number of quad-cycles spent by the sequencer instruction arbiter to work on a flat instruction"
"``SQ_INST_CYCLES_VMEM_WR``", "Qcycles", "Number of quad-cycles spent to send addr and cmd data for vector memory write instructions"
"``SQ_INST_CYCLES_VMEM_RD``", "Qcycles", "Number of quad-cycles spent to send addr and cmd data for vector memory read instructions"
"``SQ_INST_CYCLES_SMEM``", "Qcycles", "Number of quad-cycles spent to execute scalar memory reads"
"``SQ_INST_CYCLES_SALU``", "Qcycles", "Number of quad-cycles spent to execute non-memory read scalar operations"
"``SQ_THREAD_CYCLES_VALU``", "Qcycles", "Number of quad-cycles spent to execute VALU operations on active threads"
"``SQ_WAIT_INST_LDS``", "Qcycles", "Number of quad-cycles spent waiting for LDS instruction to be issued"
``SQ_THREAD_CYCLES_VALU`` is similar to ``INST_CYCLES_VALU``, but it's multiplied by the number of
active threads.
LDS counters
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. csv-table::
:header: "Hardware counter", "Unit", "Definition"
"``SQ_LDS_ATOMIC_RETURN``", "Cycles", "Number of atomic return cycles in LDS"
"``SQ_LDS_BANK_CONFLICT``", "Cycles", "Number of cycles LDS is stalled by bank conflicts"
"``SQ_LDS_ADDR_CONFLICT``", "Cycles", "Number of cycles LDS is stalled by address conflicts"
"``SQ_LDS_UNALIGNED_STALL``", "Cycles", "Number of cycles LDS is stalled processing flat unaligned load or store operations"
"``SQ_LDS_MEM_VIOLATIONS``", "Count", "Number of threads that have a memory violation in the LDS"
"``SQ_LDS_IDX_ACTIVE``", "Cycles", "Number of cycles LDS is used for indexed operations"
Miscellaneous counters
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. csv-table::
:header: "Hardware counter", "Unit", "Definition"
"``SQ_IFETCH``", "Count", "Number of instruction fetch requests from L1i, in 32-byte width"
"``SQ_ITEMS``", "Threads", "Number of valid items per wave"
.. _l1i-and-sl1d-cache-counters:
L1 instruction cache (L1i) and scalar L1 data cache (L1d) counters
---------------------------------------------------------------------------------------------------------------
.. csv-table::
:header: "Hardware counter", "Unit", "Definition"
"``SQC_ICACHE_REQ``", "Req", "Number of L1 instruction (L1i) cache requests"
"``SQC_ICACHE_HITS``", "Count", "Number of L1i cache hits"
"``SQC_ICACHE_MISSES``", "Count", "Number of non-duplicate L1i cache misses including uncached requests"
"``SQC_ICACHE_MISSES_DUPLICATE``", "Count", "Number of duplicate L1i cache misses whose previous lookup miss on the same cache line is not fulfilled yet"
"``SQC_DCACHE_REQ``", "Req", "Number of scalar L1d requests"
"``SQC_DCACHE_INPUT_VALID_READYB``", "Cycles", "Number of cycles while sequencer input is valid but scalar L1d is not ready"
"``SQC_DCACHE_HITS``", "Count", "Number of scalar L1d hits"
"``SQC_DCACHE_MISSES``", "Count", "Number of non-duplicate scalar L1d misses including uncached requests"
"``SQC_DCACHE_MISSES_DUPLICATE``", "Count", "Number of duplicate scalar L1d misses"
"``SQC_DCACHE_REQ_READ_1``", "Req", "Number of constant cache read requests in a single 32-bit data word"
"``SQC_DCACHE_REQ_READ_2``", "Req", "Number of constant cache read requests in two 32-bit data words"
"``SQC_DCACHE_REQ_READ_4``", "Req", "Number of constant cache read requests in four 32-bit data words"
"``SQC_DCACHE_REQ_READ_8``", "Req", "Number of constant cache read requests in eight 32-bit data words"
"``SQC_DCACHE_REQ_READ_16``", "Req", "Number of constant cache read requests in 16 32-bit data words"
"``SQC_DCACHE_ATOMIC``", "Req", "Number of atomic requests"
"``SQC_TC_REQ``", "Req", "Number of texture cache requests that were issued by instruction and constant caches"
"``SQC_TC_INST_REQ``", "Req", "Number of instruction requests to the L2 cache"
"``SQC_TC_DATA_READ_REQ``", "Req", "Number of data Read requests to the L2 cache"
"``SQC_TC_DATA_WRITE_REQ``", "Req", "Number of data write requests to the L2 cache"
"``SQC_TC_DATA_ATOMIC_REQ``", "Req", "Number of data atomic requests to the L2 cache"
"``SQC_TC_STALL``", "Cycles", "Number of cycles while the valid requests to the L2 cache are stalled"
.. _vector-l1-cache-subsystem-counters:
Vector L1 cache subsystem counters
---------------------------------------------------------------------------------------------------------------
The vector L1 cache subsystem counters are further classified into texture addressing unit, texture data
unit, vector L1d or texture cache per pipe, and texture cache arbiter counters.
Texture addressing unit counters
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. csv-table::
:header: "Hardware counter", "Unit", "Definition", "Value range for ``n``"
"``TA_TA_BUSY[n]``", "Cycles", "Texture addressing unit busy cycles", "0-15"
"``TA_TOTAL_WAVEFRONTS[n]``", "Instr", "Number of wavefronts processed by texture addressing unit", "0-15"
"``TA_BUFFER_WAVEFRONTS[n]``", "Instr", "Number of buffer wavefronts processed by texture addressing unit", "0-15"
"``TA_BUFFER_READ_WAVEFRONTS[n]``", "Instr", "Number of buffer read wavefronts processed by texture addressing unit", "0-15"
"``TA_BUFFER_WRITE_WAVEFRONTS[n]``", "Instr", "Number of buffer write wavefronts processed by texture addressing unit", "0-15"
"``TA_BUFFER_ATOMIC_WAVEFRONTS[n]``", "Instr", "Number of buffer atomic wavefronts processed by texture addressing unit", "0-15"
"``TA_BUFFER_TOTAL_CYCLES[n]``", "Cycles", "Number of buffer cycles (including read and write) issued to texture cache", "0-15"
"``TA_BUFFER_COALESCED_READ_CYCLES[n]``", "Cycles", "Number of coalesced buffer read cycles issued to texture cache", "0-15"
"``TA_BUFFER_COALESCED_WRITE_CYCLES[n]``", "Cycles", "Number of coalesced buffer write cycles issued to texture cache", "0-15"
"``TA_ADDR_STALLED_BY_TC_CYCLES[n]``", "Cycles", "Number of cycles texture addressing unit address path is stalled by texture cache", "0-15"
"``TA_DATA_STALLED_BY_TC_CYCLES[n]``", "Cycles", "Number of cycles texture addressing unit data path is stalled by texture cache", "0-15"
"``TA_ADDR_STALLED_BY_TD_CYCLES[n]``", "Cycles", "Number of cycles texture addressing unit address path is stalled by texture data unit", "0-15"
"``TA_FLAT_WAVEFRONTS[n]``", "Instr", "Number of flat opcode wavefronts processed by texture addressing unit", "0-15"
"``TA_FLAT_READ_WAVEFRONTS[n]``", "Instr", "Number of flat opcode read wavefronts processed by texture addressing unit", "0-15"
"``TA_FLAT_WRITE_WAVEFRONTS[n]``", "Instr", "Number of flat opcode write wavefronts processed by texture addressing unit", "0-15"
"``TA_FLAT_ATOMIC_WAVEFRONTS[n]``", "Instr", "Number of flat opcode atomic wavefronts processed by texture addressing unit", "0-15"
Texture data unit counters
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. csv-table::
:header: "Hardware counter", "Unit", "Definition", "Value range for ``n``"
"``TD_TD_BUSY[n]``", "Cycle", "Texture data unit busy cycles while it is processing or waiting for data", "0-15"
"``TD_TC_STALL[n]``", "Cycle", "Number of cycles texture data unit is stalled waiting for texture cache data", "0-15"
"``TD_SPI_STALL[n]``", "Cycle", "Number of cycles texture data unit is stalled by shader processor input", "0-15"
"``TD_LOAD_WAVEFRONT[n]``", "Instr", "Number of wavefront instructions (read, write, atomic)", "0-15"
"``TD_STORE_WAVEFRONT[n]``", "Instr", "Number of write wavefront instructions", "0-15"
"``TD_ATOMIC_WAVEFRONT[n]``", "Instr", "Number of atomic wavefront instructions", "0-15"
"``TD_COALESCABLE_WAVEFRONT[n]``", "Instr", "Number of coalescable wavefronts according to texture addressing unit", "0-15"
Texture cache per pipe counters
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. csv-table::
:header: "Hardware counter", "Unit", "Definition", "Value range for ``n``"
"``TCP_GATE_EN1[n]``", "Cycles", "Number of cycles vector L1d interface clocks are turned on", "0-15"
"``TCP_GATE_EN2[n]``", "Cycles", "Number of cycles vector L1d core clocks are turned on", "0-15"
"``TCP_TD_TCP_STALL_CYCLES[n]``", "Cycles", "Number of cycles texture data unit stalls vector L1d", "0-15"
"``TCP_TCR_TCP_STALL_CYCLES[n]``", "Cycles", "Number of cycles texture cache router stalls vector L1d", "0-15"
"``TCP_READ_TAGCONFLICT_STALL_CYCLES[n]``", "Cycles", "Number of cycles tag RAM conflict stalls on a read", "0-15"
"``TCP_WRITE_TAGCONFLICT_STALL_CYCLES[n]``", "Cycles", "Number of cycles tag RAM conflict stalls on a write", "0-15"
"``TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES[n]``", "Cycles", "Number of cycles tag RAM conflict stalls on an atomic", "0-15"
"``TCP_PENDING_STALL_CYCLES[n]``", "Cycles", "Number of cycles vector L1d is stalled due to data pending from L2 Cache", "0-15"
"``TCP_TCP_TA_DATA_STALL_CYCLES``", "Cycles", "Number of cycles texture cache per pipe stalls texture addressing unit data interface", "NA"
"``TCP_TA_TCP_STATE_READ[n]``", "Req", "Number of state reads", "0-15"
"``TCP_VOLATILE[n]``", "Req", "Number of L1 volatile pixels or buffers from texture addressing unit", "0-15"
"``TCP_TOTAL_ACCESSES[n]``", "Req", "Number of vector L1d accesses. Equals ``TCP_PERF_SEL_TOTAL_READ`+`TCP_PERF_SEL_TOTAL_NONREAD``", "0-15"
"``TCP_TOTAL_READ[n]``", "Req", "Number of vector L1d read accesses", "0-15"
"``TCP_TOTAL_WRITE[n]``", "Req", "Number of vector L1d write accesses", "0-15"
"``TCP_TOTAL_ATOMIC_WITH_RET[n]``", "Req", "Number of vector L1d atomic requests with return", "0-15"
"``TCP_TOTAL_ATOMIC_WITHOUT_RET[n]``", "Req", "Number of vector L1d atomic without return", "0-15"
"``TCP_TOTAL_WRITEBACK_INVALIDATES[n]``", "Count", "Total number of vector L1d writebacks and invalidates", "0-15"
"``TCP_UTCL1_REQUEST[n]``", "Req", "Number of address translation requests to unified translation cache (L1)", "0-15"
"``TCP_UTCL1_TRANSLATION_HIT[n]``", "Req", "Number of unified translation cache (L1) translation hits", "0-15"
"``TCP_UTCL1_TRANSLATION_MISS[n]``", "Req", "Number of unified translation cache (L1) translation misses", "0-15"
"``TCP_UTCL1_PERMISSION_MISS[n]``", "Req", "Number of unified translation cache (L1) permission misses", "0-15"
"``TCP_TOTAL_CACHE_ACCESSES[n]``", "Req", "Number of vector L1d cache accesses including hits and misses", "0-15"
"``TCP_TCP_LATENCY[n]``", "Cycles", "**MI200 series only** Accumulated wave access latency to vL1D over all wavefronts", "0-15"
"``TCP_TCC_READ_REQ_LATENCY[n]``", "Cycles", "**MI200 series only** Total vL1D to L2 request latency over all wavefronts for reads and atomics with return", "0-15"
"``TCP_TCC_WRITE_REQ_LATENCY[n]``", "Cycles", "**MI200 series only** Total vL1D to L2 request latency over all wavefronts for writes and atomics without return", "0-15"
"``TCP_TCC_READ_REQ[n]``", "Req", "Number of read requests to L2 cache", "0-15"
"``TCP_TCC_WRITE_REQ[n]``", "Req", "Number of write requests to L2 cache", "0-15"
"``TCP_TCC_ATOMIC_WITH_RET_REQ[n]``", "Req", "Number of atomic requests to L2 cache with return", "0-15"
"``TCP_TCC_ATOMIC_WITHOUT_RET_REQ[n]``", "Req", "Number of atomic requests to L2 cache without return", "0-15"
"``TCP_TCC_NC_READ_REQ[n]``", "Req", "Number of non-coherently cached read requests to L2 cache", "0-15"
"``TCP_TCC_UC_READ_REQ[n]``", "Req", "Number of uncached read requests to L2 cache", "0-15"
"``TCP_TCC_CC_READ_REQ[n]``", "Req", "Number of coherently cached read requests to L2 cache", "0-15"
"``TCP_TCC_RW_READ_REQ[n]``", "Req", "Number of coherently cached with write read requests to L2 cache", "0-15"
"``TCP_TCC_NC_WRITE_REQ[n]``", "Req", "Number of non-coherently cached write requests to L2 cache", "0-15"
"``TCP_TCC_UC_WRITE_REQ[n]``", "Req", "Number of uncached write requests to L2 cache", "0-15"
"``TCP_TCC_CC_WRITE_REQ[n]``", "Req", "Number of coherently cached write requests to L2 cache", "0-15"
"``TCP_TCC_RW_WRITE_REQ[n]``", "Req", "Number of coherently cached with write write requests to L2 cache", "0-15"
"``TCP_TCC_NC_ATOMIC_REQ[n]``", "Req", "Number of non-coherently cached atomic requests to L2 cache", "0-15"
"``TCP_TCC_UC_ATOMIC_REQ[n]``", "Req", "Number of uncached atomic requests to L2 cache", "0-15"
"``TCP_TCC_CC_ATOMIC_REQ[n]``", "Req", "Number of coherently cached atomic requests to L2 cache", "0-15"
"``TCP_TCC_RW_ATOMIC_REQ[n]``", "Req", "Number of coherently cached with write atomic requests to L2 cache", "0-15"
Note that:
* ``TCP_TOTAL_READ[n]`` = ``TCP_PERF_SEL_TOTAL_HIT_LRU_READ`` + ``TCP_PERF_SEL_TOTAL_MISS_LRU_READ`` + ``TCP_PERF_SEL_TOTAL_MISS_EVICT_READ``
* ``TCP_TOTAL_WRITE[n]`` = ``TCP_PERF_SEL_TOTAL_MISS_LRU_WRITE`` + ``TCP_PERF_SEL_TOTAL_MISS_EVICT_WRITE``
* ``TCP_TOTAL_WRITEBACK_INVALIDATES[n]`` = ``TCP_PERF_SEL_TOTAL_WBINVL1`` + ``TCP_PERF_SEL_TOTAL_WBINVL1_VOL`` + ``TCP_PERF_SEL_CP_TCP_INVALIDATE`` + ``TCP_PERF_SEL_SQ_TCP_INVALIDATE_VOL``
Texture cache arbiter counters
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. csv-table::
:header: "Hardware counter", "Unit", "Definition", "Value range for ``n``"
"``TCA_CYCLE[n]``", "Cycles", "Number of texture cache arbiter cycles", "0-31"
"``TCA_BUSY[n]``", "Cycles", "Number of cycles texture cache arbiter has a pending request", "0-31"
.. _l2-cache-access-counters:
L2 cache access counters
---------------------------------------------------------------------------------------------------------------
L2 cache is also known as texture cache per channel.
.. tab-set::
.. tab-item:: MI300 hardware counter
.. csv-table::
:header: "Hardware counter", "Unit", "Definition", "Value range for ``n``"
"``TCC_CYCLE[n]``", "Cycles", "Number of L2 cache free-running clocks", "0-31"
"``TCC_BUSY[n]``", "Cycles", "Number of L2 cache busy cycles", "0-31"
"``TCC_REQ[n]``", "Req", "Number of L2 cache requests of all types (measured at the tag block)", "0-31"
"``TCC_STREAMING_REQ[n]``", "Req", "Number of L2 cache streaming requests (measured at the tag block)", "0-31"
"``TCC_NC_REQ[n]``", "Req", "Number of non-coherently cached requests (measured at the tag block)", "0-31"
"``TCC_UC_REQ[n]``", "Req", "Number of uncached requests. This is measured at the tag block", "0-31"
"``TCC_CC_REQ[n]``", "Req", "Number of coherently cached requests. This is measured at the tag block", "0-31"
"``TCC_RW_REQ[n]``", "Req", "Number of coherently cached with write requests. This is measured at the tag block", "0-31"
"``TCC_PROBE[n]``", "Req", "Number of probe requests", "0-31"
"``TCC_PROBE_ALL[n]``", "Req", "Number of external probe requests with ``EA_TCC_preq_all == 1``", "0-31"
"``TCC_READ[n]``", "Req", "Number of L2 cache read requests (includes compressed reads but not metadata reads)", "0-31"
"``TCC_WRITE[n]``", "Req", "Number of L2 cache write requests", "0-31"
"``TCC_ATOMIC[n]``", "Req", "Number of L2 cache atomic requests of all types", "0-31"
"``TCC_HIT[n]``", "Req", "Number of L2 cache hits", "0-31"
"``TCC_MISS[n]``", "Req", "Number of L2 cache misses", "0-31"
"``TCC_WRITEBACK[n]``", "Req", "Number of lines written back to the main memory, including writebacks of dirty lines and uncached write or atomic requests", "0-31"
"``TCC_EA0_WRREQ[n]``", "Req", "Number of 32-byte and 64-byte transactions going over the ``TC_EA_wrreq`` interface (doesn't include probe commands)", "0-31"
"``TCC_EA0_WRREQ_64B[n]``", "Req", "Total number of 64-byte transactions (write or ``CMPSWAP``) going over the ``TC_EA_wrreq`` interface", "0-31"
"``TCC_EA0_WR_UNCACHED_32B[n]``", "Req", "Number of 32 or 64-byte write or atomic going over the ``TC_EA_wrreq`` interface due to uncached traffic", "0-31"
"``TCC_EA0_WRREQ_STALL[n]``", "Cycles", "Number of cycles a write request is stalled", "0-31"
"``TCC_EA0_WRREQ_IO_CREDIT_STALL[n]``", "Cycles", "Number of cycles an efficiency arbiter write request is stalled due to the interface running out of input-output (IO) credits", "0-31"
"``TCC_EA0_WRREQ_GMI_CREDIT_STALL[n]``", "Cycles", "Number of cycles an efficiency arbiter write request is stalled due to the interface running out of GMI credits", "0-31"
"``TCC_EA0_WRREQ_DRAM_CREDIT_STALL[n]``", "Cycles", "Number of cycles an efficiency arbiter write request is stalled due to the interface running out of DRAM credits", "0-31"
"``TCC_TOO_MANY_EA_WRREQS_STALL[n]``", "Cycles", "Number of cycles the L2 cache is unable to send an efficiency arbiter write request due to it reaching its maximum capacity of pending efficiency arbiter write requests", "0-31"
"``TCC_EA0_WRREQ_LEVEL[n]``", "Req", "The accumulated number of efficiency arbiter write requests in flight", "0-31"
"``TCC_EA0_ATOMIC[n]``", "Req", "Number of 32-byte or 64-byte atomic requests going over the ``TC_EA_wrreq`` interface", "0-31"
"``TCC_EA0_ATOMIC_LEVEL[n]``", "Req", "The accumulated number of efficiency arbiter atomic requests in flight", "0-31"
"``TCC_EA0_RDREQ[n]``", "Req", "Number of 32-byte or 64-byte read requests to efficiency arbiter", "0-31"
"``TCC_EA0_RDREQ_32B[n]``", "Req", "Number of 32-byte read requests to efficiency arbiter", "0-31"
"``TCC_EA0_RD_UNCACHED_32B[n]``", "Req", "Number of 32-byte efficiency arbiter reads due to uncached traffic. A 64-byte request is counted as 2", "0-31"
"``TCC_EA0_RDREQ_IO_CREDIT_STALL[n]``", "Cycles", "Number of cycles there is a stall due to the read request interface running out of IO credits", "0-31"
"``TCC_EA0_RDREQ_GMI_CREDIT_STALL[n]``", "Cycles", "Number of cycles there is a stall due to the read request interface running out of GMI credits", "0-31"
"``TCC_EA0_RDREQ_DRAM_CREDIT_STALL[n]``", "Cycles", "Number of cycles there is a stall due to the read request interface running out of DRAM credits", "0-31"
"``TCC_EA0_RDREQ_LEVEL[n]``", "Req", "The accumulated number of efficiency arbiter read requests in flight", "0-31"
"``TCC_EA0_RDREQ_DRAM[n]``", "Req", "Number of 32-byte or 64-byte efficiency arbiter read requests to High Bandwidth Memory (HBM)", "0-31"
"``TCC_EA0_WRREQ_DRAM[n]``", "Req", "Number of 32-byte or 64-byte efficiency arbiter write requests to HBM", "0-31"
"``TCC_TAG_STALL[n]``", "Cycles", "Number of cycles the normal request pipeline in the tag is stalled for any reason", "0-31"
"``TCC_NORMAL_WRITEBACK[n]``", "Req", "Number of writebacks due to requests that are not writeback requests", "0-31"
"``TCC_ALL_TC_OP_WB_WRITEBACK[n]``", "Req", "Number of writebacks due to all ``TC_OP`` writeback requests", "0-31"
"``TCC_NORMAL_EVICT[n]``", "Req", "Number of evictions due to requests that are not invalidate or probe requests", "0-31"
"``TCC_ALL_TC_OP_INV_EVICT[n]``", "Req", "Number of evictions due to all ``TC_OP`` invalidate requests", "0-31"
.. tab-item:: MI200 hardware counter
.. csv-table::
:header: "Hardware counter", "Unit", "Definition", "Value range for ``n``"
"``TCC_CYCLE[n]``", "Cycles", "Number of L2 cache free-running clocks", "0-31"
"``TCC_BUSY[n]``", "Cycles", "Number of L2 cache busy cycles", "0-31"
"``TCC_REQ[n]``", "Req", "Number of L2 cache requests of all types (measured at the tag block)", "0-31"
"``TCC_STREAMING_REQ[n]``", "Req", "Number of L2 cache streaming requests (measured at the tag block)", "0-31"
"``TCC_NC_REQ[n]``", "Req", "Number of non-coherently cached requests (measured at the tag block)", "0-31"
"``TCC_UC_REQ[n]``", "Req", "Number of uncached requests. This is measured at the tag block", "0-31"
"``TCC_CC_REQ[n]``", "Req", "Number of coherently cached requests. This is measured at the tag block", "0-31"
"``TCC_RW_REQ[n]``", "Req", "Number of coherently cached with write requests. This is measured at the tag block", "0-31"
"``TCC_PROBE[n]``", "Req", "Number of probe requests", "0-31"
"``TCC_PROBE_ALL[n]``", "Req", "Number of external probe requests with ``EA_TCC_preq_all == 1``", "0-31"
"``TCC_READ[n]``", "Req", "Number of L2 cache read requests (includes compressed reads but not metadata reads)", "0-31"
"``TCC_WRITE[n]``", "Req", "Number of L2 cache write requests", "0-31"
"``TCC_ATOMIC[n]``", "Req", "Number of L2 cache atomic requests of all types", "0-31"
"``TCC_HIT[n]``", "Req", "Number of L2 cache hits", "0-31"
"``TCC_MISS[n]``", "Req", "Number of L2 cache misses", "0-31"
"``TCC_WRITEBACK[n]``", "Req", "Number of lines written back to the main memory, including writebacks of dirty lines and uncached write or atomic requests", "0-31"
"``TCC_EA_WRREQ[n]``", "Req", "Number of 32-byte and 64-byte transactions going over the ``TC_EA_wrreq`` interface (doesn't include probe commands)", "0-31"
"``TCC_EA_WRREQ_64B[n]``", "Req", "Total number of 64-byte transactions (write or ``CMPSWAP``) going over the ``TC_EA_wrreq`` interface", "0-31"
"``TCC_EA_WR_UNCACHED_32B[n]``", "Req", "Number of 32 write or atomic going over the ``TC_EA_wrreq`` interface due to uncached traffic. A 64-byte request will be counted as 2", "0-31"
"``TCC_EA_WRREQ_STALL[n]``", "Cycles", "Number of cycles a write request is stalled", "0-31"
"``TCC_EA_WRREQ_IO_CREDIT_STALL[n]``", "Cycles", "Number of cycles an efficiency arbiter write request is stalled due to the interface running out of input-output (IO) credits", "0-31"
"``TCC_EA_WRREQ_GMI_CREDIT_STALL[n]``", "Cycles", "Number of cycles an efficiency arbiter write request is stalled due to the interface running out of GMI credits", "0-31"
"``TCC_EA_WRREQ_DRAM_CREDIT_STALL[n]``", "Cycles", "Number of cycles an efficiency arbiter write request is stalled due to the interface running out of DRAM credits", "0-31"
"``TCC_TOO_MANY_EA_WRREQS_STALL[n]``", "Cycles", "Number of cycles the L2 cache is unable to send an efficiency arbiter write request due to it reaching its maximum capacity of pending efficiency arbiter write requests", "0-31"
"``TCC_EA_WRREQ_LEVEL[n]``", "Req", "The accumulated number of efficiency arbiter write requests in flight", "0-31"
"``TCC_EA_ATOMIC[n]``", "Req", "Number of 32-byte or 64-byte atomic requests going over the ``TC_EA_wrreq`` interface", "0-31"
"``TCC_EA_ATOMIC_LEVEL[n]``", "Req", "The accumulated number of efficiency arbiter atomic requests in flight", "0-31"
"``TCC_EA_RDREQ[n]``", "Req", "Number of 32-byte or 64-byte read requests to efficiency arbiter", "0-31"
"``TCC_EA_RDREQ_32B[n]``", "Req", "Number of 32-byte read requests to efficiency arbiter", "0-31"
"``TCC_EA_RD_UNCACHED_32B[n]``", "Req", "Number of 32-byte efficiency arbiter reads due to uncached traffic. A 64-byte request is counted as 2", "0-31"
"``TCC_EA_RDREQ_IO_CREDIT_STALL[n]``", "Cycles", "Number of cycles there is a stall due to the read request interface running out of IO credits", "0-31"
"``TCC_EA_RDREQ_GMI_CREDIT_STALL[n]``", "Cycles", "Number of cycles there is a stall due to the read request interface running out of GMI credits", "0-31"
"``TCC_EA_RDREQ_DRAM_CREDIT_STALL[n]``", "Cycles", "Number of cycles there is a stall due to the read request interface running out of DRAM credits", "0-31"
"``TCC_EA_RDREQ_LEVEL[n]``", "Req", "The accumulated number of efficiency arbiter read requests in flight", "0-31"
"``TCC_EA_RDREQ_DRAM[n]``", "Req", "Number of 32-byte or 64-byte efficiency arbiter read requests to High Bandwidth Memory (HBM)", "0-31"
"``TCC_EA_WRREQ_DRAM[n]``", "Req", "Number of 32-byte or 64-byte efficiency arbiter write requests to HBM", "0-31"
"``TCC_TAG_STALL[n]``", "Cycles", "Number of cycles the normal request pipeline in the tag is stalled for any reason", "0-31"
"``TCC_NORMAL_WRITEBACK[n]``", "Req", "Number of writebacks due to requests that are not writeback requests", "0-31"
"``TCC_ALL_TC_OP_WB_WRITEBACK[n]``", "Req", "Number of writebacks due to all ``TC_OP`` writeback requests", "0-31"
"``TCC_NORMAL_EVICT[n]``", "Req", "Number of evictions due to requests that are not invalidate or probe requests", "0-31"
"``TCC_ALL_TC_OP_INV_EVICT[n]``", "Req", "Number of evictions due to all ``TC_OP`` invalidate requests", "0-31"
Note the following:
* ``TCC_REQ[n]`` may be more than the number of requests arriving at the texture cache per channel,
but it's a good indication of the total amount of work that needs to be performed.
* For ``TCC_EA0_WRREQ[n]``, atomics may travel over the same interface and are generally classified as
write requests.
* Coherently cached (CC) mtypes can produce uncached requests, and those are included in
  ``TCC_EA0_WR_UNCACHED_32B[n]``.
* ``TCC_EA0_WRREQ_LEVEL[n]`` is primarily intended to measure average efficiency arbiter write latency.
* Average write latency = ``TCC_PERF_SEL_EA0_WRREQ_LEVEL`` divided by ``TCC_PERF_SEL_EA0_WRREQ``
* ``TCC_EA0_ATOMIC_LEVEL[n]`` is primarily intended to measure average efficiency arbiter atomic
  latency.
* Average atomic latency = ``TCC_PERF_SEL_EA0_WRREQ_ATOMIC_LEVEL`` divided by ``TCC_PERF_SEL_EA0_WRREQ_ATOMIC``
* ``TCC_EA0_RDREQ_LEVEL[n]`` is primarily intended to measure average efficiency arbiter read latency.
* Average read latency = ``TCC_PERF_SEL_EA0_RDREQ_LEVEL`` divided by ``TCC_PERF_SEL_EA0_RDREQ``
* Stalls can occur regardless of the need for a read to be performed.
* Stalls are normally measured at exactly one point in the pipeline. However, in the case of
  ``TCC_TAG_STALL[n]``, probes can stall the pipeline at a variety of places, so there is no single
  point that can accurately measure the total stalls.
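The average-latency formulas above can be applied directly to collected counter values. The
following sketch is a minimal illustration using hypothetical counter readings; it is not tied to
any particular profiling API, and the counter names simply mirror the formulas above.

.. code-block:: python

   # Minimal sketch: derive average efficiency arbiter write, atomic, and read
   # latency (in cycles) from accumulated *_LEVEL counters and the matching
   # request counts. All counter values below are hypothetical placeholders.
   counters = {
       "TCC_EA0_WRREQ_LEVEL": 1_200_000,
       "TCC_EA0_WRREQ": 4_000,
       "TCC_EA0_ATOMIC_LEVEL": 90_000,
       "TCC_EA0_ATOMIC": 300,
       "TCC_EA0_RDREQ_LEVEL": 2_500_000,
       "TCC_EA0_RDREQ": 10_000,
   }

   def average_latency(level: int, requests: int) -> float:
       """Average in-flight latency in cycles; guards against division by zero."""
       return level / requests if requests else 0.0

   print("Average EA write latency: ",
         average_latency(counters["TCC_EA0_WRREQ_LEVEL"], counters["TCC_EA0_WRREQ"]))
   print("Average EA atomic latency:",
         average_latency(counters["TCC_EA0_ATOMIC_LEVEL"], counters["TCC_EA0_ATOMIC"]))
   print("Average EA read latency:  ",
         average_latency(counters["TCC_EA0_RDREQ_LEVEL"], counters["TCC_EA0_RDREQ"]))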
MI300 and MI200 series derived metrics list
==============================================================
.. csv-table::
:header: "Hardware counter", "Definition"
"``ALUStalledByLDS``", "Percentage of GPU time ALU units are stalled due to the LDS input queue being full or the output queue not being ready (value range: 0% (optimal) to 100%)"
"``FetchSize``", "Total kilobytes fetched from the video memory; measured with all extra fetches and any cache or memory effects taken into account"
"``FlatLDSInsts``", "Average number of flat instructions that read from or write to LDS, run per work item (affected by flow control)"
"``FlatVMemInsts``", "Average number of flat instructions that read from or write to the video memory, run per work item (affected by flow control). Includes flat instructions that read from or write to scratch"
"``GDSInsts``", "Average number of global data share read or write instructions run per work item (affected by flow control)"
"``GPUBusy``", "Percentage of time GPU is busy"
"``L2CacheHit``", "Percentage of fetch, write, atomic, and other instructions that hit the data in L2 cache (value range: 0% (no hit) to 100% (optimal))"
"``LDSBankConflict``", "Percentage of GPU time LDS is stalled by bank conflicts (value range: 0% (optimal) to 100%)"
"``LDSInsts``", "Average number of LDS read or write instructions run per work item (affected by flow control). Excludes flat instructions that read from or write to LDS."
"``MemUnitBusy``", "Percentage of GPU time the memory unit is active, which is measured with all extra fetches and writes and any cache or memory effects taken into account (value range: 0% to 100% (fetch-bound))"
"``MemUnitStalled``", "Percentage of GPU time the memory unit is stalled (value range: 0% (optimal) to 100%)"
"``MemWrites32B``", "Total number of effective 32B write transactions to the memory"
"``TCA_BUSY_sum``", "Total number of cycles texture cache arbiter has a pending request, over all texture cache arbiter instances"
"``TCA_CYCLE_sum``", "Total number of cycles over all texture cache arbiter instances"
"``SALUBusy``", "Percentage of GPU time scalar ALU instructions are processed (value range: 0% to 100% (optimal))"
"``SALUInsts``", "Average number of scalar ALU instructions run per work item (affected by flow control)"
"``SFetchInsts``", "Average number of scalar fetch instructions from the video memory run per work item (affected by flow control)"
"``VALUBusy``", "Percentage of GPU time vector ALU instructions are processed (value range: 0% to 100% (optimal))"
"``VALUInsts``", "Average number of vector ALU instructions run per work item (affected by flow control)"
"``VALUUtilization``", "Percentage of active vector ALU threads in a wave, where a lower number can mean either more thread divergence in a wave or that the work-group size is not a multiple of 64 (value range: 0%, 100% (optimal - no thread divergence))"
"``VFetchInsts``", "Average number of vector fetch instructions from the video memory run per work-item (affected by flow control); excludes flat instructions that fetch from video memory"
"``VWriteInsts``", "Average number of vector write instructions to the video memory run per work-item (affected by flow control); excludes flat instructions that write to video memory"
"``Wavefronts``", "Total wavefronts"
"``WRITE_REQ_32B``", "Total number of 32-byte effective memory writes"
"``WriteSize``", "Total kilobytes written to the video memory; measured with all extra fetches and any cache or memory effects taken into account"
"``WriteUnitStalled``", "Percentage of GPU time the write unit is stalled (value range: 0% (optimal) to 100%)"
You can lower ``ALUStalledByLDS`` by reducing LDS bank conflicts or number of LDS accesses.
You can lower ``MemUnitStalled`` by reducing the number or size of fetches and writes.
``MemUnitBusy`` includes the stall time (``MemUnitStalled``).
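As a hedged illustration (not a formula reference), a derived metric such as ``L2CacheHit`` can be
reconstructed from the per-channel sums listed later on this page. The counter values below are
hypothetical placeholders, and the hit/(hit+miss) ratio is assumed for the sake of the example.

.. code-block:: python

   # Illustrative sketch: compute an L2 cache hit percentage from hypothetical
   # TCC_HIT_sum and TCC_MISS_sum values, assuming the usual hit/(hit+miss)
   # ratio behind the L2CacheHit derived metric described above.
   tcc_hit_sum = 950_000
   tcc_miss_sum = 50_000

   total_l2_requests = tcc_hit_sum + tcc_miss_sum
   l2_cache_hit_pct = 100.0 * tcc_hit_sum / total_l2_requests if total_l2_requests else 0.0
   print(f"L2CacheHit ~= {l2_cache_hit_pct:.1f}%")  # ~95.0% with these placeholder values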
Hardware counters by and over all texture addressing unit instances
---------------------------------------------------------------------------------------------------------------
The following table shows the hardware counters *by* all texture addressing unit instances.
.. csv-table::
:header: "Hardware counter", "Definition"
"``TA_BUFFER_WAVEFRONTS_sum``", "Total number of buffer wavefronts processed"
"``TA_BUFFER_READ_WAVEFRONTS_sum``", "Total number of buffer read wavefronts processed"
"``TA_BUFFER_WRITE_WAVEFRONTS_sum``", "Total number of buffer write wavefronts processed"
"``TA_BUFFER_ATOMIC_WAVEFRONTS_sum``", "Total number of buffer atomic wavefronts processed"
"``TA_BUFFER_TOTAL_CYCLES_sum``", "Total number of buffer cycles (including read and write) issued to texture cache"
"``TA_BUFFER_COALESCED_READ_CYCLES_sum``", "Total number of coalesced buffer read cycles issued to texture cache"
"``TA_BUFFER_COALESCED_WRITE_CYCLES_sum``", "Total number of coalesced buffer write cycles issued to texture cache"
"``TA_FLAT_READ_WAVEFRONTS_sum``", "Sum of flat opcode reads processed"
"``TA_FLAT_WRITE_WAVEFRONTS_sum``", "Sum of flat opcode writes processed"
"``TA_FLAT_WAVEFRONTS_sum``", "Total number of flat opcode wavefronts processed"
"``TA_FLAT_ATOMIC_WAVEFRONTS_sum``", "Total number of flat opcode atomic wavefronts processed"
"``TA_TOTAL_WAVEFRONTS_sum``", "Total number of wavefronts processed"
The following table shows the hardware counters *over* all texture addressing unit instances.
.. csv-table::
:header: "Hardware counter", "Definition"
"``TA_ADDR_STALLED_BY_TC_CYCLES_sum``", "Total number of cycles texture addressing unit address path is stalled by texture cache"
"``TA_ADDR_STALLED_BY_TD_CYCLES_sum``", "Total number of cycles texture addressing unit address path is stalled by texture data unit"
"``TA_BUSY_avr``", "Average number of busy cycles"
"``TA_BUSY_max``", "Maximum number of texture addressing unit busy cycles"
"``TA_BUSY_min``", "Minimum number of texture addressing unit busy cycles"
"``TA_DATA_STALLED_BY_TC_CYCLES_sum``", "Total number of cycles texture addressing unit data path is stalled by texture cache"
"``TA_TA_BUSY_sum``", "Total number of texture addressing unit busy cycles"
Hardware counters over all texture cache per channel instances
---------------------------------------------------------------------------------------------------------------
.. csv-table::
:header: "Hardware counter", "Definition"
"``TCC_ALL_TC_OP_WB_WRITEBACK_sum``", "Total number of writebacks due to all ``TC_OP`` writeback requests."
"``TCC_ALL_TC_OP_INV_EVICT_sum``", "Total number of evictions due to all ``TC_OP`` invalidate requests."
"``TCC_ATOMIC_sum``", "Total number of L2 cache atomic requests of all types."
"``TCC_BUSY_avr``", "Average number of L2 cache busy cycles."
"``TCC_BUSY_sum``", "Total number of L2 cache busy cycles."
"``TCC_CC_REQ_sum``", "Total number of coherently cached requests."
"``TCC_CYCLE_sum``", "Total number of L2 cache free running clocks."
"``TCC_EA0_WRREQ_sum``", "Total number of 32-byte and 64-byte transactions going over the ``TC_EA0_wrreq`` interface. Atomics may travel over the same interface and are generally classified as write requests. This does not include probe commands."
"``TCC_EA0_WRREQ_64B_sum``", "Total number of 64-byte transactions (write or `CMPSWAP`) going over the ``TC_EA0_wrreq`` interface."
"``TCC_EA0_WR_UNCACHED_32B_sum``", "Total Number of 32-byte write or atomic going over the ``TC_EA0_wrreq`` interface due to uncached traffic. Note that coherently cached mtypes can produce uncached requests, and those are included in this. A 64-byte request is counted as 2."
"``TCC_EA0_WRREQ_STALL_sum``", "Total Number of cycles a write request is stalled, over all instances."
"``TCC_EA0_WRREQ_IO_CREDIT_STALL_sum``", "Total number of cycles an efficiency arbiter write request is stalled due to the interface running out of IO credits, over all instances."
"``TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum``", "Total number of cycles an efficiency arbiter write request is stalled due to the interface running out of GMI credits, over all instances."
"``TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum``", "Total number of cycles an efficiency arbiter write request is stalled due to the interface running out of DRAM credits, over all instances."
"``TCC_EA0_WRREQ_LEVEL_sum``", "Total number of efficiency arbiter write requests in flight."
"``TCC_EA0_RDREQ_LEVEL_sum``", "Total number of efficiency arbiter read requests in flight."
"``TCC_EA0_ATOMIC_sum``", "Total Number of 32-byte or 64-byte atomic requests going over the ``TC_EA0_wrreq`` interface."
"``TCC_EA0_ATOMIC_LEVEL_sum``", "Total number of efficiency arbiter atomic requests in flight."
"``TCC_EA0_RDREQ_sum``", "Total number of 32-byte or 64-byte read requests to efficiency arbiter."
"``TCC_EA0_RDREQ_32B_sum``", "Total number of 32-byte read requests to efficiency arbiter."
"``TCC_EA0_RD_UNCACHED_32B_sum``", "Total number of 32-byte efficiency arbiter reads due to uncached traffic."
"``TCC_EA0_RDREQ_IO_CREDIT_STALL_sum``", "Total number of cycles there is a stall due to the read request interface running out of IO credits."
"``TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum``", "Total number of cycles there is a stall due to the read request interface running out of GMI credits."
"``TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum``", "Total number of cycles there is a stall due to the read request interface running out of DRAM credits."
"``TCC_EA0_RDREQ_DRAM_sum``", "Total number of 32-byte or 64-byte efficiency arbiter read requests to HBM."
"``TCC_EA0_WRREQ_DRAM_sum``", "Total number of 32-byte or 64-byte efficiency arbiter write requests to HBM."
"``TCC_HIT_sum``", "Total number of L2 cache hits."
"``TCC_MISS_sum``", "Total number of L2 cache misses."
"``TCC_NC_REQ_sum``", "Total number of non-coherently cached requests."
"``TCC_NORMAL_WRITEBACK_sum``", "Total number of writebacks due to requests that are not writeback requests."
"``TCC_NORMAL_EVICT_sum``", "Total number of evictions due to requests that are not invalidate or probe requests."
"``TCC_PROBE_sum``", "Total number of probe requests."
"``TCC_PROBE_ALL_sum``", "Total number of external probe requests with ``EA0_TCC_preq_all == 1``."
"``TCC_READ_sum``", "Total number of L2 cache read requests (including compressed reads but not metadata reads)."
"``TCC_REQ_sum``", "Total number of all types of L2 cache requests."
"``TCC_RW_REQ_sum``", "Total number of coherently cached with write requests."
"``TCC_STREAMING_REQ_sum``", "Total number of L2 cache streaming requests."
"``TCC_TAG_STALL_sum``", "Total number of cycles the normal request pipeline in the tag is stalled for any reason."
"``TCC_TOO_MANY_EA0_WRREQS_STALL_sum``", "Total number of cycles L2 cache is unable to send an efficiency arbiter write request due to it reaching its maximum capacity of pending efficiency arbiter write requests."
"``TCC_UC_REQ_sum``", "Total number of uncached requests."
"``TCC_WRITE_sum``", "Total number of L2 cache write requests."
"``TCC_WRITEBACK_sum``", "Total number of lines written back to the main memory including writebacks of dirty lines and uncached write or atomic requests."
"``TCC_WRREQ_STALL_max``", "Maximum number of cycles a write request is stalled."
Hardware counters by, for, or over all texture cache per pipe instances
----------------------------------------------------------------------------------------------------------------
The following table shows the hardware counters *by* all texture cache per pipe instances.
.. csv-table::
:header: "Hardware counter", "Definition"
"``TCP_TA_TCP_STATE_READ_sum``", "Total number of state reads by ATCPPI"
"``TCP_TOTAL_CACHE_ACCESSES_sum``", "Total number of vector L1d accesses (including hits and misses)"
"``TCP_UTCL1_PERMISSION_MISS_sum``", "Total number of unified translation cache (L1) permission misses"
"``TCP_UTCL1_REQUEST_sum``", "Total number of address translation requests to unified translation cache (L1)"
"``TCP_UTCL1_TRANSLATION_MISS_sum``", "Total number of unified translation cache (L1) translation misses"
"``TCP_UTCL1_TRANSLATION_HIT_sum``", "Total number of unified translation cache (L1) translation hits"
The following table shows the hardware counters *for* all texture cache per pipe instances.
.. csv-table::
:header: "Hardware counter", "Definition"
"``TCP_TCC_READ_REQ_LATENCY_sum``", "Total vector L1d to L2 request latency over all wavefronts for reads and atomics with return"
"``TCP_TCC_WRITE_REQ_LATENCY_sum``", "Total vector L1d to L2 request latency over all wavefronts for writes and atomics without return"
"``TCP_TCP_LATENCY_sum``", "Total wave access latency to vector L1d over all wavefronts"
The following table shows the hardware counters *over* all texture cache per pipe instances.
.. csv-table::
:header: "Hardware counter", "Definition"
"``TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum``", "Total number of cycles tag RAM conflict stalls on an atomic"
"``TCP_GATE_EN1_sum``", "Total number of cycles vector L1d interface clocks are turned on"
"``TCP_GATE_EN2_sum``", "Total number of cycles vector L1d core clocks are turned on"
"``TCP_PENDING_STALL_CYCLES_sum``", "Total number of cycles vector L1d cache is stalled due to data pending from L2 Cache"
"``TCP_READ_TAGCONFLICT_STALL_CYCLES_sum``", "Total number of cycles tag RAM conflict stalls on a read"
"``TCP_TCC_ATOMIC_WITH_RET_REQ_sum``", "Total number of atomic requests to L2 cache with return"
"``TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum``", "Total number of atomic requests to L2 cache without return"
"``TCP_TCC_CC_READ_REQ_sum``", "Total number of coherently cached read requests to L2 cache"
"``TCP_TCC_CC_WRITE_REQ_sum``", "Total number of coherently cached write requests to L2 cache"
"``TCP_TCC_CC_ATOMIC_REQ_sum``", "Total number of coherently cached atomic requests to L2 cache"
"``TCP_TCC_NC_READ_REQ_sum``", "Total number of non-coherently cached read requests to L2 cache"
"``TCP_TCC_NC_WRITE_REQ_sum``", "Total number of non-coherently cached write requests to L2 cache"
"``TCP_TCC_NC_ATOMIC_REQ_sum``", "Total number of non-coherently cached atomic requests to L2 cache"
"``TCP_TCC_READ_REQ_sum``", "Total number of read requests to L2 cache"
"``TCP_TCC_RW_READ_REQ_sum``", "Total number of coherently cached with write read requests to L2 cache"
"``TCP_TCC_RW_WRITE_REQ_sum``", "Total number of coherently cached with write write requests to L2 cache"
"``TCP_TCC_RW_ATOMIC_REQ_sum``", "Total number of coherently cached with write atomic requests to L2 cache"
"``TCP_TCC_UC_READ_REQ_sum``", "Total number of uncached read requests to L2 cache"
"``TCP_TCC_UC_WRITE_REQ_sum``", "Total number of uncached write requests to L2 cache"
"``TCP_TCC_UC_ATOMIC_REQ_sum``", "Total number of uncached atomic requests to L2 cache"
"``TCP_TCC_WRITE_REQ_sum``", "Total number of write requests to L2 cache"
"``TCP_TCR_TCP_STALL_CYCLES_sum``", "Total number of cycles texture cache router stalls vector L1d"
"``TCP_TD_TCP_STALL_CYCLES_sum``", "Total number of cycles texture data unit stalls vector L1d"
"``TCP_TOTAL_ACCESSES_sum``", "Total number of vector L1d accesses"
"``TCP_TOTAL_READ_sum``", "Total number of vector L1d read accesses"
"``TCP_TOTAL_WRITE_sum``", "Total number of vector L1d write accesses"
"``TCP_TOTAL_ATOMIC_WITH_RET_sum``", "Total number of vector L1d atomic requests with return"
"``TCP_TOTAL_ATOMIC_WITHOUT_RET_sum``", "Total number of vector L1d atomic requests without return"
"``TCP_TOTAL_WRITEBACK_INVALIDATES_sum``", "Total number of vector L1d writebacks and invalidates"
"``TCP_VOLATILE_sum``", "Total number of L1 volatile pixels or buffers from texture addressing unit"
"``TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum``", "Total number of cycles tag RAM conflict stalls on a write"
Hardware counter over all texture data unit instances
--------------------------------------------------------
.. csv-table::
:header: "Hardware counter", "Definition"
"``TD_ATOMIC_WAVEFRONT_sum``", "Total number of atomic wavefront instructions"
"``TD_COALESCABLE_WAVEFRONT_sum``", "Total number of coalescable wavefronts according to texture addressing unit"
"``TD_LOAD_WAVEFRONT_sum``", "Total number of wavefront instructions (read, write, atomic)"
"``TD_SPI_STALL_sum``", "Total number of cycles texture data unit is stalled by shader processor input"
"``TD_STORE_WAVEFRONT_sum``", "Total number of write wavefront instructions"
"``TD_TC_STALL_sum``", "Total number of cycles texture data unit is stalled waiting for texture cache data"
"``TD_TD_BUSY_sum``", "Total number of texture data unit busy cycles while it is processing or waiting for data"

View File

@@ -1,129 +0,0 @@
---
myst:
html_meta:
"description lang=en": "Learn about the AMD Instinct MI300 series architecture."
"keywords": "Instinct, MI300X, MI300A, microarchitecture, AMD, ROCm"
---
# AMD Instinct™ MI300 series microarchitecture
The AMD Instinct MI300 series accelerators are based on the AMD CDNA 3
architecture, which was designed to deliver leadership performance for HPC, artificial intelligence (AI), and machine
learning (ML) workloads. The AMD Instinct MI300 series accelerators are well-suited for extreme scalability and compute performance, running
on everything from individual servers to the world's largest exascale supercomputers.
With the MI300 series, AMD is introducing the Accelerator Complex Die (XCD), which contains the
GPU computational elements of the processor along with the lower levels of the cache hierarchy.
The following image depicts the structure of a single XCD in the AMD Instinct MI300 accelerator series.
```{figure} ../../data/shared/xcd-sys-arch.png
---
name: mi300-xcd
align: center
---
XCD-level system architecture showing 40 Compute Units, each with 32 KB L1 cache, a Unified Compute System with 4 ACE Compute Accelerators, shared 4MB of L2 cache and an HWS Hardware Scheduler.
```
On the XCD, four Asynchronous Compute Engines (ACEs) send compute shader workgroups to the
Compute Units (CUs). The XCD has 40 CUs: 38 active CUs at the aggregate level and 2 disabled CUs for
yield management. The CUs all share a 4 MB L2 cache that serves to coalesce all memory traffic for the
die. With less than half of the CUs of the AMD Instinct MI200 Series compute die, the AMD CDNA™ 3
XCD die is a smaller building block. However, it uses more advanced packaging and the processor
can include 6 or 8 XCDs for up to 304 CUs, roughly 40% more than MI250X.
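As a quick arithmetic check of the CU counts quoted above, the sketch below reproduces the 304-CU
figure and the rough 40% comparison; the 220-CU figure for MI250X is stated here as an assumption
used only for the comparison.

```python
# Quick arithmetic check of the CU counts quoted above.
ACTIVE_CUS_PER_XCD = 38
XCDS = 8
MI250X_CUS = 220  # assumption used for the comparison

mi300x_cus = ACTIVE_CUS_PER_XCD * XCDS          # 304
increase = (mi300x_cus / MI250X_CUS - 1) * 100  # ~38%
print(mi300x_cus, f"~{increase:.0f}% more CUs than MI250X")
```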
The MI300 series integrates up to 8 vertically stacked XCDs, 8 stacks of
High-Bandwidth Memory 3 (HBM3) and 4 I/O dies (containing system
infrastructure) using the AMD Infinity Fabric™ technology as interconnect.
The Matrix Cores inside the CDNA 3 CUs bring significant improvements aimed at AI and machine
learning, enhancing the throughput of existing data types while adding support for new data types.
CDNA 2 Matrix Cores support FP16 and BF16, while offering INT8 for inference. Compared to MI250X
accelerators, CDNA 3 Matrix Cores triple the performance for FP16 and BF16, while providing a
performance gain of 6.8 times for INT8. FP8 has a performance gain of 16 times compared to FP32,
while TF32 has a gain of 4 times compared to FP32.
```{list-table} Peak-performance capabilities of the MI300X for different data types.
:header-rows: 1
:name: mi300x-perf-table
*
- Computation and Data Type
- FLOPS/CLOCK/CU
- Peak TFLOPS
*
- Matrix FP64
- 256
- 163.4
*
- Vector FP64
- 128
- 81.7
*
- Matrix FP32
- 256
- 163.4
*
- Vector FP32
- 256
- 163.4
*
- Vector TF32
- 1024
- 653.7
*
- Matrix FP16
- 2048
- 1307.4
*
- Matrix BF16
- 2048
- 1307.4
*
- Matrix FP8
- 4096
- 2614.9
*
- Matrix INT8
- 4096
- 2614.9
```
The above table summarizes the aggregated peak performance of the AMD Instinct MI300X Open
Compute Platform (OCP) Open Accelerator Modules (OAMs) for different data types and command
processors. The middle column lists the peak performance (number of data elements processed in a
single instruction) of a single compute unit if a SIMD (or matrix) instruction is submitted in each clock
cycle. The third column lists the theoretical peak performance of the OAM. The theoretical aggregated
peak memory bandwidth of the GPU is 5.3 TB per second.
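To make the relationship between the two columns concrete, the peak TFLOPS values follow from
FLOPS/clock/CU multiplied by the CU count and the clock frequency. The sketch below assumes 304 CUs
and a peak engine clock of roughly 2.1 GHz; both values are assumptions used here only for
illustration.

```python
# Illustrative sketch: relate FLOPS/clock/CU to peak TFLOPS.
# Assumptions (illustration only): 304 CUs, ~2.1 GHz peak engine clock.
NUM_CUS = 304
PEAK_CLOCK_HZ = 2.1e9

def peak_tflops(flops_per_clock_per_cu: int) -> float:
    """Peak throughput in TFLOPS for a given per-CU, per-clock FLOP rate."""
    return flops_per_clock_per_cu * NUM_CUS * PEAK_CLOCK_HZ / 1e12

print(f"Matrix FP64: {peak_tflops(256):.1f} TFLOPS")   # ~163.4
print(f"Matrix FP16: {peak_tflops(2048):.1f} TFLOPS")  # ~1307.4
print(f"Matrix FP8:  {peak_tflops(4096):.1f} TFLOPS")  # ~2614.9
```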
The following image shows the block diagrams of the APU (left) and the OAM package (right), both of
which use the AMD Infinity Fabric™ network on-chip to connect their dies.
```{figure} ../../data/conceptual/gpu-arch/image008.png
---
name: mi300-arch
alt:
align: center
---
MI300 series system architecture showing MI300A (left) with 6 XCDs and 3 CCDs, while the MI300X (right) has 8 XCDs.
```
## Node-level architecture
```{figure} ../../data/shared/mi300-node-level-arch.png
---
name: mi300-node
align: center
---
MI300 series node-level architecture showing 8 fully interconnected MI300X OAM modules connected to (optional) PCIe switches via retimers and HGX connectors.
```
The image above shows the node-level architecture of a system with AMD EPYC processors in a
dual-socket configuration and eight AMD Instinct MI300X accelerators. The MI300X OAMs attach to the
host system via PCIe Gen 5 x16 links (yellow lines). The GPUs use seven high-bandwidth,
low-latency AMD Infinity Fabric™ links (red lines) to form a fully connected 8-GPU system.
<!---
We need performance data about the P2P communication here.
-->

View File

@@ -1,116 +0,0 @@
<head>
<meta charset="UTF-8">
<meta name="description" content="GPU isolation techniques">
<meta name="keywords" content="GPU isolation techniques, UUID, universally unique identifier,
environment variables, virtual machines, AMD, ROCm">
</head>
# GPU isolation techniques
Restricting the access of applications to a subset of GPUs, also known as GPU isolation,
allows users to hide GPU resources from programs. By default, programs
only use the "exposed" GPUs and ignore other (hidden) GPUs in the system.
There are multiple ways to achieve isolation of GPUs in the ROCm software stack,
differing in which applications they apply to and the security they provide.
This page serves as an overview of the techniques.
## Environment variables
The runtimes in the ROCm software stack read these environment variables to
select the exposed or default device to present to applications using them.
Environment variables shouldn't be used for isolating untrusted applications,
as an application can reset them before initializing the runtime.
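As a minimal sketch of why this is the case, the snippet below shows a process overwriting the
variable before any GPU runtime is initialized; the environment variable name is real, but the
surrounding workload is a hypothetical placeholder.

```{code-block} python
:caption: Example of an application overriding isolation before runtime initialization.

import os

# Overwrites whatever ROCR_VISIBLE_DEVICES the launching environment exported.
# Any ROCm runtime initialized after this point sees the devices listed here.
os.environ["ROCR_VISIBLE_DEVICES"] = "0,1,2,3"
```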
### `ROCR_VISIBLE_DEVICES`
A list of device indices or {abbr}`UUID (universally unique identifier)`s
that will be exposed to applications.
Runtime
: ROCm Software Runtime. Applies to all applications using the user mode ROCm
software stack.
```{code-block} shell
:caption: Example exposing the first device and a device identified by UUID.
export ROCR_VISIBLE_DEVICES="0,GPU-DEADBEEFDEADBEEF"
```
### `GPU_DEVICE_ORDINAL`
Devices indices exposed to OpenCL and HIP applications.
Runtime
: ROCm Compute Language Runtime (`ROCclr`). Applies to applications and runtimes
using the `ROCclr` abstraction layer including HIP and OpenCL applications.
```{code-block} shell
:caption: Example exposing the first and third devices in the system.
export GPU_DEVICE_ORDINAL="0,2"
```
(hip_visible_devices)=
### `HIP_VISIBLE_DEVICES`
Device indices exposed to HIP applications.
Runtime
: HIP runtime. Applies only to applications using HIP on the AMD platform.
```{code-block} shell
:caption: Example exposing the first and third devices in the system.
export HIP_VISIBLE_DEVICES="0,2"
```
### `CUDA_VISIBLE_DEVICES`
Provided for CUDA compatibility, has the same effect as `HIP_VISIBLE_DEVICES`
on the AMD platform.
Runtime
: HIP or CUDA Runtime. Applies to HIP applications on the AMD or NVIDIA platform
and CUDA applications.
### `OMP_DEFAULT_DEVICE`
Default device used for OpenMP target offloading.
Runtime
: OpenMP Runtime. Applies only to applications using OpenMP offloading.
```{code-block} shell
:caption: Example setting the default device to the third device.
export OMP_DEFAULT_DEVICE="2"
```
## Docker
Docker uses Linux kernel namespaces to provide isolated environments for
applications. This isolation applies to most devices by default, including
GPUs. To access them in containers, explicit access must be granted; see
{ref}`docker-access-gpus-in-container` for details.
Specifically, refer to {ref}`docker-restrict-gpus` for details on exposing just a subset
of all GPUs.
Docker isolation is more secure than environment variables, and applies
to all programs that use the `amdgpu` kernel module interfaces.
Even programs that don't use the ROCm runtime, like graphics applications
using OpenGL or Vulkan, can only access the GPUs exposed to the container.
## GPU passthrough to virtual machines
Virtual machines achieve the highest level of isolation, because even the kernel
of the virtual machine is isolated from the host. Devices physically installed
in the host system can be passed to the virtual machine using PCIe passthrough.
This allows the GPU to be used with a different operating system, such as a Windows
guest on a Linux host.
Setting up PCIe passthrough is specific to the hypervisor used. ROCm officially
supports [VMware ESXi](https://www.vmware.com/products/esxi-and-esx.html)
for select GPUs.
<!--
TODO: This should link to a page about virtualization that explains
pass-through and SR-IOV and how-tos for maybe `libvirt` and `VMWare`
-->

View File

@@ -5,64 +5,9 @@
# https://www.sphinx-doc.org/en/master/usage/configuration.html
import os
import shutil
import sys
from pathlib import Path
shutil.copy2("../RELEASE.md", "./about/release-notes.md")
shutil.copy2("../CHANGELOG.md", "./release/changelog.md")
# Mark the consolidated changelog as orphan to prevent Sphinx from warning about missing toctree entries
with open("./release/changelog.md", "r+") as file:
content = file.read()
file.seek(0)
file.write(":orphan:\n" + content)
# Replace GitHub-style [!ADMONITION]s with Sphinx-compatible ```{admonition} blocks
with open("./release/changelog.md", "r") as file:
lines = file.readlines()
modified_lines = []
in_admonition_section = False
# Map for matching the specific admonition type to its corresponding Sphinx markdown syntax
admonition_types = {
'> [!NOTE]': '```{note}',
'> [!TIP]': '```{tip}',
'> [!IMPORTANT]': '```{important}',
'> [!WARNING]': '```{warning}',
'> [!CAUTION]': '```{caution}'
}
for line in lines:
if any(line.startswith(k) for k in admonition_types):
for key in admonition_types:
if(line.startswith(key)):
modified_lines.append(admonition_types[key] + '\n')
break
in_admonition_section = True
elif in_admonition_section:
if line.strip() == '':
# If we encounter an empty line, close the admonition section
modified_lines.append('```\n\n') # Close the admonition block
in_admonition_section = False
else:
modified_lines.append(line.lstrip('> '))
else:
modified_lines.append(line)
# In case the file ended while still in an admonition section, close it
if in_admonition_section:
modified_lines.append('```')
file.close()
with open("./release/changelog.md", 'w') as file:
file.writelines(modified_lines)
os.system("mkdir -p ../_readthedocs/html/downloads")
os.system("cp compatibility/compatibility-matrix-historical-6.0.csv ../_readthedocs/html/downloads/compatibility-matrix-historical-6.0.csv")
latex_engine = "xelatex"
latex_elements = {
"fontpkg": r"""
@@ -82,108 +27,26 @@ project = "ROCm Documentation"
project_path = os.path.abspath(".").replace("\\", "/")
author = "Advanced Micro Devices, Inc."
copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved."
version = "6.4.2"
release = "6.4.2"
version = "7.0 Beta"
release = "7.0 Beta"
setting_all_article_info = True
all_article_info_os = ["linux", "windows"]
all_article_info_os = ["linux"]
all_article_info_author = ""
# pages with specific settings
article_pages = [
{"file": "about/release-notes", "os": ["linux"], "date": "2025-07-21"},
{"file": "release/changelog", "os": ["linux"],},
{"file": "compatibility/compatibility-matrix", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/tensorflow-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/jax-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/verl-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/stanford-megatron-lm-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/dgl-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/megablocks-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/taichi-compatibility", "os": ["linux"]},
{"file": "how-to/deep-learning-rocm", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/install", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/system-health-check", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/train-a-model", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/prerequisite-system-validation", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/scale-model-training", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/megatron-lm", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-history", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v24.12-dev", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.3", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.4", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.5", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/pytorch-training", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-history", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.3", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.4", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.5", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-history", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-v25.4", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/fine-tuning/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/fine-tuning/overview", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/hugging-face-models", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/llm-inference-frameworks", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/vllm", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-history", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.4.3", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.6.4", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.6.6", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.7.3-20250325", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.3-20250415", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.5-20250513", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.5-20250521", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.0.1-20250605", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.0.1-20250702", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/model-quantization", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/model-acceleration-libraries", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/optimizing-with-composable-kernel", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/optimizing-triton-kernel", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/profiling-and-debugging", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/workload", "os": ["linux"]},
{"file": "how-to/system-optimization/index", "os": ["linux"]},
{"file": "how-to/system-optimization/mi300x", "os": ["linux"]},
{"file": "how-to/system-optimization/mi200", "os": ["linux"]},
{"file": "how-to/system-optimization/mi100", "os": ["linux"]},
{"file": "how-to/system-optimization/w6000-v620", "os": ["linux"]},
{"file": "how-to/tuning-guides/mi300x/index", "os": ["linux"]},
{"file": "how-to/tuning-guides/mi300x/system", "os": ["linux"]},
{"file": "how-to/tuning-guides/mi300x/workload", "os": ["linux"]},
{"file": "how-to/system-debugging", "os": ["linux"]},
{"file": "how-to/gpu-enabled-mpi", "os": ["linux"]},
{"file": "preview/release", "date": "2025-07-24",},
]
external_toc_path = "./sphinx/_toc.yml"
external_projects_remote_repository = "" # don't fetch data to resolve intersphinx xrefs
# Add the _extensions directory to Python's search path
sys.path.append(str(Path(__file__).parent / 'extension'))
extensions = ["rocm_docs", "sphinx_reredirects", "sphinx_sitemap", "sphinxcontrib.datatemplates", "version-ref", "csv-to-list-table"]
compatibility_matrix_file = str(Path(__file__).parent / 'compatibility/compatibility-matrix-historical-6.0.csv')
external_projects_current_project = "rocm"
# Uncomment if facing rate limit exceed issue with local build
# external_projects_remote_repository = ""
html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "https://rocm-stg.amd.com/")
html_context = {}
if os.environ.get("READTHEDOCS", "") == "True":
@@ -192,18 +55,15 @@ if os.environ.get("READTHEDOCS", "") == "True":
html_theme = "rocm_docs_theme"
html_theme_options = {"flavor": "rocm-docs-home"}
html_static_path = ["sphinx/static/css", "extension/how-to/rocm-for-ai/inference"]
html_css_files = ["rocm_custom.css", "rocm_rn.css", "vllm-benchmark.css"]
html_js_files = ["vllm-benchmark.js"]
html_static_path = ["sphinx/static/css", "sphinx/static/js"]
html_css_files = ["rocm_custom.css", "rocm_rn.css"]
html_js_files = ["preview-version-list.js"]
html_title = "ROCm Documentation"
html_title = "ROCm 7.0 Beta documentation"
html_theme_options = {"link_main_doc": False}
redirects = {"reference/openmp/openmp": "../../about/compatibility/openmp.html"}
numfig = False
suppress_warnings = ["autosectionlabel.*"]
html_context = {
"project_path" : {project_path},

View File

@@ -1,150 +0,0 @@
<head>
<meta charset="UTF-8">
<meta name="description" content="Building ROCm documentation">
<meta name="keywords" content="documentation, Visual Studio Code, GitHub, command line,
AMD, ROCm">
</head>
# Building documentation
## GitHub
If you open a pull request and scroll down to the summary panel,
there is a commit status section. Next to the line
`docs/readthedocs.com:advanced-micro-devices-demo`, there is a `Details` link.
If you click this, it takes you to the Read the Docs build for your pull request.
![GitHub PR commit status](../data/contribute/commit-status.png)
If you don't see this line, click `Show all checks` to get an itemized view.
## Command line
You can build our documentation via the command line using Python.
See the `build.tools.python` setting in the [Read the Docs configuration file](https://github.com/ROCm/ROCm/blob/develop/.readthedocs.yaml) for the Python version used by Read the Docs to build documentation.
See the [Python requirements file](https://github.com/ROCm/ROCm/blob/develop/docs/sphinx/requirements.txt) for Python packages needed to build the documentation.
Use the Python Virtual Environment (`venv`) and run the following commands from the project root:
```sh
python3 -m venv .venv
.venv/bin/python -m pip install -r docs/sphinx/requirements.txt
.venv/bin/python -m sphinx -T -E -b html -d _build/doctrees -D language=en docs _build/html
```
Navigate to `_build/html/index.html` and open this file in a web browser.
## Visual Studio Code
With the help of a few extensions, you can create a productive environment to author and test
documentation locally using Visual Studio (VS) Code. Follow these steps to configure VS Code:
1. Install the required extensions:
* Python: `(ms-python.python)`
* Live Server: `(ritwickdey.LiveServer)`
2. Add the following entries to `.vscode/settings.json`.
```json
{
"liveServer.settings.root": "/.vscode/build/html",
"liveServer.settings.wait": 1000,
"python.terminal.activateEnvInCurrentTerminal": true
}
```
* `liveServer.settings.root`: Sets the root of the output website for live previews. Must be changed
alongside the `tasks.json` command.
* `liveServer.settings.wait`: Tells the live server to wait with the update in order to give Sphinx time to
regenerate the site contents and not refresh before the build is complete.
* `python.terminal.activateEnvInCurrentTerminal`: Activates the automatic virtual environment, so you
can build the site from the integrated terminal.
3. Add the following tasks to `.vscode/tasks.json`.
```json
{
"version": "2.0.0",
"tasks": [
{
"label": "Build Docs",
"type": "process",
"windows": {
"command": "${workspaceFolder}/.venv/Scripts/python.exe"
},
"command": "${workspaceFolder}/.venv/bin/python3",
"args": [
"-m",
"sphinx",
"-j",
"auto",
"-T",
"-b",
"html",
"-d",
"${workspaceFolder}/.vscode/build/doctrees",
"-D",
"language=en",
"${workspaceFolder}/docs",
"${workspaceFolder}/.vscode/build/html"
],
"problemMatcher": [
{
"owner": "sphinx",
"fileLocation": "absolute",
"pattern": {
"regexp": "^(?:.*\\.{3}\\s+)?(\\/[^:]*|[a-zA-Z]:\\\\[^:]*):(\\d+):\\s+(WARNING|ERROR):\\s+(.*)$",
"file": 1,
"line": 2,
"severity": 3,
"message": 4
}
},
{
"owner": "sphinx",
"fileLocation": "absolute",
"pattern": {
"regexp": "^(?:.*\\.{3}\\s+)?(\\/[^:]*|[a-zA-Z]:\\\\[^:]*):{1,2}\\s+(WARNING|ERROR):\\s+(.*)$",
"file": 1,
"severity": 2,
"message": 3
}
}
],
"group": {
"kind": "build",
"isDefault": true
}
}
]
}
```
> Implementation detail: two problem matchers had to be defined
> because VS Code doesn't tolerate some problem information being potentially
> absent. While a single regex could match all types of errors, if a capture
> group remains empty (the line number doesn't show up in all warning/error
> messages) but the `pattern` references said empty capture group, VS Code
> discards the message completely.
4. Configure the Python virtual environment (`venv`).
From the Command Palette, run `Python: Create Environment`. Select `venv` environment and
`docs/sphinx/requirements.txt`.
5. Build the docs.
Launch the default build task using one of the following options:
* A hotkey (the default is `Ctrl+Shift+B`)
* Issuing the `Tasks: Run Build Task` from the Command Palette
6. Open the live preview.
Navigate to the site output within VS Code: right-click on `.vscode/build/html/index.html` and
select `Open with Live Server`. The contents should update on every rebuild without having to
refresh the browser.

View File

@@ -1,77 +0,0 @@
<head>
<meta charset="UTF-8">
<meta name="description" content="Contributing to ROCm">
<meta name="keywords" content="ROCm, contributing, contribute, maintainer, contributor">
</head>
# Contributing to the ROCm documentation
The ROCm documentation, like all of ROCm, is open source and available on GitHub. You can contribute to the ROCm documentation by forking the appropriate repository, making your changes, and opening a pull request.
To provide feedback on the ROCm documentation, including submitting an issue or suggesting a feature, see [Providing feedback about the ROCm documentation](./feedback.md).
## The ROCm repositories
The repositories for ROCm and all ROCm components are available on GitHub.
| Module | Documentation location |
| --- | --- |
| ROCm framework | [https://github.com/ROCm/ROCm/tree/develop/docs](https://github.com/ROCm/ROCm/tree/develop/docs) |
| ROCm installation for Linux | [https://github.com/ROCm/rocm-install-on-linux/tree/develop/docs](https://github.com/ROCm/rocm-install-on-linux/tree/develop/docs) |
| ROCm HIP SDK installation for Windows | [https://github.com/ROCm/rocm-install-on-windows/tree/develop/docs](https://github.com/ROCm/rocm-install-on-windows/tree/develop/docs) |
Individual components have their own repositories with their own documentation in their own `docs` folders.
The sub-folders within the `docs` folders across ROCm are typically structured as follows:
| Sub-folder name | Documentation type |
|-------|----------|
| `install` | Installation instructions, build instructions, and prerequisites |
| `conceptual` | Important concepts |
| `how-to` | How to implement specific use cases |
| `tutorials` | Tutorials |
| `reference` | API references and other reference resources |
## Editing and adding to the documentation
ROCm documentation follows the [Google developer documentation style guide](https://developers.google.com/style/highlights).
Most topics in the ROCm documentation are written in [reStructuredText (rst)](https://www.sphinx-doc.org/en/master/usage/restructuredtext/index.html), with some topics written in Markdown. Only use reStructuredText when adding new topics. Only use Markdown if the topic you are editing is already in Markdown.
To edit or add to the documentation:
1. Fork the repository you want to add to or edit.
2. Clone your fork locally.
3. Create a new local branch cut from the `develop` branch of the repository.
4. Make your changes to the documentation.
5. Optionally, build the documentation locally before creating a pull request by running the following commands from within the `docs` folder:
```bash
pip3 install -r sphinx/requirements.txt # You only need to run this command once
python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html
```
The output files will be located in the `docs/_build` folder. Open `docs/_build/html/index.html` to view the documentation.
For more information on ROCm build tools, see [Documentation toolchain](toolchain.md).
6. Push your changes. A GitHub link will be returned in the output of the `git push` command. Open this link in a browser to create the pull request.
The documentation is built as part of the checks on the pull request, along with spell checking and linting. Scroll to the bottom of your pull request to view all the checks.
Verify that the linting and spell checking have passed, and that the documentation was built successfully. New words or acronyms can be added to the [wordlist file](https://github.com/ROCm/rocm-docs-core/blob/develop/.wordlist.txt). The wordlist is subject to approval by the ROCm documentation team.
The Read The Docs build of your pull request can be accessed by clicking on the Details link next to the Read The Docs build check. Verify that your changes are in the build and look as expected.
![The GitHub checks are collapsed by default and can be accessed by clicking on "Show All Checks".](../data/contribute/GitHubCheck-Highlight.png)
![The Read The Docs Build is accessed from the Details link in the Read The Docs check.](../data/contribute/GitHub-ReadThe-Docs-Highlight.png)
Your pull request will be reviewed by a member of the ROCm documentation team.
See the [GitHub documentation](https://docs.github.com/en) for information on how to fork and clone a repository, and how to create and push a local branch.
```{important}
By creating a pull request (PR), you agree to allow your contribution to be licensed under the terms of the
LICENSE.txt file in the corresponding repository. Different repositories can use different licenses.
```

View File

@@ -1,27 +0,0 @@
<head>
<meta charset="UTF-8">
<meta name="description" content="Providing feedback for ROCm documentation">
<meta name="keywords" content="documentation, pull request, GitHub, AMD, ROCm">
</head>
# Providing feedback about the ROCm documentation
Feedback about the ROCm documentation is welcome. You can provide feedback about the ROCm documentation either through GitHub Discussions or GitHub Issues.
## Participating in discussions through GitHub Discussions
You can ask questions, view announcements, suggest new features, and communicate with other members of the community through [GitHub Discussions](https://github.com/ROCm/ROCm/discussions).
## Submitting issues through GitHub Issues
You can submit issues through [GitHub Issues](https://github.com/ROCm/ROCm/issues).
When creating a new issue, follow these guidelines:
1. Always do a search to see if the same issue already exists. If the issue already exists, upvote it, and comment or post to provide any additional details you might have.
2. If you find an issue that is similar to your issue, log your issue, then add a comment that includes a link to the similar issue, as well as its issue number.
3. Always provide as much information as possible. This helps reduce the time required to reproduce the issue.
After creating your issue, make sure to check it regularly for any requests for additional information.
For information about contributing content to the ROCm documentation, see [Contributing to the ROCm documentation](./contributing.md).

View File

@@ -1,46 +0,0 @@
<head>
<meta charset="UTF-8">
<meta name="description" content="ROCm documentation toolchain">
<meta name="keywords" content="documentation, toolchain, Sphinx, Doxygen, MyST, AMD, ROCm">
</head>
# ROCm documentation toolchain
The ROCm documentation relies on several open source toolchains and sites.
## rocm-docs-core
[rocm-docs-core](https://github.com/ROCm/rocm-docs-core) is an AMD-maintained
project that applies customizations for the ROCm documentation. This project is the tool most ROCm repositories use as part of their documentation build pipeline. It is available as a [pip package on PyPI](https://pypi.org/project/rocm-docs-core/).
See the user and developer guides for rocm-docs-core at
{doc}`rocm-docs-core documentation<rocm-docs-core:index>`.
## Sphinx
[Sphinx](https://www.sphinx-doc.org/en/master/) is a documentation generator originally used for Python. It is now widely used in the open source community.
### Sphinx External ToC
[Sphinx External ToC](https://sphinx-external-toc.readthedocs.io/en/latest/intro.html) is a Sphinx extension used for ROCm documentation navigation. This tool generates a navigation menu on the left
based on a YAML file (`_toc.yml.in`) that contains the table of contents.
### Sphinx-book-theme
[Sphinx-book-theme](https://sphinx-book-theme.readthedocs.io/en/latest/) is a Sphinx theme that defines the base appearance for ROCm documentation. ROCm documentation applies some customization, such as a custom header and footer, on top of the Sphinx Book Theme.
### Sphinx Design
[Sphinx design](https://sphinx-design.readthedocs.io/en/latest/index.html) is a Sphinx extension that adds design functionality. ROCm documentation uses Sphinx Design for grids, cards, and synchronized tabs.
## Doxygen
[Doxygen](https://www.doxygen.nl/) is a documentation generator that extracts information from in-code comments. It is used for API documentation.
## Breathe
[Breathe](https://www.breathe-doc.org/) is a Sphinx plugin for integrating Doxygen content.
## Read the Docs
[Read the Docs](https://docs.readthedocs.io/en/stable/) is the service that builds and hosts the HTML version of the ROCm documentation.

(Binary image files removed in this diff are not shown.)
Some files were not shown because too many files have changed in this diff.