Update packages - remove broken packages

Update the HIP_ON_ROCclr_ROOT path in envsetup.sh to hip and remove
aqlprofiletest
2026-01-10 15:18:11 -05:00 · 2025-05-21 08:10:47 -04:00 · 2025-05-20 08:13:44 -04:00 · 2025-04-14 15:26:24 -07:00 · 2025-04-14 13:41:34 -07:00 · 2025-04-14 12:55:13 -07:00
186 changed files with 5393 additions and 4184 deletions
--- a/.azuredevops/components/AMDMIGraphX.yml
+++ b/.azuredevops/components/AMDMIGraphX.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -115,6 +120,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -145,7 +151,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/HIP.yml
+++ b/.azuredevops/components/HIP.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -42,7 +47,7 @@ jobs:
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
-  pool: 
+  pool:
    vmImage: ${{ variables.BASE_BUILD_POOL }}
  workspace:
    clean: all
@@ -67,6 +72,7 @@ jobs:
    parameters:
      checkoutRef: ${{ parameters.checkoutRef }}
      dependencyList: ${{ parameters.rocmDependenciesAMD }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
 # compile clr
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
@@ -99,7 +105,7 @@ jobs:
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
-  pool: 
+  pool:
    vmImage: ${{ variables.BASE_BUILD_POOL }}
  workspace:
    clean: all
@@ -125,6 +131,7 @@ jobs:
    parameters:
      checkoutRef: ${{ parameters.checkoutRef }}
      dependencyList: ${{ parameters.rocmDependenciesNvidia }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
  - script: 'ls -1R $(Agent.BuildDirectory)/rocm'
    displayName: 'Artifact listing'
 # compile clr
--- a/.azuredevops/components/HIPIFY.yml
+++ b/.azuredevops/components/HIPIFY.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
--- a/.azuredevops/components/MIOpen.yml
+++ b/.azuredevops/components/MIOpen.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -77,7 +82,7 @@ jobs:
    - template: /.azuredevops/variables-global.yml
    - name: ROCM_PATH
      value: $(Agent.BuildDirectory)/rocm
-    pool: ${{ variables.MEDIUM_BUILD_POOL }}
+    pool: ${{ variables.HIGH_BUILD_POOL }}
    workspace:
      clean: all
    steps:
@@ -97,6 +102,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - task: Bash@3
      displayName: Build and install other dependencies
      inputs:
@@ -113,7 +119,7 @@ jobs:
        extraBuildFlags: >-
          -DMIOPEN_BACKEND=HIP
          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/miopen-deps
+          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/miopen-deps --generator Ninja
          -DGPU_TARGETS=${{ job.target }}
          -DMIOPEN_ENABLE_AI_KERNEL_TUNING=OFF
          -DMIOPEN_ENABLE_AI_IMMED_MODE_FALLBACK=OFF
@@ -142,7 +148,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/MIVisionX.yml
+++ b/.azuredevops/components/MIVisionX.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -103,6 +108,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -129,7 +135,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/ROCR-Runtime.yml
+++ b/.azuredevops/components/ROCR-Runtime.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -42,7 +47,7 @@ jobs:
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
-  pool: 
+  pool:
    vmImage: ${{ variables.BASE_BUILD_POOL }}
  workspace:
    clean: all
@@ -58,6 +63,7 @@ jobs:
    parameters:
      checkoutRef: ${{ parameters.checkoutRef }}
      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
      extraBuildFlags: >-
@@ -78,7 +84,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/ROCdbgapi.yml
+++ b/.azuredevops/components/ROCdbgapi.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -24,7 +29,7 @@ jobs:
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
-  pool: 
+  pool:
    vmImage: ${{ variables.BASE_BUILD_POOL }}
  workspace:
    clean: all
@@ -40,6 +45,7 @@ jobs:
    parameters:
      checkoutRef: ${{ parameters.checkoutRef }}
      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
      extraBuildFlags: >-
--- a/.azuredevops/components/ROCgdb.yml
+++ b/.azuredevops/components/ROCgdb.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 # reference: https://github.com/ROCm/ROCgdb/blob/amd-staging/README-ROCM.md
 - name: aptPackages
  type: object
@@ -46,7 +51,8 @@ jobs:
    condition:
      and(
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
@@ -69,6 +75,7 @@ jobs:
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-autotools.yml
      parameters:
        configureFlags: >-
--- a/.azuredevops/components/ROCmValidationSuite.yml
+++ b/.azuredevops/components/ROCmValidationSuite.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -96,6 +101,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -126,7 +132,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/Tensile.yml
+++ b/.azuredevops/components/Tensile.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -46,7 +51,7 @@ jobs:
  - template: /.azuredevops/variables-global.yml
  - name: ROCM_PATH
    value: $(Agent.BuildDirectory)/rocm
-  pool: 
+  pool:
    vmImage: ${{ variables.BASE_BUILD_POOL }}
  workspace:
    clean: all
@@ -63,6 +68,7 @@ jobs:
    parameters:
      checkoutRef: ${{ parameters.checkoutRef }}
      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
  - task: Bash@3
    displayName: Create wheel file
@@ -104,7 +110,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/TransferBench.yml
+++ b/.azuredevops/components/TransferBench.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -67,6 +72,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -92,7 +98,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/amdsmi.yml
+++ b/.azuredevops/components/amdsmi.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -58,7 +63,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/aomp-extras.yml
+++ b/.azuredevops/components/aomp-extras.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -23,7 +28,7 @@ jobs:
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
-  pool: 
+  pool:
    vmImage: ${{ variables.BASE_BUILD_POOL }}
  workspace:
    clean: all
@@ -39,6 +44,7 @@ jobs:
    parameters:
      checkoutRef: ${{ parameters.checkoutRef }}
      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
      componentName: aomp-extras
--- a/.azuredevops/components/aomp.yml
+++ b/.azuredevops/components/aomp.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 # reference:
 # https://github.com/ROCm/aomp/blob/aomp-dev/docs/SOURCEINSTALL_PREREQUISITE.md
 - name: aptPackages
@@ -108,6 +113,7 @@ jobs:
    parameters:
      checkoutRef: ${{ parameters.checkoutRef }}
      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
      componentName: extras
@@ -176,7 +182,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/composable_kernel.yml
+++ b/.azuredevops/components/composable_kernel.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -71,6 +76,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - script: |
        mkdir -p $(CCACHE_DIR)
        echo "##vso[task.prependpath]/usr/lib/ccache"
@@ -117,7 +123,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/half.yml
+++ b/.azuredevops/components/half.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -25,7 +30,7 @@ jobs:
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
-  pool: 
+  pool:
    vmImage: ${{ variables.BASE_BUILD_POOL }}
  workspace:
    clean: all
@@ -41,6 +46,7 @@ jobs:
    parameters:
      checkoutRef: ${{ parameters.checkoutRef }}
      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
      extraBuildFlags: >-
--- a/.azuredevops/components/hip-tests.yml
+++ b/.azuredevops/components/hip-tests.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -75,6 +80,7 @@ jobs:
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    # compile hip-tests
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
@@ -109,7 +115,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/hipBLAS-common.yml
+++ b/.azuredevops/components/hipBLAS-common.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -29,7 +34,7 @@ jobs:
  - name: ROCM_PATH
    value: $(Agent.BuildDirectory)/rocm
  - template: /.azuredevops/variables-global.yml
-  pool: 
+  pool:
    vmImage: ${{ variables.BASE_BUILD_POOL }}
  workspace:
    clean: all
@@ -45,6 +50,7 @@ jobs:
    parameters:
      checkoutRef: ${{ parameters.checkoutRef }}
      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
      extraBuildFlags: >-
--- a/.azuredevops/components/hipBLAS.yml
+++ b/.azuredevops/components/hipBLAS.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -89,6 +94,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -121,7 +127,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/hipBLASLt.yml
+++ b/.azuredevops/components/hipBLASLt.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -98,6 +103,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - task: Bash@3
      displayName: Add ROCm binaries to PATH
      inputs:
@@ -179,7 +185,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/hipCUB.yml
+++ b/.azuredevops/components/hipCUB.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -67,6 +72,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -94,7 +100,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/hipFFT.yml
+++ b/.azuredevops/components/hipFFT.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -79,6 +84,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -112,7 +118,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/hipRAND.yml
+++ b/.azuredevops/components/hipRAND.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -70,6 +75,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -101,7 +107,8 @@ jobs:
    condition:
        and(succeeded(),
          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+          eq(${{ parameters.aggregatePipeline }}, False)
        )
    variables:
    - group: common
--- a/.azuredevops/components/hipSOLVER.yml
+++ b/.azuredevops/components/hipSOLVER.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -82,6 +87,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
  # build external gtest and lapack
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
@@ -122,7 +128,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/hipSPARSE.yml
+++ b/.azuredevops/components/hipSPARSE.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -77,6 +82,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -116,7 +122,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/hipSPARSELt.yml
+++ b/.azuredevops/components/hipSPARSELt.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -91,6 +96,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
  # Build and install gtest and lapack
  # $(Pipeline.Workspace)/deps is a temporary folder for the build process
  # $(Pipeline.Workspace)/s/deps is part of the hipSPARSELt repo
@@ -150,7 +156,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/hipTensor.yml
+++ b/.azuredevops/components/hipTensor.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -66,6 +71,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -95,7 +101,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/hipfort.yml
+++ b/.azuredevops/components/hipfort.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -76,6 +81,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -111,7 +117,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/llvm-project.yml
+++ b/.azuredevops/components/llvm-project.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -45,6 +50,7 @@ jobs:
      checkoutRef: ${{ parameters.checkoutRef }}
      dependencyList: ${{ parameters.rocmDependencies }}
      skipLlvmSymlink: true
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
      componentName: rocm-llvm
--- a/.azuredevops/components/rccl.yml
+++ b/.azuredevops/components/rccl.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -72,7 +77,7 @@ jobs:
    - template: /.azuredevops/variables-global.yml
    - name: HIP_ROCCLR_HOME
      value: $(Build.BinariesDirectory)/rocm
-    pool: ${{ variables.MEDIUM_BUILD_POOL }}
+    pool: ${{ variables.HIGH_BUILD_POOL }}
    workspace:
      clean: all
    steps:
@@ -90,6 +95,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -124,7 +130,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rdc.yml
+++ b/.azuredevops/components/rdc.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -89,6 +94,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
  # Build grpc
    - task: Bash@3
      displayName: 'git clone grpc'
@@ -135,7 +141,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocAL.yml
+++ b/.azuredevops/components/rocAL.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -151,6 +156,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -182,7 +188,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocALUTION.yml
+++ b/.azuredevops/components/rocALUTION.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -85,6 +90,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -117,7 +123,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocBLAS.yml
+++ b/.azuredevops/components/rocBLAS.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -101,6 +106,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -145,7 +151,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocDecode.yml
+++ b/.azuredevops/components/rocDecode.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -73,6 +78,7 @@ jobs:
    parameters:
      checkoutRef: ${{ parameters.checkoutRef }}
      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
      extraBuildFlags: >-
@@ -92,7 +98,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocFFT.yml
+++ b/.azuredevops/components/rocFFT.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -79,6 +84,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -113,7 +119,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocJPEG.yml
+++ b/.azuredevops/components/rocJPEG.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -75,6 +80,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -96,7 +102,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocMLIR.yml
+++ b/.azuredevops/components/rocMLIR.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -62,6 +67,7 @@ jobs:
    parameters:
      checkoutRef: ${{ parameters.checkoutRef }}
      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
      extraBuildFlags: >-
@@ -87,7 +93,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocPRIM.yml
+++ b/.azuredevops/components/rocPRIM.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -66,6 +71,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -93,7 +99,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocPyDecode.yml
+++ b/.azuredevops/components/rocPyDecode.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -77,6 +82,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - task: Bash@3
      displayName: 'Save Python Package Paths'
      inputs:
@@ -152,7 +158,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocRAND.yml
+++ b/.azuredevops/components/rocRAND.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -70,6 +75,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -98,7 +104,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocSOLVER.yml
+++ b/.azuredevops/components/rocSOLVER.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -88,6 +93,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        componentName: lapack
@@ -131,7 +137,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocSPARSE.yml
+++ b/.azuredevops/components/rocSPARSE.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -80,6 +85,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -126,7 +132,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocThrust.yml
+++ b/.azuredevops/components/rocThrust.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -71,6 +76,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -98,7 +104,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocWMMA.yml
+++ b/.azuredevops/components/rocWMMA.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -65,7 +70,7 @@ jobs:
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
-    pool: ${{ variables.MEDIUM_BUILD_POOL }}
+    pool: ${{ variables.HIGH_BUILD_POOL }}
    workspace:
      clean: all
    steps:
@@ -81,6 +86,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -113,7 +119,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocm-cmake.yml
+++ b/.azuredevops/components/rocm-cmake.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -26,7 +31,7 @@ jobs:
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
-  pool: 
+  pool:
    vmImage: ${{ variables.BASE_BUILD_POOL }}
  workspace:
    clean: all
--- a/.azuredevops/components/rocm-core.yml
+++ b/.azuredevops/components/rocm-core.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -17,7 +22,7 @@ jobs:
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
-  pool: 
+  pool:
    vmImage: ${{ variables.BASE_BUILD_POOL }}
  workspace:
    clean: all
--- a/.azuredevops/components/rocm-examples.yml
+++ b/.azuredevops/components/rocm-examples.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -100,6 +105,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        # https://github.com/ROCm/HIP/issues/2203
@@ -136,7 +142,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocm_bandwidth_test.yml
+++ b/.azuredevops/components/rocm_bandwidth_test.yml
@@ -5,6 +5,12 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
+
 - name: aptPackages
  type: object
  default:
@@ -49,7 +55,7 @@ jobs:
    value: $(Agent.BuildDirectory)/rocm
  - name: ROCR_LIB_DIR
    value: $(Agent.BuildDirectory)/rocm
-  pool: 
+  pool:
    vmImage: ${{ variables.BASE_BUILD_POOL }}
  workspace:
    clean: all
@@ -66,6 +72,7 @@ jobs:
    parameters:
      checkoutRef: ${{ parameters.checkoutRef }}
      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
      extraBuildFlags: >-
@@ -90,7 +97,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocm_smi_lib.yml
+++ b/.azuredevops/components/rocm_smi_lib.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -28,7 +33,7 @@ jobs:
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
-  pool: 
+  pool:
    vmImage: ${{ variables.BASE_BUILD_POOL }}
  workspace:
    clean: all
@@ -59,7 +64,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocminfo.yml
+++ b/.azuredevops/components/rocminfo.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -38,7 +43,7 @@ jobs:
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
-  pool: 
+  pool:
    vmImage: ${{ variables.BASE_BUILD_POOL }}
  workspace:
    clean: all
@@ -55,6 +60,7 @@ jobs:
    parameters:
      checkoutRef: ${{ parameters.checkoutRef }}
      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
      skipLlvmSymlink: true
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
@@ -72,7 +78,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocprofiler-compute.yml
+++ b/.azuredevops/components/rocprofiler-compute.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -114,6 +119,7 @@ jobs:
        dependencyList: ${{ parameters.rocmDependencies }}
        dependencySource: ${{ job.dependencySource }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -140,7 +146,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocprofiler-register.yml
+++ b/.azuredevops/components/rocprofiler-register.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -17,7 +22,7 @@ jobs:
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
-  pool: 
+  pool:
    vmImage: ${{ variables.BASE_BUILD_POOL }}
  workspace:
    clean: all
--- a/.azuredevops/components/rocprofiler-sdk.yml
+++ b/.azuredevops/components/rocprofiler-sdk.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -89,6 +94,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - task: Bash@3
      displayName: Add Python site-packages binaries to path
      inputs:
@@ -125,7 +131,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocprofiler-systems.yml
+++ b/.azuredevops/components/rocprofiler-systems.yml
@@ -6,6 +6,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -103,6 +108,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - task: Bash@3
      displayName: Add ROCm binaries to PATH
      inputs:
@@ -147,7 +153,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    timeoutInMinutes: 180
    variables:
--- a/.azuredevops/components/rocprofiler.yml
+++ b/.azuredevops/components/rocprofiler.yml
@@ -5,6 +5,12 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
+
 - name: aptPackages
  type: object
  default:
@@ -83,6 +89,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -115,7 +122,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rocr_debug_agent.yml
+++ b/.azuredevops/components/rocr_debug_agent.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -49,7 +54,7 @@ jobs:
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
-  pool: 
+  pool:
    vmImage: ${{ variables.BASE_BUILD_POOL }}
  workspace:
    clean: all
@@ -65,6 +70,7 @@ jobs:
    parameters:
      checkoutRef: ${{ parameters.checkoutRef }}
      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
      extraBuildFlags: >-
@@ -86,7 +92,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/roctracer.yml
+++ b/.azuredevops/components/roctracer.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -77,6 +82,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -107,7 +113,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/components/rpp.yml
+++ b/.azuredevops/components/rpp.yml
@@ -5,6 +5,11 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false
 - name: aptPackages
  type: object
  default:
@@ -84,6 +89,7 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -113,7 +119,8 @@ jobs:
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName']))
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
      )
    variables:
    - group: common
--- a/.azuredevops/templates/steps/artifact-download.yml
+++ b/.azuredevops/templates/steps/artifact-download.yml
@@ -12,6 +12,11 @@ parameters:
 - name: fileFilter
  type: string
  default: ''
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false

 steps:
 - task: Bash@3
@@ -27,17 +32,23 @@ steps:
 - task: DownloadPipelineArtifact@2
  displayName: Download ${{ parameters.componentName }}
  inputs:
-    buildType: 'specific'
-    project: ROCm-CI
-    definition: ${{ parameters.pipelineId }}
-    specificBuildWithTriggering: true
-    itemPattern: '**/*${{ parameters.fileFilter }}*'
-    # aomp is a special case, since the trigger file is under ROCm/ROCm instead of the component repo
-    ${{ if notIn(parameters.componentName, 'aomp') }}:
-      buildVersionToDownload: latestFromBranch # default is 'latest'
-    branchName: refs/heads/${{ parameters.branchName }}
-    allowPartiallySucceededBuilds: $(allowPartiallySucceededBuilds)
-    targetPath: '$(Pipeline.Workspace)/d'
+    ${{ if eq(parameters.aggregatePipeline, false) }}:
+      buildType: 'specific'
+      project: ROCm-CI
+      definition: ${{ parameters.pipelineId }}
+      specificBuildWithTriggering: true
+      itemPattern: '**/*${{ parameters.fileFilter }}*'
+      # aomp is a special case, since the trigger file is under ROCm/ROCm instead of the component repo
+      ${{ if notIn(parameters.componentName, 'aomp') }}:
+        buildVersionToDownload: latestFromBranch # default is 'latest'
+      branchName: refs/heads/${{ parameters.branchName }}
+      allowPartiallySucceededBuilds: $(allowPartiallySucceededBuilds)
+      targetPath: '$(Pipeline.Workspace)/d'
+    ${{ else }}:
+      buildType: 'current'
+      itemPattern: '**/${{ parameters.componentName }}*${{ parameters.fileFilter }}*'
+      allowPartiallySucceededBuilds: $(allowPartiallySucceededBuilds)
+      targetPath: '$(Pipeline.Workspace)/d'
 - task: ExtractFiles@1
  displayName: Extract ${{ parameters.componentName }}
  inputs:
--- a/.azuredevops/templates/steps/dependencies-rocm.yml
+++ b/.azuredevops/templates/steps/dependencies-rocm.yml
@@ -31,6 +31,11 @@ parameters:
 - name: setupHIPLibrarySymlinks
  type: boolean
  default: false
+# set to true if doing full build of ROCm stack
+# and dependencies are pulled from same pipeline
+- name: aggregatePipeline
+  type: boolean
+  default: false

 - name: componentVarList
  type: object
@@ -354,6 +359,7 @@ steps:
      parameters:
        componentName: ${{ split(dependency, ':')[0] }}
        pipelineId: ${{ parameters.componentVarList[split(dependency, ':')[0]].pipelineId }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
        ${{ if parameters.componentVarList[split(dependency, ':')[0]].hasGpuTarget }}:
          fileFilter: "${{ split(dependency, ':')[1] }}*${{ parameters.gpuTarget }}"
        # dependencySource = staging
@@ -383,6 +389,7 @@ steps:
      parameters:
        componentName: ${{ dependency }}
        pipelineId: ${{ parameters.componentVarList[dependency].pipelineId }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
        ${{ if parameters.componentVarList[dependency].hasGpuTarget }}:
          fileFilter: ${{ parameters.gpuTarget }}
        # dependencySource = staging
--- a/.azuredevops/templates/steps/miopen-get-ck-build.yml
+++ b/.azuredevops/templates/steps/miopen-get-ck-build.yml
@@ -54,7 +54,7 @@ steps:
      fi

      echo "Downloading CK artifact from $ARTIFACT_URL"
-      wget -nv $ARTIFACT_URL -O $(System.ArtifactsDirectory)/ck.zip
+      wget --tries=5 --waitretry=10 --retry-connrefused -nv $ARTIFACT_URL -O $(System.ArtifactsDirectory)/ck.zip
      unzip $(System.ArtifactsDirectory)/ck.zip -d $(System.ArtifactsDirectory)
      mkdir -p $(Agent.BuildDirectory)/rocm
      tar -zxvf $(System.ArtifactsDirectory)/$ARTIFACT_NAME/*.tar.gz -C $(Agent.BuildDirectory)/rocm
--- a/.azuredevops/variables-global.yml
+++ b/.azuredevops/variables-global.yml
@@ -30,13 +30,13 @@ variables:
 - name: GFX90A_TEST_POOL
  value: gfx90a_test_pool
 - name: LATEST_RELEASE_VERSION
-  value: 6.3.4
- name: REPO_RADEON_VERSION
-  value: 6.3.4
- name: NEXT_RELEASE_VERSION
  value: 6.4.0
+- name: REPO_RADEON_VERSION
+  value: 6.4
+- name: NEXT_RELEASE_VERSION
+  value: 6.5.0
 - name: LATEST_RELEASE_TAG
-  value: rocm-6.3.4
+  value: rocm-6.4.0
 - name: DOCKER_SKIP_GFX
  value: gfx90a
 - name: AMDMIGRAPHX_GFX942_TEST_PIPELINE_ID
--- a/.wordlist.txt
+++ b/.wordlist.txt
@@ -34,7 +34,6 @@ Autocast
 BARs
 BLAS
 BMC
-BabelStream
 Blit
 Blockwise
 Bluefield
@@ -77,7 +76,6 @@ Concretized
 Conda
 ConnectX
 CuPy
-da
 Dashboarding
 DBRX
 DDR
@@ -139,7 +137,6 @@ GDR
 GDS
 GEMM
 GEMMs
-GFLOPS
 GFortran
 GFXIP
 Gemma
@@ -228,7 +225,6 @@ LM
 LSAN
 LSan
 LTS
-LanguageCrossEntropy
 LoRA
 MEM
 MERCHANTABILITY
@@ -246,7 +242,6 @@ MMIOH
 MMU
 MNIST
 MPI
-MPT
 MSVC
 MVAPICH
 MVFFR
@@ -263,7 +258,6 @@ Meta's
 Miniconda
 MirroredStrategy
 Mixtral
-MosaicML
 Multicore
 Multithreaded
 MyEnvironment
@@ -334,7 +328,6 @@ PipelineParallel
 PnP
 PowerEdge
 PowerShell
-Pretrained
 Pretraining
 Profiler's
 PyPi
@@ -643,7 +636,6 @@ hipSPARSELt
 hipTensor
 hipamd
 hipblas
-hipcc
 hipcub
 hipfft
 hipfort
@@ -759,7 +751,6 @@ profilers
 protobuf
 pseudorandom
 py
-pytorch
 recommender
 recommenders
 quantile
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,7 +6,7 @@ different versions of the ROCm software stack and its components.

 ## ROCm 6.4.0

-See the [ROCm 6.4.0 release notes](https://rocm.docs.amd.com/en/docs-6.4.0/about/release-notes.html)
+See the [ROCm 6.4.0 release notes](https://rocm.docs.amd.com/en/docs-6.4.0/about/release-notes.html)
 for a complete overview of this release.

 ### **AMD SMI** (25.3.0)
@@ -125,7 +125,7 @@ Some workaround options are as follows:
 - The `pasid` field in struct `amdsmi_process_info_t` will be deprecated in a future ROCm release.

 ```{note}
-See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples,
+See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/rocm-6.4.x/CHANGELOG.md) for details, examples,
 and in-depth descriptions.
 ```

@@ -678,6 +678,7 @@ The following lists the backward incompatible changes planned for upcoming major

 * Roofline support for Ubuntu 24.04.
 * Experimental support `rocprofv3` (not enabled as default).
+* Experimental feature: Spatial multiplexing.

 #### Resolved issues

@@ -736,7 +737,7 @@ The following lists the backward incompatible changes planned for upcoming major
 - Fixed `rsmi_dev_target_graphics_version_get`, `rocm-smi --showhw`, and `rocm-smi --showprod` not displaying graphics version correctly for Instinct MI200 series, MI100 series, and RDNA3-based GPUs. 

 ```{note}
-See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples,
+See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/rocm-6.4.x/CHANGELOG.md) for details, examples,
 and in-depth descriptions.
 ```

@@ -745,10 +746,6 @@ and in-depth descriptions.
 #### Added 

 - Support for VA-API and rocDecode tracing.
- Aggregation of MPI data collected across distributed nodes and ranks. The data is concatenated into a single proto file.
-
-#### Changed
- Backend refactored to use [ROCprofiler-SDK](https://github.com/ROCm/rocprofiler-sdk) rather than [ROCProfiler](https://github.com/ROCm/rocprofiler) and [ROCTracer](https://github.com/ROCm/ROCTracer).

 #### Resolved issues

@@ -759,9 +756,9 @@ and in-depth descriptions.
 - Fixed interruption in config file generation.

 - Fixed segmentation fault while running rocprof-sys-instrument.
- Fixed an issue where running `rocprof-sys-causal` or using the `-I all` option with `rocprof-sys-sample` caused the system to become non-responsive.

- Fixed an issue where sampling multi-GPU Python workloads caused the system to stop responding.
+#### Changed
+- Backend refactored to use [ROCprofiler-SDK](https://github.com/ROCm/rocprofiler-sdk) rather than [ROCProfiler](https://github.com/ROCm/rocprofiler) and [ROCTracer](https://github.com/ROCm/ROCTracer).

 ### **rocPRIM** (3.4.0)

@@ -3459,7 +3456,7 @@ See [issue #3499](https://github.com/ROCm/ROCm/issues/3499) on GitHub.

 - Error when running Omniperf with an application with command line arguments. As a workaround, create an
  intermediary script to call the application with the necessary arguments, then call the script with Omniperf. This
-  issue is fixed in a future release of Omniperf. See [#347](https://github.com/ROCm/rocprofiler-compute/issues/347).
+  issue is fixed in a future release of Omniperf. See [#347](https://github.com/ROCm/omniperf/issues/347).

 - Omniperf might not work with AMD Instinct MI300 accelerators out of the box, resulting in the following error:
  "*ERROR gfx942 is not enabled rocprofv1. Available profilers include: ['rocprofv2']*". As a workaround, add the
@@ -4336,7 +4333,7 @@ for a complete overview of this release.
 * New multiple node and GPU support.
  Unsmoothed and smoothed aggregations and Ruge-Stueben AMG now work with multiple nodes
  and GPUs. For more information, refer to the 
-  [API documentation](https://rocm.docs.amd.com/projects/rocALUTION/en/docs-6.1.0/usermanual/solvers.html#unsmoothed-aggregation-amg).
+  [API documentation](https://rocm.docs.amd.com/projects/rocALUTION/en/latest/usermanual/solvers.html#unsmoothed-aggregation-amg).

 ### **rocDecode** (0.5.0)

--- a/README.md
+++ b/README.md
@@ -127,6 +127,7 @@ bash install-prerequisites.sh
 export GPU_ARCHS="gfx942"               # Example
 export GPU_ARCHS="gfx940;gfx941;gfx942" # Example

+cd ~/WORKSPACE/
 # Pick and run build commands in the docker container:
 # Build rocm-dev packages
 make -f ROCm/tools/rocm-build/ROCm.mk -j ${NPROC:-$(nproc)} rocm-dev
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -80,23 +80,23 @@ for the complete list of PyTorch versions tested for compatibility with ROCm. Se

 ### VP9 support added to rocDecode and rocPyDecode

-VP9 support is added to [rocDecode](https://rocm.docs.amd.com/projects/rocDecode/en/latest/index.html) and [rocPyDecode](https://rocm.docs.amd.com/projects/rocPyDecode/en/latest/index.html), allowing enhanced codec support with VP9 encoding.
+VP9 support is added to [rocDecode](https://github.com/ROCm/rocDecode) and [rocPyDecode](https://github.com/ROCm/rocPyDecode), allowing enhanced codec support with VP9 encoding.

 ### Bitstream reader support added to rocDecode

-The new bitstream reader feature has been added to [rocDecode](https://rocm.docs.amd.com/projects/rocDecode/en/latest/index.html). It contains built-in stream file parsers, including an elementary stream file parser and an IVF container file parser. It enables decoding without the requirement for FFmpeg demuxer. The reader can parse AVC, HEVC, and AV1 elementary stream files, and AV1 IVF container files. See [Using the rocDecode bitstream reader APIs](https://rocm.docs.amd.com/projects/rocDecode/en/latest/how-to/using-rocDecode-bitstream.html) for more information.
+The new bitstream reader feature has been added to [rocDecode](https://github.com/ROCm/rocDecode). It contains built-in stream file parsers, including an elementary stream file parser and an IVF container file parser. It enables decoding without the requirement for FFmpeg demuxer. The reader can parse AVC, HEVC, and AV1 elementary stream files, and AV1 IVF container files. See [Using the rocDecode bitstream reader APIs](https://rocm.docs.amd.com/projects/rocDecode/en/latest/how-to/using-rocDecode-bitstream.html) for more information.

 ### DLPack support added to rocAL

-[rocAL](https://rocm.docs.amd.com/projects/rocAL/en/latest/index.html) now supports DLPack, allowing rocAL GPU tensor to be exchanged with PyTorch. This allows faster data processing by leveraging DLPack tensors. It also improves the GPU based workload performance. For more details, see [DLpack github reference documentation](https://dmlc.github.io/dlpack/latest/).
+[rocAL](https://github.com/ROCm/rocAL) now supports DLPack, allowing rocAL GPU tensor to be exchanged with PyTorch. This allows faster data processing by leveraging DLPack tensors. It also improves the GPU based workload performance. For more details, see [DLpack github reference documentation](https://dmlc.github.io/dlpack/latest/).

 ### ROCm Compute Profiler updates

-ROCm Compute Profiler now supports:
+* ROCm Compute Profiler now supports:

-* ROCprofiler-SDK (`rocprofv3`)
-* Experimental multi-nodes profiling support.
-* Roofline plot for 64-bit floating point (FP64) and 32-bit floating point (FP32) data types.
+    * ROCprofiler-SDK (`rocprofv3`)
+    * Experimental multi-node profiling support.
+    * Roofline plot for 64-bit floating point (FP64) and 32-bit floating point (FP32) data types.

 ### ROCm Systems Profiler updates

@@ -253,19 +253,14 @@ Click {fab}`github` to go to the component's source code on GitHub.
        </tbody>
        <tbody class="rocm-components-libs rocm-components-communication tbody-reverse-zebra">
            <tr>
-                <th rowspan="2"></th>
-                <th rowspan="2">Communication</th>
+                <th rowspan="1"></th>
+                <th rowspan="1">Communication</th>
                <td><a href="https://rocm.docs.amd.com/projects/rccl/en/docs-6.4.0/index.html">RCCL</a></td>
                <td>2.21.5&nbsp;&Rightarrow;&nbsp;<a href="#rccl-2-22-3">2.22.3</a></td>
                <td><a href="https://github.com/ROCm/rccl"><i class="fab fa-github fa-lg"></i></a></td>
            </tr>
-            <tr>
-            <td><a href="https://rocm.docs.amd.com/projects/rocSHMEM/en/docs-6.4.0/index.html">rocSHMEM</a></td>
-                <td>2.0.0</td>
-                <td><a href="https://github.com/ROCm/rocSHMEM"><i class="fab fa-github fa-lg"></i></a></td>
-            </tr>
        </tbody>
-        <tbody class="rocm-components-libs rocm-components-math tbody-reverse-zebra">
+        <tbody class="rocm-components-libs rocm-components-math">
            <tr>
                <th rowspan="16"></th>
                <th rowspan="16">Math</th>
@@ -349,7 +344,7 @@ Click {fab}`github` to go to the component's source code on GitHub.
                <td><a href="https://github.com/ROCm/Tensile"><i class="fab fa-github fa-lg"></i></a></td>
            </tr>
        </tbody>
-        <tbody class="rocm-components-libs rocm-components-primitives tbody-reverse-zebra">
+        <tbody class="rocm-components-libs rocm-components-primitives">
            <tr>
                <th rowspan="4"></th>
                <th rowspan="4">Primitives</th>
@@ -373,7 +368,7 @@ Click {fab}`github` to go to the component's source code on GitHub.
                <td><a href="https://github.com/ROCm/rocThrust"><i class="fab fa-github fa-lg"></i></a></td>
            </tr>
        </tbody>
-        <tbody class="rocm-components-tools rocm-components-system tbody-reverse-zebra">
+        <tbody class="rocm-components-tools rocm-components-system">
            <tr>
                <th rowspan="7">Tools</th>
                <th rowspan="7">System management</th>
@@ -402,7 +397,7 @@ Click {fab}`github` to go to the component's source code on GitHub.
                <td><a href="https://github.com/ROCm/ROCmValidationSuite"><i class="fab fa-github fa-lg"></i></a></td>
            </tr>
        </tbody>
-        <tbody class="rocm-components-tools rocm-components-perf">
+        <tbody class="rocm-components-tools rocm-components-perf tbody-reverse-zebra">
            <tr>
                <th rowspan="6"></th>
                <th rowspan="6">Performance</th>
@@ -443,7 +438,7 @@ Click {fab}`github` to go to the component's source code on GitHub.
                            class="fab fa-github fa-lg"></i></a></td>
            </tr>
        </tbody>
-        <tbody class="rocm-components-tools rocm-components-dev">
+        <tbody class="rocm-components-tools rocm-components-dev tbody-reverse-zebra">
            <tr>
                <th rowspan="5"></th>
                <th rowspan="5">Development</th>
@@ -479,7 +474,7 @@ Click {fab}`github` to go to the component's source code on GitHub.
                            class="fab fa-github fa-lg"></i></a></td>
            </tr>
        </tbody>
-        <tbody class="rocm-components-compilers tbody-reverse-zebra">
+        <tbody class="rocm-components-compilers">
            <tr>
                <th rowspan="2" colspan="2">Compilers</th>
                <td><a href="https://rocm.docs.amd.com/projects/HIPCC/en/docs-6.4.0/index.html">HIPCC</a></td>
@@ -494,7 +489,7 @@ Click {fab}`github` to go to the component's source code on GitHub.
                            class="fab fa-github fa-lg"></i></a></td>
            </tr>
        </tbody>
-        <tbody class="rocm-components-runtimes tbody-reverse-zebra">
+        <tbody class="rocm-components-runtimes">
            <tr>
                <th rowspan="2" colspan="2">Runtimes</th>
                <td><a href="https://rocm.docs.amd.com/projects/HIP/en/docs-6.4.0/index.html">HIP</a></td>
@@ -634,7 +629,7 @@ Some workaround options are as follows:
 - The `pasid` field in struct `amdsmi_process_info_t` will be deprecated in a future ROCm release.

 ```{note}
-See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples,
+See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/rocm-6.4.x/CHANGELOG.md) for details, examples,
 and in-depth descriptions.
 ```

@@ -767,17 +762,17 @@ and in-depth descriptions.

 #### Changed

-* The `roc-obj` tools have been deprecated and will be removed in a future release.
+* The `roc-obj` tools are deprecated and will be removed in an upcoming release.

-   -	`llvm-objdump`, `llvm-objcopy`, and `llvm-readobj` will be enhanced to provide similar functionality as that provided by the `roc-obj` tools . The LLVM tools are available in the `rocm-llvm` pkg.
-   -	While not related to the deprecation, also note that the `roc-obj` tools’ package dependency on Perl has been changed to recommended. It is the user’s responsibility to install Perl to use these tools.
+    - Installing the Perl package is not required; users who want it must install it themselves.
+    - Support for ROCm Object tooling has moved into `llvm-objdump`, provided by the `rocm-llvm` package.

 * SDMA retainer logic is removed for engine selection in operation of runtime buffer copy.

 #### Optimized

 * `hipGraphLaunch` parallelism is improved for complex data-parallel graphs.
-* Round-robin queue mechanism is updated for command scheduling. For multi-streams execution, HSA queue from null stream lock is freed and won't occupy the queue ID after the kernel in the stream is finished.
+* Round-robin queue selection is updated for command scheduling. For multi-stream execution, the HSA queue from the null stream lock is freed and won't occupy the queue ID after the kernel in the stream is finished.
 * The HIP runtime doesn't free bitcode object before code generation. It adds a cache, which allows compiled code objects to be reused instead of recompiling. This improves performance on multi-GPU systems.
 * Runtime now uses unified copy approach:

@@ -786,11 +781,6 @@ and in-depth descriptions.
    - The default environment variable `GPU_FORCE_BLIT_COPY_SIZE` is set to `16`, which limits the kernel copy to sizes less than 16 KB, while copies larger than that would be handled by `SDMA` engine.
    - Blit code is refactored, and ASAN instrumentation is cleaned up.

-* HIP runtime uses signals without interrupts:
-
-    - In active wait mode, uses signals without interrupts by default.
-    - Only when a callback is required, switches to the interrupts.
-
 #### Resolved issues

 * Out-of-memory error on Microsoft Windows. When the user calls `hipMalloc` for device memory allocation while specifying a size larger than the available device memory, the HIP runtime fixes the error in the API implementation, allocating the available device memory plus system memory (shared virtual memory).
@@ -801,15 +791,13 @@ and in-depth descriptions.

 The following lists the backward incompatible changes planned for upcoming major ROCm releases.

-* Signature changes in APIs to match corresponding CUDA APIs,
+* Signature changes in APIs to correspond with NVIDIA CUDA APIs,

    - `hiprtcCreateProgram`
    - `hiprtcCompileProgram`
    - `hipCtxGetApiVersion`

 * Behavior of `hipPointerGetAttributes` is changed to match corresponding CUDA API in version 11 and later releases.
-* Behavior of `hipFree` is changed to match corresponding CUDA API `cudaFree`.
-* HIP vector constructor changes for `hipComplex`.
 * Return error/value code updates in the following hip APIs to match the corresponding CUDA APIs,

    - `hipModuleLaunchKernel`
@@ -1194,6 +1182,7 @@ The following lists the backward incompatible changes planned for upcoming major

 * Roofline support for Ubuntu 24.04.
 * Experimental support `rocprofv3` (not enabled as default).
+* Experimental feature: Spatial multiplexing.

 #### Resolved issues

@@ -1252,7 +1241,7 @@ The following lists the backward incompatible changes planned for upcoming major
 - Fixed `rsmi_dev_target_graphics_version_get`, `rocm-smi --showhw`, and `rocm-smi --showprod` not displaying graphics version correctly for Instinct MI200 series, MI100 series, and RDNA3-based GPUs. 

 ```{note}
-See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples,
+See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/rocm-6.4.x/CHANGELOG.md) for details, examples,
 and in-depth descriptions.
 ```

@@ -1261,11 +1250,6 @@ and in-depth descriptions.
 #### Added 

 - Support for VA-API and rocDecode tracing.
- Aggregation of MPI data collected across distributed nodes and ranks. The data is concatenated into a single proto file.
-
-
-#### Changed
- Backend refactored to use [ROCprofiler-SDK](https://github.com/ROCm/rocprofiler-sdk) rather than [ROCProfiler](https://github.com/ROCm/rocprofiler) and [ROCTracer](https://github.com/ROCm/ROCTracer).

 #### Resolved issues

@@ -1276,9 +1260,9 @@ and in-depth descriptions.
 - Fixed interruption in config file generation.

 - Fixed segmentation fault while running rocprof-sys-instrument.
- Fixed an issue where running `rocprof-sys-causal` or using the `-I all` option with `rocprof-sys-sample` caused the system to become non-responsive.

- Fixed an issue where sampling multi-GPU Python workloads caused the system to stop responding.
+#### Changed
+- Backend refactored to use [ROCprofiler-SDK](https://github.com/ROCm/rocprofiler-sdk) rather than [ROCProfiler](https://github.com/ROCm/rocprofiler) and [ROCTracer](https://github.com/ROCm/ROCTracer).

 ### **rocPRIM** (3.4.0)

@@ -1677,14 +1661,6 @@ When running the hipBLASLt library using the transpose configuration (TT) with F

 In RCCL library, you might receive incorrect results in All-Reduce collective API, when using Link Layer (LL) protocol in graph mode while MSCCL++ is enabled. This issue occurs when the protocal state information are updated in the host-side code instead of in a kernel, which is not supported in graph mode. As a workaround, you can disable MSCCL++ by setting the environment variable `RCCL_MSCCLPP_ENABLE=0`. However, consider that this might negatively impact the performance. The issue will be fixed in a future ROCm release. See [GitHub issue #4616](https://github.com/ROCm/ROCm/issues/4616).

-### ROCm installation might fail in some Linux distribution kernels
-
-ROCm 6.4.0 might encounter an installation issue on some Linux distribution kernels, including the [patch](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9011e49d54dcc7653ebb8a1e05b5badb5ecfa9f9) that adds more restrictions for symbol lookups. This change breaks the standard symbol lookup methods in the kernel.
-
-As a result, the AMD kernel driver Dynamic Kernel Mode Support (DKMS) package might fail to install when the symbols required to use the PeerDirect API with Mellanox NICs are not found. In the event of such a failure, the AMD DKMS package attempts to locate these symbols directly from the Mellanox installation. However, for non-standard Mellanox NIC installations, the AMD DKMS package might not be able to locate these symbols.
-
-This issue will be fixed in a future ROCm release. As a workaround, you can run the script that allows the DKMS package to locate Mellanox symbols from the Mellanox installation without you requiring to update the new DKMS package. For downloading the script and getting more details on the issue and workaround, see [GitHub issue #4671](https://github.com/ROCm/ROCm/issues/4671).
-
 ## ROCm resolved issues

 The following are previously known issues resolved in this release. For resolved issues related to
@@ -1732,7 +1708,7 @@ and will be disabled in a future release.

 * The `__AMDGCN_WAVEFRONT_SIZE__` macro and `__AMDGCN_WAVEFRONT_SIZE` alias will be removed in an upcoming release.
  It is recommended to remove any use of this macro. For more information, see
-  [AMDGPU support](https://rocm.docs.amd.com/projects/llvm-project/en/docs-6.4.0/LLVM/clang/html/AMDGPUSupport.html).
+  [AMDGPU support](https://rocm.docs.amd.com/projects/llvm-project/en/docs-6.3.2/LLVM/clang/html/AMDGPUSupport.html).
 * `warpSize` will only be available as a non-`constexpr` variable. Where required,
  the wavefront size should be queried via the `warpSize` variable in device code,
  or via `hipGetDeviceProperties` in host code. Neither of these will result in a compile-time constant. 
@@ -1770,5 +1746,4 @@ There are a number of upcoming changes planned for HIP runtime API in an upcomin
 that are not backward compatible with prior releases. Most of these changes increase 
 alignment between HIP and CUDA APIs or behavior. Some of the upcoming changes are to 
 clean up header files, remove namespace collision, and have a clear separation between 
-`hipRTC` and HIP runtime. For more information, see [HIP Upcoming changes](#hip-6-4-0)
-or [HIP 7.0 Is Coming: What You Need to Know to Stay Ahead](https://rocm.blogs.amd.com/ecosystems-and-partners/transition-to-hip-7.0:-guidance-on-upcoming-compatibility-changes/README.html).
+`hipRTC` and HIP runtime. For more information, refer to [HIP Upcoming changes](#hip-6-4-0).
--- a/docs/about/license.md
+++ b/docs/about/license.md
@@ -81,7 +81,6 @@ additional licenses. Please review individual repositories for more information.
 | [rocRAND](https://github.com/ROCm/rocRAND/) | [MIT](https://github.com/ROCm/rocRAND/blob/develop/LICENSE.txt) |
 | [ROCr Debug Agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/amd-staging/LICENSE.txt) |
 | [ROCR-Runtime](https://github.com/ROCm/ROCR-Runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCR-Runtime/blob/amd-staging/LICENSE.txt) |
-| [rocSHMEM](https://github.com/ROCm/rocSHMEM/) | [MIT](https://github.com/ROCm/rocSHMEM/blob/develop/LICENSE.md) |
 | [rocSOLVER](https://github.com/ROCm/rocSOLVER/) | [BSD-2-Clause](https://github.com/ROCm/rocSOLVER/blob/develop/LICENSE.md) |
 | [rocSPARSE](https://github.com/ROCm/rocSPARSE/) | [MIT](https://github.com/ROCm/rocSPARSE/blob/develop/LICENSE.md) |
 | [rocThrust](https://github.com/ROCm/rocThrust/) | [Apache 2.0](https://github.com/ROCm/rocThrust/blob/develop/LICENSE) |
--- a/docs/compatibility/compatibility-matrix-historical-6.0.csv
+++ b/docs/compatibility/compatibility-matrix-historical-6.0.csv
@@ -1,121 +1,121 @@
-ROCm Version,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
-      :ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
-      ,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
-      ,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
-      ,"RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
-      ,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
-      ,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
-      ,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
-      ,"Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
-,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
-,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,,,,,,,,,,,,
-      ,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
-      :doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
-      ,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
-      ,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
-      ,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
-      ,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
-      ,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
-      :doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
-      ,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
-      ,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
-      ,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
-      ,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
-,,,,,,,,,,,,,,,
-      FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
-      :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
-      :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
-      :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
-      `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
-      ,,,,,,,,,,,,,,,
-      THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
-      `UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
-      `UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
-      ,,,,,,,,,,,,,,,
-      THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
-      Thrust,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
-      CUB,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
-,,,,,,,,,,,,,,,
-      KMD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
-      KMD versions,"6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
-      ,,,,,,,,,,,,,,,
-      ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
-      :doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
-      :doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
-      :doc:`MIOpen <miopen:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
-      :doc:`MIVisionX <mivisionx:index>`,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
-      :doc:`rocAL <rocal:index>`,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
-      :doc:`rocDecode <rocdecode:index>`,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
-      :doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`RPP <rpp:index>`,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
-      ,,,,,,,,,,,,,,,
-      COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
-      :doc:`RCCL <rccl:index>`,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
-      :doc:`rocSHMEM <rocSHMEM:index>`,2.0.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
-      ,,,,,,,,,,,,,,,
-      MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
-      `half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
-      :doc:`hipBLAS <hipblas:index>`,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
-      :doc:`hipBLASLt <hipblaslt:index>`,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
-      :doc:`hipFFT <hipfft:index>`,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
-      :doc:`hipfort <hipfort:index>`,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
-      :doc:`hipRAND <hiprand:index>`,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
-      :doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
-      :doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
-      :doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
-      :doc:`rocALUTION <rocalution:index>`,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
-      :doc:`rocBLAS <rocblas:index>`,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
-      :doc:`rocFFT <rocfft:index>`,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
-      :doc:`rocRAND <rocrand:index>`,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
-      :doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
-      :doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
-      :doc:`rocWMMA <rocwmma:index>`,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
-      :doc:`Tensile <tensile:src/index>`,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
-      ,,,,,,,,,,,,,,,
-      PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
-      :doc:`hipCUB <hipcub:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
-      :doc:`hipTensor <hiptensor:index>`,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
-      :doc:`rocPRIM <rocprim:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
-      :doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
-      ,,,,,,,,,,,,,,,
-      SUPPORT LIBS,,,,,,,,,,,,,,,
-      `hipother <https://github.com/ROCm/hipother>`_,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
-      `rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
-      `ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
-      ,,,,,,,,,,,,,,,
-      SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
-      :doc:`AMD SMI <amdsmi:index>`,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
-      :doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
-      :doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
-      :doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
-      :doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
-      ,,,,,,,,,,,,,,,
-      PERFORMANCE TOOLS,,,,,,,,,,,,,,,
-      :doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
-      :doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`ROCProfiler <rocprofiler:index>`,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
-      :doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`ROCTracer <roctracer:index>`,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
-      ,,,,,,,,,,,,,,,
-      DEVELOPMENT TOOLS,,,,,,,,,,,,,,,
-      :doc:`HIPIFY <hipify:index>`,19.0.0.25104,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
-      :doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
-      :doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
-      :doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
-      `rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
-      :doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
-      ,,,,,,,,,,,,,,,
-      COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
-      `clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
-      :doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
-      `Flang <https://github.com/ROCm/flang>`_,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
-      :doc:`llvm-project <llvm-project:index>`,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
-      `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
-,,,,,,,,,,,,,,,
-      RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
-      :doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
-      :doc:`HIP <hip:index>`,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
-      `OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
-      :doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
+ROCm Version,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
+      :ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
+      ,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
+      ,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
+      ,"RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
+      ,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
+      ,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
+      ,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
+      ,"Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
+,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
+,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,,,,,,,,,,,,
+      ,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      :doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
+      ,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
+      ,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
+      ,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
+      ,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
+      ,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      :doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
+      ,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
+      ,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
+      ,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
+      ,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
+,,,,,,,,,,,,,,,
+      FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
+      :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
+      :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
+      `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
+,,,,,,,,,,,,,,,
+      ,,,,,,,,,,,,,,,
+      THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      `UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
+      `UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
+      ,,,,,,,,,,,,,,,
+      THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      Thrust,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
+      CUB,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
+,,,,,,,,,,,,,,,
+      KMD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      KMD versions,"6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
+      ,,,,,,,,,,,,,,,
+      ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      :doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
+      :doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
+      :doc:`MIOpen <miopen:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
+      :doc:`MIVisionX <mivisionx:index>`,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
+      :doc:`rocAL <rocal:index>`,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
+      :doc:`rocDecode <rocdecode:index>`,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
+      :doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`RPP <rpp:index>`,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
+      ,,,,,,,,,,,,,,,
+      COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      :doc:`RCCL <rccl:index>`,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
+      ,,,,,,,,,,,,,,,
+      MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      `half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
+      :doc:`hipBLAS <hipblas:index>`,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
+      :doc:`hipBLASLt <hipblaslt:index>`,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
+      :doc:`hipFFT <hipfft:index>`,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
+      :doc:`hipfort <hipfort:index>`,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
+      :doc:`hipRAND <hiprand:index>`,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
+      :doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
+      :doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
+      :doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
+      :doc:`rocALUTION <rocalution:index>`,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
+      :doc:`rocBLAS <rocblas:index>`,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
+      :doc:`rocFFT <rocfft:index>`,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
+      :doc:`rocRAND <rocrand:index>`,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
+      :doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
+      :doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
+      :doc:`rocWMMA <rocwmma:index>`,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
+      :doc:`Tensile <tensile:src/index>`,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
+      ,,,,,,,,,,,,,,,
+      PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      :doc:`hipCUB <hipcub:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
+      :doc:`hipTensor <hiptensor:index>`,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
+      :doc:`rocPRIM <rocprim:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
+      :doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
+      ,,,,,,,,,,,,,,,
+      SUPPORT LIBS,,,,,,,,,,,,,,,
+      `hipother <https://github.com/ROCm/hipother>`_,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
+      `rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.2,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
+      `ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
+      ,,,,,,,,,,,,,,,
+      SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      :doc:`AMD SMI <amdsmi:index>`,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
+      :doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
+      :doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
+      :doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
+      :doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60102,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
+      ,,,,,,,,,,,,,,,
+      PERFORMANCE TOOLS,,,,,,,,,,,,,,,
+      :doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
+      :doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`ROCProfiler <rocprofiler:index>`,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60102,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
+      :doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`ROCTracer <roctracer:index>`,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60102,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
+      ,,,,,,,,,,,,,,,
+      DEVELOPMENT TOOLS,,,,,,,,,,,,,,,
+      :doc:`HIPIFY <hipify:index>`,19.0.0.25104,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
+      :doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
+      :doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
+      :doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
+      `rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
+      :doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
+      ,,,,,,,,,,,,,,,
+      COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      `clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
+      :doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
+      `Flang <https://github.com/ROCm/flang>`_,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
+      :doc:`llvm-project <llvm-project:index>`,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
+      `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
+      ,,,,,,,,,,,,,,,
+      RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      :doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
+      :doc:`HIP <hip:index>`,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
+      `OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
+      :doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
--- a/docs/compatibility/compatibility-matrix.rst
+++ b/docs/compatibility/compatibility-matrix.rst
@@ -77,7 +77,6 @@ compatibility and system requirements.
      ,,,
      COMMUNICATION,.. _commlibs-support-compatibility-matrix:,,
      :doc:`RCCL <rccl:index>`,2.22.3,2.21.5,2.20.5
-      :doc:`rocSHMEM <rocSHMEM:index>`,2.0.0,N/A,N/A
      ,,,
      MATH LIBS,.. _mathlibs-support-compatibility-matrix:,,
      `half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0
--- a/docs/compatibility/ml-compatibility/jax-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/jax-compatibility.rst
@@ -14,18 +14,17 @@ JAX provides a NumPy-like API, which combines automatic differentiation and the
 Accelerated Linear Algebra (XLA) compiler to achieve high-performance machine
 learning at scale.

-JAX uses composable transformations of Python and NumPy through just-in-time
-(JIT) compilation, automatic vectorization, and parallelization. To learn about
-JAX, including profiling and optimizations, see the official `JAX documentation
+JAX uses composable transformations of Python and NumPy through just-in-time (JIT) compilation,
+automatic vectorization, and parallelization. To learn about JAX, including profiling and
+optimizations, see the official `JAX documentation
 <https://jax.readthedocs.io/en/latest/notebooks/quickstart.html>`_.

-ROCm support for JAX is upstreamed, and users can build the official source code
-with ROCm support:
+ROCm support for JAX is upstreamed, and users can build the official source code with ROCm
+support:

 - ROCm JAX release:

-  - Offers AMD-validated and community :ref:`Docker images <jax-docker-compat>`
-    with ROCm and JAX preinstalled.
+  - Offers AMD-validated and community :ref:`Docker images <jax-docker-compat>` with ROCm and JAX pre-installed.

  - ROCm JAX repository: `ROCm/jax <https://github.com/ROCm/jax>`_

@@ -37,8 +36,8 @@ with ROCm support:
  - Official JAX repository: `jax-ml/jax <https://github.com/jax-ml/jax>`_

  - See the `AMD GPU (Linux) installation section
-    <https://jax.readthedocs.io/en/latest/installation.html#amd-gpu-linux>`_ in
-    the JAX documentation.
+    <https://jax.readthedocs.io/en/latest/installation.html#amd-gpu-linux>`_ in the JAX
+    documentation.

 .. note::

@@ -47,44 +46,6 @@ with ROCm support:
   `Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
   follow upstream JAX releases and use the latest available ROCm version.

-Use cases and recommendations
-================================================================================
-
-* The `nanoGPT in JAX <https://rocm.blogs.amd.com/artificial-intelligence/nanoGPT-JAX/README.html>`_
-  blog explores the implementation and training of a Generative Pre-trained
-  Transformer (GPT) model in JAX, inspired by Andrej Karpathy’s JAX-based
-  nanoGPT. Comparing how essential GPT components—such as self-attention 
-  mechanisms and optimizers—are realized in JAX and JAX, also highlights
-  JAX’s unique features.
-
-* The `Optimize GPT Training: Enabling Mixed Precision Training in JAX using
-  ROCm on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-mixed-precision/README.html>`_
-  blog post provides a comprehensive guide on enhancing the training efficiency
-  of GPT models by implementing mixed precision techniques in JAX, specifically
-  tailored for AMD GPUs utilizing the ROCm platform.
-
-* The `Supercharging JAX with Triton Kernels on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-triton/README.html>`_
-  blog demonstrates how to develop a custom fused dropout-activation kernel for
-  matrices using Triton, integrate it with JAX, and benchmark its performance
-  using ROCm.
-
-* The `Distributed fine-tuning with JAX on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/distributed-sft-jax/README.html>`_
-  outlines the process of fine-tuning a Bidirectional Encoder Representations
-  from Transformers (BERT)-based large language model (LLM) using JAX for a text
-  classification task. The blog post discuss techniques for parallelizing the
-  fine-tuning across multiple AMD GPUs and assess the model's performance on a
-  holdout dataset. During the fine-tuning, a BERT-base-cased transformer model
-  and the General Language Understanding Evaluation (GLUE) benchmark dataset was
-  used on a multi-GPU setup.
-
-* The `MI300X workload optimization guide <https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/workload.html>`_
-  provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
-  accelerator using ROCm. The page is aimed at helping users achieve optimal
-  performance for deep learning and other high-performance computing tasks on
-  the MI300X GPU.
-
-For more use cases and recommendations, see `ROCm JAX blog posts <https://rocm.blogs.amd.com/blog/tag/jax.html>`_.
-
 .. _jax-docker-compat:

 Docker image compatibility
@@ -96,8 +57,8 @@ Docker image compatibility

 AMD validates and publishes ready-made `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax>`_
 with ROCm backends on Docker Hub. The following Docker image tags and
-associated inventories represent the latest JAX version from the official Docker Hub and are validated for
-`ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_. Click the |docker-icon|
+associated inventories are validated for
+`ROCm 6.3.1 <https://repo.radeon.com/rocm/apt/6.3.1/>`_. Click the |docker-icon|
 icon to view the image on Docker Hub.

 .. list-table:: JAX Docker image components
@@ -107,26 +68,24 @@ icon to view the image on Docker Hub.
      - JAX
      - Linux
      - Python
-
    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/jax/rocm6.4-jax0.4.35-py3.12/images/sha256-4069398229078f3311128b6d276c6af377c7e97d3363d020b0bf7154fae619ca"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
+           <a href="https://hub.docker.com/layers/rocm/jax/rocm6.3.1-jax0.4.31-py3.12/images/sha256-085a0cd5207110922f1fca684933a9359c66d42db6c5aba4760ed5214fdabde0"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>

-      - `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
+      - `0.4.31 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.31>`_
      - Ubuntu 24.04
      - `3.12.7 <https://www.python.org/downloads/release/python-3127/>`_
-
    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/jax/rocm6.4-jax0.4.35-py3.10/images/sha256-a137f901f91ce6c13b424c40a6cf535248d4d20fd36d5daf5eee0570190a4a11"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
+           <a href="https://hub.docker.com/layers/rocm/jax/rocm6.3.1-jax0.4.31-py3.10/images/sha256-f88eddad8f47856d8640b694da4da347ffc1750d7363175ab7dc872e82b43324"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>

-      - `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
+      - `0.4.31 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.31>`_
      - Ubuntu 22.04
      - `3.10.14 <https://www.python.org/downloads/release/python-31014/>`_

 AMD publishes `Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
 with ROCm backends on Docker Hub. The following Docker image tags and
-associated inventories are tested for `ROCm 6.3.2 <https://repo.radeon.com/rocm/apt/6.3.2/>`_.
+associated inventories are tested for `ROCm 6.2.4 <https://repo.radeon.com/rocm/apt/6.2.4/>`_.

 .. list-table:: JAX community Docker image components
    :header-rows: 1
@@ -135,37 +94,35 @@ associated inventories are tested for `ROCm 6.3.2 <https://repo.radeon.com/rocm/
      - JAX
      - Linux
      - Python
-
    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.12.8/images/sha256-25dfaa0183e274bd0a3554a309af3249c6f16a1793226cb5373f418e39d3146a"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
+           <a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.2.4-jax0.4.35-py3.12.7/images/sha256-a6032d89c07573b84c44e42c637bf9752b1b7cd2a222d39344e603d8f4c63beb?context=explore"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>

-      - `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
+      - `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
      - Ubuntu 22.04
-      - `3.12.8 <https://www.python.org/downloads/release/python-3128/>`_
-
+      - `3.12.7 <https://www.python.org/downloads/release/python-3127/>`_
    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.11.11/images/sha256-ff9baeca9067d13e6c279c911e5a9e5beed0817d24fafd424367cc3d5bd381d7"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
+           <a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.2.4-jax0.4.35-py3.11.10/images/sha256-d462f7e445545fba2f3b92234a21beaa52fe6c5f550faabcfdcd1bf53486d991?context=explore"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>

-      - `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
+      - `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
      - Ubuntu 22.04
-      - `3.11.11 <https://www.python.org/downloads/release/python-31111/>`_
-
+      - `3.11.10 <https://www.python.org/downloads/release/python-31110/>`_
    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.10.16/images/sha256-8bab484be1713655f74da51a191ed824bb9d03db1104fd63530a1ac3c37cf7b1"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
+           <a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.2.4-jax0.4.35-py3.10.15/images/sha256-6f2d4d0f529378d9572f0e8cfdcbc101d1e1d335bd626bb3336fff87814e9d60?context=explore"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>

-      - `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
+      - `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
      - Ubuntu 22.04
-      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
+      - `3.10.15 <https://www.python.org/downloads/release/python-31015/>`_

-Key ROCm libraries for JAX
+Critical ROCm libraries for JAX
 ================================================================================

-JAX functionality on ROCm is determined by its underlying library
-dependencies. These ROCm components affect the capabilities, performance, and
-feature set available to developers.
+The functionality of JAX with ROCm is determined by its underlying library
+dependencies. These critical ROCm components affect the capabilities,
+performance, and feature set available to developers. The versions described
+are available in ROCm :version:`rocm_version`.

 .. list-table::
    :header-rows: 1
@@ -253,10 +210,10 @@ feature set available to developers.
        distributed training, which involves parallel reductions or
        operations like ``jax.numpy.cumsum`` can use rocThrust.

-Supported features
+Supported and unsupported features
 ===============================================================================

-The following table maps the public JAX API modules to their supported
+The following table maps GPU-accelerated JAX modules to their supported
 ROCm and JAX versions.

 .. list-table::
@@ -264,8 +221,8 @@ ROCm and JAX versions.

    * - Module
      - Description
-      - As of JAX
-      - As of ROCm
+      - Since JAX
+      - Since ROCm
    * - ``jax.numpy``
      - Implements the NumPy API, using the primitives in ``jax.lax``.
      - 0.1.56
@@ -293,11 +250,21 @@ ROCm and JAX versions.
        devices.
      - 0.3.20
      - 5.1.0
+    * - ``jax.dlpack``
+      - For exchanging tensor data between JAX and other libraries that support the
+        DLPack standard.
+      - 0.1.57
+      - 5.0.0
    * - ``jax.distributed``
      - Enables the scaling of computations across multiple devices on a single
        machine or across multiple machines.
      - 0.1.74
      - 5.0.0
+    * - ``jax.dtypes``
+      - Provides utilities for working with and managing data types in JAX
+        arrays and computations.
+      - 0.1.66
+      - 5.0.0
    * - ``jax.image``
      - Contains image manipulation functions like resize, scale and translation.
      - 0.1.57
@@ -311,10 +278,27 @@ ROCm and JAX versions.
        array.
      - 0.1.57
      - 5.0.0
+    * - ``jax.profiler``
+      - Contains JAX’s tracing and time profiling features.
+      - 0.1.57
+      - 5.0.0
    * - ``jax.stages``
      - Contains interfaces to stages of the compiled execution process.
      - 0.3.4
      - 5.0.0
+    * - ``jax.tree``
+      - Provides utilities for working with tree-like container data structures.
+      - 0.4.26
+      - 5.6.0
+    * - ``jax.tree_util``
+      - Provides utilities for working with nested data structures, or
+        ``pytrees``.
+      - 0.1.65
+      - 5.0.0
+    * - ``jax.typing``
+      - Provides JAX-specific static type annotations.
+      - 0.3.18
+      - 5.1.0
    * - ``jax.extend``
      - Provides modules for access to JAX internal machinery module. The
        ``jax.extend`` module defines a library view of some of JAX’s internal
@@ -350,8 +334,8 @@ A SciPy-like API for scientific computing.
    :header-rows: 1

    * - Module
-      - As of JAX
-      - As of ROCm
+      - Since JAX
+      - Since ROCm
    * - ``jax.scipy.cluster``
      - 0.3.11
      - 5.1.0
@@ -396,8 +380,8 @@ jax.scipy.stats module
   :header-rows: 1

   * - Module
-     - As of JAX
-     - As of ROCm
+     - Since JAX
+     - Since ROCm
   * - ``jax.scipy.stats.bernouli``
     - 0.1.56
     - 5.0.0
@@ -480,8 +464,8 @@ Modules for JAX extensions.
    :header-rows: 1

    * - Module
-      - As of JAX
-      - As of ROCm
+      - Since JAX
+      - Since ROCm
    * - ``jax.extend.ffi``
      - 0.4.30
      - 6.0.0
@@ -495,25 +479,190 @@ Modules for JAX extensions.
      - 0.4.15
      - 5.5.0

-Unsupported JAX features
-===============================================================================
+jax.experimental module
+-------------------------------------------------------------------------------

-The following GPU-accelerated JAX features are not supported by ROCm for
-the listed supported JAX versions.
+Experimental modules and APIs.
+
+.. list-table::
+    :header-rows: 1
+
+    * - Module
+      - Since JAX
+      - Since ROCm
+    * - ``jax.experimental.checkify``
+      - 0.1.75
+      - 5.0.0
+    * - ``jax.experimental.compilation_cache.compilation_cache``
+      - 0.1.68
+      - 5.0.0
+    * - ``jax.experimental.custom_partitioning``
+      - 0.4.0
+      - 5.3.0
+    * - ``jax.experimental.jet``
+      - 0.1.56
+      - 5.0.0
+    * - ``jax.experimental.key_reuse``
+      - 0.4.26
+      - 5.6.0
+    * - ``jax.experimental.mesh_utils``
+      - 0.1.76
+      - 5.0.0
+    * - ``jax.experimental.multihost_utils``
+      - 0.3.2
+      - 5.0.0
+    * - ``jax.experimental.pallas``
+      - 0.4.15
+      - 5.5.0
+    * - ``jax.experimental.pjit``
+      - 0.1.61
+      - 5.0.0
+    * - ``jax.experimental.serialize_executable``
+      - 0.4.0
+      - 5.3.0
+    * - ``jax.experimental.shard_map``
+      - 0.4.3
+      - 5.3.0
+    * - ``jax.experimental.sparse``
+      - 0.1.75
+      - 5.0.0
+
+.. list-table::
+    :header-rows: 1
+
+    * - API
+      - Since JAX
+      - Since ROCm
+    * - ``jax.experimental.enable_x64``
+      - 0.1.60
+      - 5.0.0
+    * - ``jax.experimental.disable_x64``
+      - 0.1.60
+      - 5.0.0
+
+jax.experimental.pallas module
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Module for Pallas, a JAX extension for custom kernels.
+
+.. list-table::
+    :header-rows: 1
+
+    * - Module
+      - Since JAX
+      - Since ROCm
+    * - ``jax.experimental.pallas.mosaic_gpu``
+      - 0.4.31
+      - 6.1.3
+    * - ``jax.experimental.pallas.tpu``
+      - 0.4.15
+      - 5.5.0
+    * - ``jax.experimental.pallas.triton``
+      - 0.4.32
+      - 6.1.3
+
+jax.experimental.sparse module
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Experimental support for sparse matrix operations.
+
+.. list-table::
+    :header-rows: 1
+
+    * - Module
+      - Since JAX
+      - Since ROCm
+    * - ``jax.experimental.sparse.linalg``
+      - 0.3.15
+      - 5.2.0
+    * - ``jax.experimental.sparse.sparsify``
+      - 0.3.25
+      - ❌
+
+.. list-table::
+    :header-rows: 1
+
+    * - ``sparse`` data structure API
+      - Since JAX
+      - Since ROCm
+    * - ``jax.experimental.sparse.BCOO``
+      - 0.1.72
+      - 5.0.0
+    * - ``jax.experimental.sparse.BCSR``
+      - 0.3.20
+      - 5.1.0
+    * - ``jax.experimental.sparse.CSR``
+      - 0.1.75
+      - 5.0.0
+    * - ``jax.experimental.sparse.NM``
+      - 0.4.27
+      - 5.6.0
+    * - ``jax.experimental.sparse.COO``
+      - 0.1.75
+      - 5.0.0
+
+Unsupported JAX features
+------------------------
+
+The following are GPU-accelerated JAX features not currently supported by
+ROCm.

 .. list-table::
    :header-rows: 1

    * - Feature
      - Description
-
+      - Since JAX
    * - Mixed Precision with TF32
      - Mixed precision with TF32 is used for matrix multiplications,
        convolutions, and other linear algebra operations, particularly in
        deep learning workloads like CNNs and transformers.
-
+      - 0.2.25
+    * - RNN support
+      - Currently only LSTM with double bias is supported with float32 input
+        and weight.
+      - 0.3.25
    * - XLA int4 support
      - 4-bit integer (int4) precision in the XLA compiler.
+      - 0.4.0
+    * - ``jax.experimental.sparse.sparsify``
+      - Converts a dense matrix to a sparse matrix representation.
+      - Experimental

-    * - MOSAIC (GPU)
-      - Mosaic is a library of kernel-building abstractions for JAX's Pallas system
+Use cases and recommendations
+================================================================================
+
+* The `nanoGPT in JAX <https://rocm.blogs.amd.com/artificial-intelligence/nanoGPT-JAX/README.html>`_
+  blog explores the implementation and training of a Generative Pre-trained
+  Transformer (GPT) model in JAX, inspired by Andrej Karpathy’s PyTorch-based
+  nanoGPT. By comparing how essential GPT components—such as self-attention
+  mechanisms and optimizers—are realized in PyTorch and JAX, the blog also
+  highlights JAX’s unique features.
+
+* The `Optimize GPT Training: Enabling Mixed Precision Training in JAX using
+  ROCm on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-mixed-precision/README.html>`_
+  blog post provides a comprehensive guide on enhancing the training efficiency
+  of GPT models by implementing mixed precision techniques in JAX, specifically
+  tailored for AMD GPUs utilizing the ROCm platform.
+
+* The `Supercharging JAX with Triton Kernels on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-triton/README.html>`_
+  blog demonstrates how to develop a custom fused dropout-activation kernel for
+  matrices using Triton, integrate it with JAX, and benchmark its performance
+  using ROCm.
+
+* The `Distributed fine-tuning with JAX on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/distributed-sft-jax/README.html>`_
+  outlines the process of fine-tuning a Bidirectional Encoder Representations
+  from Transformers (BERT)-based large language model (LLM) using JAX for a text
+  classification task. The blog post discusses techniques for parallelizing the
+  fine-tuning across multiple AMD GPUs and assesses the model's performance on a
+  holdout dataset. During the fine-tuning, a BERT-base-cased transformer model
+  and the General Language Understanding Evaluation (GLUE) benchmark dataset were
+  used on a multi-GPU setup.
+
+* The `MI300X workload optimization guide <https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/workload.html>`_
+  provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
+  accelerator using ROCm. The page is aimed at helping users achieve optimal
+  performance for deep learning and other high-performance computing tasks on
+  the MI300X GPU.
+
+For more use cases and recommendations, see `ROCm JAX blog posts <https://rocm.blogs.amd.com/blog/tag/jax.html>`_.
--- a/docs/compatibility/ml-compatibility/pytorch-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/pytorch-compatibility.rst
@@ -21,68 +21,31 @@ release cycles for PyTorch on ROCm:

 - ROCm PyTorch release:

-  - Provides the latest version of ROCm but might not necessarily support the
-    latest stable PyTorch version.
+  - Provides the latest version of ROCm but doesn't immediately support the latest stable PyTorch
+    version.

  - Offers :ref:`Docker images <pytorch-docker-compat>` with ROCm and PyTorch
-    preinstalled.
+    pre-installed.

  - ROCm PyTorch repository: `<https://github.com/ROCm/pytorch>`_

-  - See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>`
-    to get started.
+  - See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>` to get started.

 - Official PyTorch release:

-  - Provides the latest stable version of PyTorch  but might not necessarily
-    support the latest ROCm version.
+  - Provides the latest stable version of PyTorch but doesn't immediately support the latest ROCm version.

  - Official PyTorch repository: `<https://github.com/pytorch/pytorch>`_

  - See the `Nightly and latest stable version installation guide <https://pytorch.org/get-started/locally/>`_
-    or `Previous versions <https://pytorch.org/get-started/previous-versions/>`_
-    to get started.
+    or `Previous versions <https://pytorch.org/get-started/previous-versions/>`_ to get started.

-PyTorch includes tooling that generates HIP source code from the CUDA backend.
-This approach allows PyTorch to support ROCm without requiring manual code
-modifications. For more information, see :doc:`HIPIFY <hipify:index>`.
+Upstream PyTorch includes a HIPification solution that automatically generates HIP
+source code from the CUDA backend. This approach allows PyTorch to support ROCm without requiring
+manual code modifications.

-ROCm development is aligned with the stable release of PyTorch, while upstream
-PyTorch testing uses the stable release of ROCm to maintain consistency.
-
-.. _pytorch-recommendations:
-
-Use cases and recommendations
-================================================================================
-
-* :doc:`Using ROCm for AI: training a model </how-to/rocm-for-ai/training/benchmark-docker/pytorch-training>`
-  guides how to leverage the ROCm platform for training AI models. It covers the
-  steps, tools, and best practices for optimizing training workflows on AMD GPUs
-  using PyTorch features.
-
-* :doc:`Single-GPU fine-tuning and inference </how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference>`
-  describes and demonstrates how to use the ROCm platform for the fine-tuning
-  and inference of machine learning models, particularly large language models
-  (LLMs), on systems with a single GPU. This topic provides a detailed guide for
-  setting up, optimizing, and executing fine-tuning and inference workflows in
-  such environments.
-
-* :doc:`Multi-GPU fine-tuning and inference optimization </how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference>`
-  describes and demonstrates the fine-tuning and inference of machine learning
-  models on systems with multiple GPUs.
-
-* The :doc:`Instinct MI300X workload optimization guide </how-to/rocm-for-ai/inference-optimization/workload>`
-  provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
-  accelerator using ROCm. This guide helps users achieve optimal performance for
-  deep learning and other high-performance computing tasks on the MI300X
-  accelerator.
-
-* The :doc:`Inception with PyTorch documentation </conceptual/ai-pytorch-inception>`
-  describes how PyTorch integrates with ROCm for AI workloads It outlines the
-  use of PyTorch on the ROCm platform and focuses on efficiently leveraging AMD
-  GPU hardware for training and inference tasks in AI applications.
-
-For more use cases and recommendations, see `ROCm PyTorch blog posts <https://rocm.blogs.amd.com/blog/tag/pytorch.html>`_.
+Development of ROCm is aligned with the stable release of PyTorch, while upstream PyTorch testing uses
+the stable release of ROCm to maintain consistency.

 .. _pytorch-docker-compat:

@@ -93,10 +56,10 @@ Docker image compatibility

   <i class="fab fa-docker"></i>

-AMD validates and publishes `PyTorch images <https://hub.docker.com/r/rocm/pytorch>`_
-with ROCm backends on Docker Hub. The following Docker image tags and associated
-inventories were tested on `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_.
-Click |docker-icon| to view the image on Docker Hub.
+AMD validates and publishes ready-made `PyTorch images <https://hub.docker.com/r/rocm/pytorch>`_
+with ROCm backends on Docker Hub. The following Docker image tags and
+associated inventories are validated for `ROCm 6.3.3 <https://repo.radeon.com/rocm/apt/6.3.3/>`_.
+Click the |docker-icon| icon to view the image on Docker Hub.

 .. list-table:: PyTorch Docker image components
    :header-rows: 1
@@ -116,84 +79,9 @@ Click |docker-icon| to view the image on Docker Hub.

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-ab1d350b818b90123cfda31363019d11c0d41a8f12a19e3cb2cb40cf0261137d"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu24.04_py3.12_pytorch_release_2.4.0/images/sha256-6c798857b2c9526b44ba535710b93a1737546acea79b53a93c646195c272f1d5"><i class="fab fa-docker fa-lg"></i></a>

-      - `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`_
-      - 24.04
-      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
-      - `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`_
-      - `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`_
-      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
-      - `master <https://bitbucket.org/icl/magma/src/master/>`_
-      - `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
-      - `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
-      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.6.0/images/sha256-130536fdfceb374626a7bcb8d00b9d796ddfc3115677d51229e5b852d96b5ef4"><i class="fab fa-docker fa-lg"></i></a>
-
-      - `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`_
-      - 22.04
-      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
-      - `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`_
-      - `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`_
-      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
-      - `master <https://bitbucket.org/icl/magma/src/master/>`_
-      - `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
-      - `4.0.7 <https://github.com/open-mpi/ompi/tree/v4.0.7>`_
-      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.5.1/images/sha256-20a2e24b4738dc1f1a44a04f23827918b56c99f7e697e6fccb90e9c4fae8ca9b"><i class="fab fa-docker fa-lg"></i></a>
-
-      - `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
-      - 24.04
-      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
-      - `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`_
-      - `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`_
-      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
-      - `master <https://bitbucket.org/icl/magma/src/master/>`_
-      - `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
-      - `4.0.7 <https://github.com/open-mpi/ompi/tree/v4.0.7>`_
-      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.11_pytorch_release_2.5.1/images/sha256-f09cb8ca39cc39222fb554060711f5c19130f7b4047aaf41fad4ba3ec470ca03"><i class="fab fa-docker fa-lg"></i></a>
-
-      - `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
-      - 22.04
-      - `3.11.9 <https://www.python.org/downloads/release/python-3119/>`_
-      - `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`_
-      - `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`_
-      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
-      - `master <https://bitbucket.org/icl/magma/src/master/>`_
-      - `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
-      - `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
-      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.5.1/images/sha256-a91c100d1fe608dae3eb7f60a751630363d4027ac3d077d428e92945204c338e"><i class="fab fa-docker fa-lg"></i></a>
-
-      - `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
-      - 22.04
-      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
-      - `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`_
-      - `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`_
-      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
-      - `master <https://bitbucket.org/icl/magma/src/master/>`_
-      - `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
-      - `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
-      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-66a89ce6485bb887af74bb9bd76bb613ab9834a6b1374649ea7ae379883454a4"><i class="fab fa-docker fa-lg"></i></a>
-
-      - `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
+      - `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
      - 24.04
      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
      - `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`_
@@ -206,29 +94,74 @@ Click |docker-icon| to view the image on Docker Hub.

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-c716cf167e6e49893f11de03606ed37044153aca089e74ca615065c06877f86b"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu22.04_py3.10_pytorch_release_2.4.0/images/sha256-a09b21248133876fc8912a5ff4e6ee2c8d62b14120313e426b3dadda5702713d"><i class="fab fa-docker fa-lg"></i></a>

-      - `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
+      - `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
      - 22.04
      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
      - `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`_
      - `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`_
      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
      - `master <https://bitbucket.org/icl/magma/src/master/>`_
+      - `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
+      - `4.0.7 <https://github.com/open-mpi/ompi/tree/v4.0.7>`_
+      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu22.04_py3.9_pytorch_release_2.4.0/images/sha256-963187534467f0f9da77996762fc1d112a6faa5372277c348a505533e7876ec8"><i class="fab fa-docker fa-lg"></i></a>
+
+      - `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
+      - 22.04
+      - `3.9.21 <https://www.python.org/downloads/release/python-3921/>`_
+      - `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`_
+      - `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`_
+      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
+      - `master <https://bitbucket.org/icl/magma/src/master/>`_
+      - `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
+      - `4.0.7 <https://github.com/open-mpi/ompi/tree/v4.0.7>`_
+      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-952f2621bd2bf3078bef19061e05b209105a82a7908e7e6cdf85014938a4d93a"><i class="fab fa-docker fa-lg"></i></a>
+
+      - `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`_
+      - 22.04
+      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
+      - `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`_
+      - `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`_
+      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
+      - `master <https://bitbucket.org/icl/magma/src/master/>`_
      - `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
      - `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.3.0/images/sha256-0434cbc9b07b2c26e39480d7447f676f9057a1054dcff00e0050c25a6eddbd3c"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu22.04_py3.10_pytorch_release_2.2.1/images/sha256-a2fe20e170feb9e05da3e5728bb98e40d08567e137be8e6ba797962ed2852608"><i class="fab fa-docker fa-lg"></i></a>

-      - `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`_
-      - 24.04
-      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
-      - `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`_
-      - `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`_
-      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13>`_
+      - `2.2.1 <https://github.com/ROCm/pytorch/tree/release/2.2>`_
+      - 22.04
+      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
+      - `1.2.0 <https://github.com/ROCm/apex/tree/release/1.2.0>`_
+      - `0.17.1 <https://github.com/pytorch/vision/tree/v0.17.1>`_
+      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
+      - `master <https://bitbucket.org/icl/magma/src/master/>`_
+      - `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
+      - `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
+      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu20.04_py3.9_pytorch_release_2.2.1/images/sha256-7f231937c897cca5f89e360be33c70a2017d60f62d1fbe81292be48c15fe345b"><i class="fab fa-docker fa-lg"></i></a>
+
+      - `2.2.1 <https://github.com/ROCm/pytorch/tree/release/2.2>`_
+      - 20.04
+      - `3.9.21 <https://www.python.org/downloads/release/python-3921/>`_
+      - `1.2.0 <https://github.com/ROCm/apex/tree/release/1.2.0>`_
+      - `0.17.1 <https://github.com/pytorch/vision/tree/v0.17.1>`_
+      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
      - `master <https://bitbucket.org/icl/magma/src/master/>`_
      - `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
      - `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
@@ -236,25 +169,41 @@ Click |docker-icon| to view the image on Docker Hub.

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-688b1c0073092615fb98778d78b16191e506097ee116a2d3d2628b264d5d367b"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu22.04_py3.9_pytorch_release_1.13.1/images/sha256-616a47758004f91951e2da6c1fe291f903de65a7b2318d4b18359b48fe3032f4"><i class="fab fa-docker fa-lg"></i></a>

-      - `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`_
+      - `1.13.1 <https://github.com/ROCm/pytorch/tree/release/1.13>`_
      - 22.04
-      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
-      - `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`_
-      - `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`_
-      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13>`_
+      - `3.9.21 <https://www.python.org/downloads/release/python-3921/>`_
+      - `1.0.0 <https://github.com/ROCm/apex/tree/release/1.0.0>`_
+      - `0.14.0 <https://github.com/pytorch/vision/tree/v0.14.0>`_
+      - `2.19.0 <https://github.com/tensorflow/tensorboard/tree/2.19>`_
+      - `master <https://bitbucket.org/icl/magma/src/master/>`_
+      - `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
+      - `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
+      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu20.04_py3.9_pytorch_release_1.13.1/images/sha256-a2cfb365aea58b84595e241ffdb0d5ef3e6566e98c10b5499f4aa29983a74ea2"><i class="fab fa-docker fa-lg"></i></a>
+
+      - `1.13.1 <https://github.com/ROCm/pytorch/tree/release/1.13>`_
+      - 20.04
+      - `3.9.21 <https://www.python.org/downloads/release/python-3921/>`_
+      - `1.0.0 <https://github.com/ROCm/apex/tree/release/1.0.0>`_
+      - `0.14.0 <https://github.com/pytorch/vision/tree/v0.14.0>`_
+      - `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18>`_
      - `master <https://bitbucket.org/icl/magma/src/master/>`_
      - `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
      - `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_

-Key ROCm libraries for PyTorch
+Critical ROCm libraries for PyTorch
 ================================================================================

-PyTorch functionality on ROCm is determined by its underlying library
-dependencies. These ROCm components affect the capabilities, performance, and
-feature set available to developers.
+The functionality of PyTorch with ROCm is determined by its underlying library
+dependencies. These critical ROCm components affect the capabilities,
+performance, and feature set available to developers. The versions described
+are available in ROCm :version:`rocm_version`.

 .. list-table::
    :header-rows: 1
@@ -274,23 +223,24 @@ feature set available to developers.
      - :version-ref:`hipBLAS rocm_version`
      - Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
        matrix and vector operations.
-      - Supports operations such as matrix multiplication, matrix-vector
-        products, and tensor contractions. Utilized in both dense and batched
-        linear algebra operations.
+      - Supports operations like matrix multiplication, matrix-vector products,
+        and tensor contractions. Utilized in both dense and batched linear
+        algebra operations.
    * - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
      - :version-ref:`hipBLASLt rocm_version`
      - hipBLASLt is an extension of the hipBLAS library, providing additional
        features like epilogues fused into the matrix multiplication kernel or
        use of integer tensor cores.
-      - Accelerates operations such as ``torch.matmul``, ``torch.mm``, and the
+      - It accelerates operations like ``torch.matmul``, ``torch.mm``, and the
        matrix multiplications used in convolutional and linear layers.
    * - `hipCUB <https://github.com/ROCm/hipCUB>`_
      - :version-ref:`hipCUB rocm_version`
      - Provides a C++ template library for parallel algorithms for reduction,
        scan, sort and select.
-      - Supports operations such as ``torch.sum``, ``torch.cumsum``,
-        ``torch.sort`` irregular shapes often involve scanning, sorting, and
-        filtering, which hipCUB handles efficiently.
+      - Supports operations like ``torch.sum``, ``torch.cumsum``, ``torch.sort``
+        and ``torch.topk``. Operations on sparse tensors or tensors with
+        irregular shapes often involve scanning, sorting, and filtering, which
+        hipCUB handles efficiently.
    * - `hipFFT <https://github.com/ROCm/hipFFT>`_
      - :version-ref:`hipFFT rocm_version`
      - Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
@@ -298,8 +248,8 @@ feature set available to developers.
    * - `hipRAND <https://github.com/ROCm/hipRAND>`_
      - :version-ref:`hipRAND rocm_version`
      - Provides fast random number generation for GPUs.
-      - The ``torch.rand``, ``torch.randn``, and stochastic layers like
-        ``torch.nn.Dropout`` rely on hipRAND.
+      - The ``torch.rand``, ``torch.randn``, and stochastic layers like
+        ``torch.nn.Dropout`` rely on hipRAND.
    * - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
      - :version-ref:`hipSOLVER rocm_version`
      - Provides GPU-accelerated solvers for linear systems, eigenvalues, and
@@ -370,7 +320,7 @@ feature set available to developers.
      - :version-ref:`RPP rocm_version`
      - Speeds up data augmentation, transformation, and other preprocessing steps.
      - Easy to integrate into PyTorch's ``torch.utils.data`` and
-        ``torchvision`` data load workloads to speed up data processing.
+        ``torchvision`` data load workloads.
    * - `rocThrust <https://github.com/ROCm/rocThrust>`_
      - :version-ref:`rocThrust rocm_version`
      - Provides a C++ template library for parallel algorithms like sorting,
@@ -387,11 +337,11 @@ feature set available to developers.
        involve matrix products, such as ``torch.matmul``, ``torch.bmm``, and
        more.

-Supported features
+Supported and unsupported features
 ================================================================================

-This section maps GPU-accelerated PyTorch features to their supported ROCm and
-PyTorch versions.
+The following section maps GPU-accelerated PyTorch features to their supported
+ROCm and PyTorch versions.

 torch
 --------------------------------------------------------------------------------
@@ -399,24 +349,23 @@ torch
 `torch <https://pytorch.org/docs/stable/index.html>`_ is the central module of
 PyTorch, providing data structures for multi-dimensional tensors and
 implementing mathematical operations on them. It also includes utilities for
-efficient serialization of tensors and arbitrary data types and other tools.
+efficient serialization of tensors and arbitrary data types, along with various
+other tools.

 Tensor data types
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-The tensor data type is specified using the ``dtype`` attribute or argument. 
-PyTorch supports many data types for different use cases.
+The data type of a tensor is specified using the ``dtype`` attribute or argument, and PyTorch supports a wide range of data types for different use cases.

-The following table lists `torch.Tensor <https://pytorch.org/docs/stable/tensors.html>`_
-single data types:
+The following table lists `torch.Tensor <https://pytorch.org/docs/stable/tensors.html>`_'s single data types:

 .. list-table::
    :header-rows: 1

    * - Data type
      - Description
-      - As of PyTorch
-      - As of ROCm
+      - Since PyTorch
+      - Since ROCm
    * - ``torch.float8_e4m3fn``
      - 8-bit floating point, e4m3
      - 2.3
@@ -508,11 +457,11 @@ single data types:

 .. note::

-  Unsigned types except ``uint8`` have limited support in eager mode. They
-  primarily exist to assist usage with ``torch.compile``.
+  Unsigned types aside from ``uint8`` currently have only limited support in
+  eager mode (they primarily exist to assist usage with ``torch.compile``).

-  See :doc:`ROCm precision support <rocm:reference/precision-support>` for the
-  native hardware support of data types.
+  The :doc:`ROCm precision support page <rocm:reference/precision-support>`
+  lists the native hardware support for different data types.

 torch.cuda
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -527,8 +476,8 @@ leveraging ROCm and CUDA as the underlying frameworks.

    * - Feature
      - Description
-      - As of PyTorch
-      - As of ROCm
+      - Since PyTorch
+      - Since ROCm
    * - Device management
      - Utilities for managing and interacting with GPUs.
      - 0.4.0
@@ -602,8 +551,8 @@ PyTorch interacts with the ROCm or CUDA environment.

    * - Feature
      - Description
-      - As of PyTorch
-      - As of ROCm
+      - Since PyTorch
+      - Since ROCm
    * - ``cufft_plan_cache``
      - Manages caching of GPU FFT plans to optimize repeated FFT computations.
      - 1.7.0
@@ -651,8 +600,8 @@ Supported ``torch`` options include:

    * - Option
      - Description
-      - As of PyTorch
-      - As of ROCm
+      - Since PyTorch
+      - Since ROCm
    * - ``allow_tf32``
      - TensorFloat-32 tensor cores may be used in cuDNN convolutions on NVIDIA
        Ampere or newer GPUs.
@@ -667,28 +616,28 @@ Supported ``torch`` options include:
 Automatic mixed precision: torch.amp
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-PyTorch automates the process of using both 16-bit (half-precision, float16) and
-32-bit (single-precision, float32) floating-point types in model training and
-inference.
+PyTorch automates the process of using both 16-bit (half-precision,
+float16) and 32-bit (single-precision, float32) floating-point types in model
+training and inference.

 .. list-table::
    :header-rows: 1

    * - Feature
      - Description
-      - As of PyTorch
-      - As of ROCm
+      - Since PyTorch
+      - Since ROCm
    * - Autocasting
-      - Autocast instances serve as context managers or decorators that allow
+      - Instances of autocast serve as context managers or decorators that allow
        regions of your script to run in mixed precision.
      - 1.9
      - 2.5
    * - Gradient scaling
      - To prevent underflow, “gradient scaling” multiplies the network’s
-        loss by a scale factor and invokes a backward pass on the scaled
-        loss. The same factor then scales gradients flowing backward through
-        the network. In other words, gradient values have a larger magnitude so
-        that they don’t flush to zero.
+        loss(es) by a scale factor and invokes a backward pass on the scaled
+        loss(es). Gradients flowing backward through the network are then
+        scaled by the same factor. In other words, gradient values have a
+        larger magnitude, so they don’t flush to zero.
      - 1.9
      - 2.5
    * - CUDA op-specific behavior
@@ -702,7 +651,7 @@ inference.
 Distributed library features
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-PyTorch distributed library includes a collective of parallelism modules, a
+The PyTorch distributed library includes a collective of parallelism modules, a
 communications layer, and infrastructure for launching and debugging large
 training jobs. See :ref:`rocm-for-ai-pytorch-distributed` for more information.

@@ -716,13 +665,13 @@ of computational resources and scalability for large-scale tasks.

    * - Feature
      - Description
-      - As of PyTorch
-      - As of ROCm
+      - Since PyTorch
+      - Since ROCm
    * - TensorPipe
      - A point-to-point communication library integrated into
-        PyTorch for distributed training. It handles tensor data transfers
-        efficiently between different processes or devices, including those on
-        separate machines.
+        PyTorch for distributed training. It is designed to handle tensor data
+        transfers efficiently between different processes or devices, including
+        those on separate machines.
      - 1.8
      - 5.4
    * - Gloo
@@ -741,8 +690,8 @@ torch.compiler

    * - Feature
      - Description
-      - As of PyTorch
-      - As of ROCm
+      - Since PyTorch
+      - Since ROCm
    * - ``torch.compiler`` (AOT Autograd)
      - Autograd captures not only the user-level code, but also backpropagation,
        which results in capturing the backwards pass “ahead-of-time”. This
@@ -765,8 +714,8 @@ The `torchaudio <https://pytorch.org/audio/stable/index.html>`_ library provides
 utilities for processing audio data in PyTorch, such as audio loading,
 transformations, and feature extraction.

-To ensure GPU-acceleration with ``torchaudio.transforms``, you need to
-explicitly move audio data (waveform tensor) to GPU using ``.to('cuda')``.
+To ensure GPU-acceleration with ``torchaudio.transforms``, you need to move audio
+data (waveform tensor) explicitly to GPU using ``.to('cuda')``.

 The following ``torchaudio`` features are GPU-accelerated.

@@ -775,10 +724,10 @@ The following ``torchaudio`` features are GPU-accelerated.

    * - Feature
      - Description
-      - As of torchaudio version
-      - As of ROCm
+      - Since torchaudio version
+      - Since ROCm
    * - ``torchaudio.transforms.Spectrogram``
-      - Generate a spectrogram of an input waveform using STFT.
+      - Generates a spectrogram of an input waveform using STFT.
      - 0.6.0
      - 4.5
    * - ``torchaudio.transforms.MelSpectrogram``
@@ -798,7 +747,7 @@ torchvision
 --------------------------------------------------------------------------------

 The `torchvision <https://pytorch.org/vision/stable/index.html>`_ library
-provides datasets, model architectures, and common image transformations for
+provides datasets, model architectures, and common image transformations for
 computer vision.

 The following ``torchvision`` features are GPU-accelerated.
@@ -808,8 +757,8 @@ The following ``torchvision`` features are GPU-accelerated.

    * - Feature
      - Description
-      - As of torchvision version
-      - As of ROCm
+      - Since torchvision version
+      - Since ROCm
    * - ``torchvision.transforms.functional``
      - Provides GPU-compatible transformations for image preprocessing like
        resize, normalize, rotate and crop.
@@ -855,7 +804,7 @@ torchtune
 The `torchtune <https://pytorch.org/torchtune/stable/index.html>`_ library for
 authoring, fine-tuning and experimenting with LLMs.

-* Usage: Enabling developers to fine-tune ROCm PyTorch solutions.
+* Usage: It works out-of-the-box, enabling developers to fine-tune ROCm PyTorch solutions.

 * Only official release exists.

@@ -866,8 +815,7 @@ The `torchserve <https://pytorch.org/serve/>`_ is a PyTorch domain library
 for common sparsity and parallelism primitives needed for large-scale recommender
 systems.

-* torchtext does not implement its own kernels. ROCm support is enabled by
-  linking against ROCm libraries.
+* torchtext does not implement its own kernels. ROCm support is enabled by linking against ROCm libraries.

 * Only official release exists.

@@ -878,16 +826,14 @@ The `torchrec <https://pytorch.org/torchrec/>`_ is a PyTorch domain library for
 common sparsity and parallelism primitives needed for large-scale recommender
 systems.

-* torchrec does not implement its own kernels. ROCm support is enabled by
-  linking against ROCm libraries.
+* torchrec does not implement its own kernels. ROCm support is enabled by linking against ROCm libraries.

 * Only official release exists.

 Unsupported PyTorch features
-================================================================================
+----------------------------

-The following GPU-accelerated PyTorch features are not supported by ROCm for
-the listed supported PyTorch versions.
+The following are GPU-accelerated PyTorch features not currently supported by ROCm.

 .. list-table::
    :widths: 30, 60, 10
@@ -895,7 +841,7 @@ the listed supported PyTorch versions.

    * - Feature
      - Description
-      - As of PyTorch
+      - Since PyTorch
    * - APEX batch norm
      - Use APEX batch norm instead of PyTorch batch norm.
      - 1.6.0
@@ -951,3 +897,31 @@ the listed supported PyTorch versions.
        utilized effectively through custom CUDA extensions or advanced
        workflows.
      - Not a core feature
+
+Use cases and recommendations
+================================================================================
+
+* :doc:`Using ROCm for AI: training a model </how-to/rocm-for-ai/training/train-a-model>` provides
+  guidance on how to leverage the ROCm platform for training AI models. It covers the steps, tools, and best practices
+  for optimizing training workflows on AMD GPUs using PyTorch features.
+
+* :doc:`Single-GPU fine-tuning and inference </how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference>`
+  describes and demonstrates how to use the ROCm platform for the fine-tuning and inference of
+  machine learning models, particularly large language models (LLMs), on systems with a single AMD
+  Instinct MI300X accelerator. This page provides a detailed guide for setting up, optimizing, and
+  executing fine-tuning and inference workflows in such environments.
+
+* :doc:`Multi-GPU fine-tuning and inference optimization </how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference>`
+  describes and demonstrates the fine-tuning and inference of machine learning models on systems
+  with multiple MI300X accelerators.
+
+* The :doc:`Instinct MI300X workload optimization guide </how-to/rocm-for-ai/inference-optimization/workload>` provides detailed
+  guidance on optimizing workloads for the AMD Instinct MI300X accelerator using ROCm. This guide is aimed at helping
+  users achieve optimal performance for deep learning and other high-performance computing tasks on the MI300X
+  accelerator.
+
+* The :doc:`Inception with PyTorch documentation </conceptual/ai-pytorch-inception>`
+  describes how PyTorch integrates with ROCm for AI workloads. It outlines the use of PyTorch on the ROCm platform and
+  focuses on how to efficiently leverage AMD GPU hardware for training and inference tasks in AI applications.
+
+For more use cases and recommendations, see `ROCm PyTorch blog posts <https://rocm.blogs.amd.com/blog/tag/pytorch.html>`_.
--- a/docs/compatibility/ml-compatibility/tensorflow-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/tensorflow-compatibility.rst
@@ -56,7 +56,7 @@ Docker image compatibility
 AMD validates and publishes ready-made `TensorFlow images
 <https://hub.docker.com/r/rocm/tensorflow>`_ with ROCm backends on
 Docker Hub. The following Docker image tags and associated inventories are
-validated for `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_. Click
+validated for `ROCm 6.3.3 <https://repo.radeon.com/rocm/apt/6.3.3/>`_. Click
 the |docker-icon| icon to view the image on Docker Hub.

 .. list-table:: TensorFlow Docker image components
@@ -64,91 +64,57 @@ the |docker-icon| icon to view the image on Docker Hub.

    * - Docker image
      - TensorFlow
-      - Ubuntu
      - Dev
      - Python
      - TensorBoard

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.18-dev/images/sha256-fa9cf5fa6c6079a7118727531ccd0056c6e3224a42c3d6e78a49e7781daafff4"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
+      - `rocm/tensorflow`__
      - `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
      - dev
-      - 24.04
      - `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
      - `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.18-runtime/images/sha256-14addca4b92a47c806b83ebaeed593fc6672cd99f0017ed8dad759fe72ed0309"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
-      - `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
-      - runtime
-      - 24.04
-      - `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
-      - `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.10-tf2.18-dev/images/sha256-f5e151060df04ff5fb59f5604b49cd371931bbe75b06aec9fe7781397c4be0ce"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
+      - `rocm/tensorflow`__
      - `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
      - dev
-      - 22.04
      - `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
      - `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.10-tf2.18-runtime/images/sha256-5cd4c03fdb1036570c0d4929da60a65c4466998dc80f1dc8a5a0b173eae017fb"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
-      - `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
-      - runtime
-      - 22.04
-      - `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
-      - `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.17-dev/images/sha256-b3add80e374a2db2d1088d746e740afa89d439aca02cacba959ad298f5cd2b3f"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
+      - `rocm/tensorflow`__
      - `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
      - dev
-      - 24.04
      - `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
      - `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.17-runtime/images/sha256-3a244f026c32177eff7958ffbad390de85b438b2b48b455cc39f15d70fa1270d"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
-      - `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
-      - runtime
-      - 24.04
-      - `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
-      - `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.10-tf2.17-dev/images/sha256-e0cecdfacb59169335049983cdab6da578c209bb9f4d08aad97e184ae59171a6"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
+      - `rocm/tensorflow`__
      - `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
      - dev
-      - 22.04
      - `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
      - `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.10-tf2.17-runtime/images/sha256-6f43de12f7eb202791b698ac51d28b72098de90034dbcd48486629b0125f7707"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
+      - `rocm/tensorflow`__
+      - `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.16.2-cp312-cp312-manylinux_2_28_x86_64.whl>`__
+      - dev
+      - `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
+      - `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`_

-      - `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
-      - runtime
-      - 22.04
+    * - .. raw:: html
+
+      - `rocm/tensorflow`__
+      - `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.16.2-cp310-cp310-manylinux_2_28_x86_64.whl>`__
+      - dev
      - `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
-      - `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_
-
+      - `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`_

 Critical ROCm libraries for TensorFlow
 ===============================================================================
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -51,15 +51,12 @@ article_pages = [
    {"file": "how-to/deep-learning-rocm", "os": ["linux"]},

    {"file": "how-to/rocm-for-ai/index", "os": ["linux"]},
-    {"file": "how-to/rocm-for-ai/install", "os": ["linux"]},
-    {"file": "how-to/rocm-for-ai/system-health-check", "os": ["linux"]},

    {"file": "how-to/rocm-for-ai/training/index", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/training/train-a-model", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/training/prerequisite-system-validation", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/training/benchmark-docker/megatron-lm", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/training/benchmark-docker/pytorch-training", "os": ["linux"]},
-    {"file": "how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/training/scale-model-training", "os": ["linux"]},

    {"file": "how-to/rocm-for-ai/fine-tuning/index", "os": ["linux"]},
@@ -69,10 +66,10 @@ article_pages = [
    {"file": "how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference", "os": ["linux"]},

    {"file": "how-to/rocm-for-ai/inference/index", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/inference/install", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/inference/hugging-face-models", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/inference/llm-inference-frameworks", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/inference/vllm-benchmark", "os": ["linux"]},
-    {"file": "how-to/rocm-for-ai/inference/pytorch-inference-benchmark", "os": ["linux"]},
    {"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]},

    {"file": "how-to/rocm-for-ai/inference-optimization/index", "os": ["linux"]},
--- a/docs/data/how-to/rocm-for-ai/inference/pytorch-inference-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/pytorch-inference-benchmark-models.yaml
@@ -1,25 +0,0 @@
-pytorch_inference_benchmark:
-  unified_docker:
-    latest: &rocm-pytorch-docker-latest
-      pull_tag: rocm/pytorch:latest
-      docker_hub_url:
-      rocm_version:
-      pytorch_version:
-      hipblaslt_version:
-  model_groups:
-    - group: CLIP
-      tag: clip
-      models:
-      - model: CLIP
-        mad_tag: pyt_clip_inference
-        model_repo: laion/CLIP-ViT-B-32-laion2B-s34B-b79K
-        url: https://huggingface.co/laion/CLIP-ViT-B-32-laion2B-s34B-b79K
-        precision: float16
-    - group: Chai-1
-      tag: chai
-      models:
-      - model: Chai-1
-        mad_tag: pyt_chai1_inference
-        model_repo: meta-llama/Llama-3.1-8B-Instruct
-        url: https://huggingface.co/chaidiscovery/chai-1
-        precision: float16
--- a/docs/data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml
@@ -1,10 +1,10 @@
 vllm_benchmark:
  unified_docker:
    latest:
-      pull_tag: rocm/vllm:rocm6.3.1_instinct_vllm0.8.3_20250415
-      docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.8.3_20250415/images/sha256-ad9062dea3483d59dedb17c67f7c49f30eebd6eb37c3fac0a171fb19696cc845
+      pull_tag: rocm/vllm:instinct_main
+      docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.7.3_20250311/images/sha256-de0a2649b735f45b7ecab8813eb7b19778ae1f40591ca1196b07bc29c42ed4a3
      rocm_version: 6.3.1
-      vllm_version: 0.8.3
+      vllm_version: 0.7.3
      pytorch_version: 2.7.0 (dev nightly)
      hipblaslt_version: 0.13
  model_groups:
@@ -102,12 +102,19 @@ vllm_benchmark:
        model_repo: Qwen/Qwen2-72B-Instruct
        url: https://huggingface.co/Qwen/Qwen2-72B-Instruct
        precision: float16
-      - model: QwQ-32B
-        mad_tag: pyt_vllm_qwq-32b
-        model_repo: Qwen/QwQ-32B
-        url: https://huggingface.co/Qwen/QwQ-32B
+    - group: JAIS
+      tag: jais
+      models:
+      - model: JAIS 13B
+        mad_tag: pyt_vllm_jais-13b
+        model_repo: core42/jais-13b-chat
+        url: https://huggingface.co/core42/jais-13b-chat
+        precision: float16
+      - model: JAIS 30B
+        mad_tag: pyt_vllm_jais-30b
+        model_repo: core42/jais-30b-chat-v3
+        url: https://huggingface.co/core42/jais-30b-chat-v3
        precision: float16
-        tunableop: true
    - group: DBRX
      tag: dbrx
      models:
--- a/docs/data/rocm-software-stack-6_4_0.jpg
+++ b/docs/data/rocm-software-stack-6_4_0.jpg
--- a/docs/how-to/rocm-for-ai/inference-optimization/model-quantization.rst
+++ b/docs/how-to/rocm-for-ai/inference-optimization/model-quantization.rst
@@ -1,178 +1,15 @@
 .. meta::
   :description: How to use model quantization techniques to speed up inference.
-   :keywords: ROCm, LLM, fine-tuning, usage, tutorial, quantization, Quark, GPTQ, transformers, bitsandbytes
+   :keywords: ROCm, LLM, fine-tuning, usage, tutorial, quantization, GPTQ, transformers, bitsandbytes

 *****************************
 Model quantization techniques
 *****************************

 Quantization reduces the model size compared to its native full-precision version, making it easier to fit large models
-onto accelerators or GPUs with limited memory usage. This section explains how to perform LLM quantization using AMD Quark, GPTQ
+onto accelerators or GPUs with limited memory usage. This section explains how to perform LLM quantization using GPTQ
 and bitsandbytes on AMD Instinct hardware.

-.. _quantize-llms-quark:
-
-AMD Quark
-=========
-
-`AMD Quark <https://quark.docs.amd.com/latest/>`_ offers the leading efficient and scalable quantization solution tailored to AMD Instinct GPUs. It supports ``FP8`` and ``INT8`` quantization for activations, weights, and KV cache, 
-including ``FP8`` attention. For very large models, it employs a two-level ``INT4-FP8`` scheme—storing weights in ``INT4`` while computing with ``FP8``—for nearly 4× compression without sacrificing accuracy. 
-Quark scales efficiently across multiple GPUs, efficiently handling ultra-large models like Llama-3.1-405B. Quantized ``FP8`` models like Llama, Mixtral, and Grok-1 are available under the `AMD organization on Hugging Face <https://huggingface.co/collections/amd/quark-quantized-ocp-fp8-models-66db7936d18fcbaf95d4405c>`_, and can be deployed directly via `vLLM <https://github.com/vllm-project/vllm/tree/main/vllm>`_.
-
-Installing Quark
-------------------
-
-The latest release of Quark can be installed with pip
-
-.. code-block:: shell
-
-    pip install amd-quark
-
-For detailed installation instructions, refer to the `Quark documentation <https://quark.docs.amd.com/latest/install.html>`_.
-
-
-Using Quark for quantization
-----------------------------
-
-#. First, load the pre-trained model and its corresponding tokenizer using the Hugging Face ``transformers`` library.
-
-   .. code-block:: python
-
-      from transformers import AutoTokenizer, AutoModelForCausalLM
-
-      MODEL_ID = "meta-llama/Llama-2-70b-chat-hf"
-      MAX_SEQ_LEN = 512
-
-      model = AutoModelForCausalLM.from_pretrained(
-          MODEL_ID, device_map="auto", torch_dtype="auto",
-      )
-      model.eval()
-
-      tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, model_max_length=MAX_SEQ_LEN)
-      tokenizer.pad_token = tokenizer.eos_token
-
-#. Prepare the calibration DataLoader (static quantization requires calibration data).
-
-   .. code-block:: python
-
-      from datasets import load_dataset
-      from torch.utils.data import DataLoader
-
-      BATCH_SIZE = 1
-      NUM_CALIBRATION_DATA = 512
-
-      dataset = load_dataset("mit-han-lab/pile-val-backup", split="validation")
-      text_data = dataset["text"][:NUM_CALIBRATION_DATA]
-
-      tokenized_outputs = tokenizer(
-      text_data, return_tensors="pt", padding=True, truncation=True, max_length=MAX_SEQ_LEN
-      )
-      calib_dataloader = DataLoader(
-      tokenized_outputs['input_ids'], batch_size=BATCH_SIZE, drop_last=True
-      )
-
-#. Define the quantization configuration. See the comments in the following code snippet for descriptions of each configuration option.
-
-   .. code-block:: python
-
-      from quark.torch.quantization import (Config, QuantizationConfig,
-                                           FP8E4M3PerTensorSpec)
-
-      # Define fp8/per-tensor/static spec.
-      FP8_PER_TENSOR_SPEC = FP8E4M3PerTensorSpec(observer_method="min_max",
-          is_dynamic=False).to_quantization_spec()
-
-      # Define global quantization config, input tensors and weight apply FP8_PER_TENSOR_SPEC.
-      global_quant_config = QuantizationConfig(input_tensors=FP8_PER_TENSOR_SPEC,
-          weight=FP8_PER_TENSOR_SPEC)
-
-      # Define quantization config for kv-cache layers, output tensors apply FP8_PER_TENSOR_SPEC.
-      KV_CACHE_SPEC = FP8_PER_TENSOR_SPEC
-      kv_cache_layer_names_for_llama = ["*k_proj", "*v_proj"]
-      kv_cache_quant_config = {name :
-          QuantizationConfig(input_tensors=global_quant_config.input_tensors,
-                             weight=global_quant_config.weight,
-                             output_tensors=KV_CACHE_SPEC)
-          for name in kv_cache_layer_names_for_llama}
-      layer_quant_config = kv_cache_quant_config.copy()
-
-      EXCLUDE_LAYERS = ["lm_head"]
-      quant_config = Config(
-          global_quant_config=global_quant_config,
-          layer_quant_config=layer_quant_config,
-          kv_cache_quant_config=kv_cache_quant_config,
-          exclude=EXCLUDE_LAYERS)
-
-#. Quantize the model and export
-
-   .. code-block:: python
-
-      import torch
-      from quark.torch import ModelQuantizer, ModelExporter
-      from quark.torch.export import ExporterConfig, JsonExporterConfig
-
-      # Apply quantization.
-      quantizer = ModelQuantizer(quant_config)
-      quant_model = quantizer.quantize_model(model, calib_dataloader)
-
-      # Freeze quantized model to export.
-      freezed_model = quantizer.freeze(model)
-
-      # Define export config.
-      LLAMA_KV_CACHE_GROUP = ["*k_proj", "*v_proj"]
-      export_config = ExporterConfig(json_export_config=JsonExporterConfig())
-      export_config.json_export_config.kv_cache_group = LLAMA_KV_CACHE_GROUP
-
-      EXPORT_DIR = MODEL_ID.split("/")[1] + "-w-fp8-a-fp8-kvcache-fp8-pertensor"
-      exporter = ModelExporter(config=export_config, export_dir=EXPORT_DIR)
-      with torch.no_grad():
-          exporter.export_safetensors_model(freezed_model,
-              quant_config=quant_config, tokenizer=tokenizer)
-
-Evaluating the quantized model with vLLM
----------------------------------------
-
-The exported Quark-quantized model can be loaded directly by vLLM for inference. You need to specify the model path and inform vLLM about the quantization method (``quantization='quark'``) and the KV cache data type (``kv_cache_dtype='fp8'``).
-Use the ``LLM`` interface to load the model:
-
-.. code-block:: python
-
-   from vllm import LLM, SamplingParamsinterface
-
-   # Sample prompts.
-   prompts = [
-       "Hello, my name is",
-       "The president of the United States is",
-       "The capital of France is",
-       "The future of AI is",
-   ]
-   # Create a sampling params object.
-   sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
-
-   # Create an LLM.
-   llm = LLM(model="Llama-2-70b-chat-hf-w-fp8-a-fp8-kvcache-fp8-pertensor",
-             kv_cache_dtype='fp8',quantization='quark')
-   # Generate texts from the prompts. The output is a list of RequestOutput objects
-   # that contain the prompt, generated text, and other information.
-   outputs = llm.generate(prompts, sampling_params)
-   # Print the outputs.
-   print("\nGenerated Outputs:\n" + "-" * 60)
-   for output in outputs:
-       prompt = output.prompt
-       generated_text = output.outputs[0].text
-       print(f"Prompt:    {prompt!r}")
-       print(f"Output:    {generated_text!r}")
-       print("-" * 60)
-
-You can also evaluate the quantized model's accuracy on standard benchmarks using the `lm-evaluation-harness <https://github.com/EleutherAI/lm-evaluation-harness>`_. Pass the necessary vLLM arguments to ``lm_eval`` via ``--model_args``.
-
-.. code-block:: shell
-
-   lm_eval --model vllm \
-     --model_args pretrained=Llama-2-70b-chat-hf-w-fp8-a-fp8-kvcache-fp8-pertensor,kv_cache_dtype='fp8',quantization='quark' \
-     --tasks gsm8k
-
-This provides a standardized way to measure the performance impact of quantization.
 .. _fine-tune-llms-gptq:

 GPTQ
@@ -196,7 +33,7 @@ The AutoGPTQ library implements the GPTQ algorithm.
   .. code-block:: shell

      # This will install pre-built wheel for a specific ROCm version.
-
+      
      pip install auto-gptq --no-build-isolation --extra-index-url https://huggingface.github.io/autogptq-index/whl/rocm573/

   Or, install AutoGPTQ from source for the appropriate ROCm version (for example, ROCm 6.1).
@@ -206,10 +43,10 @@ The AutoGPTQ library implements the GPTQ algorithm.
      # Clone the source code.
      git clone https://github.com/AutoGPTQ/AutoGPTQ.git
      cd AutoGPTQ
-
+      
      # Speed up the compilation by specifying PYTORCH_ROCM_ARCH to target device.
      PYTORCH_ROCM_ARCH=gfx942 ROCM_VERSION=6.1 pip install .
-
+      
      # Show the package after the installation 

 #. Run ``pip show auto-gptq`` to print information for the installed ``auto-gptq`` package. Its output should look like
@@ -275,7 +112,7 @@ Using GPTQ with Hugging Face Transformers
   .. code-block:: python

      from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
-
+      
      base_model_name = " NousResearch/Llama-2-7b-hf"
      tokenizer = AutoTokenizer.from_pretrained(base_model_name)
      gptq_config = GPTQConfig(bits=4, dataset="c4", tokenizer=tokenizer)
@@ -375,10 +212,10 @@ To get started with bitsandbytes primitives, use the following code as reference
 .. code-block:: python

   import bitsandbytes as bnb
-
+   
   # Use Int8 Matrix Multiplication
   bnb.matmul(..., threshold=6.0)
-
+   
   # Use bitsandbytes 8-bit Optimizers
   adam = bnb.optim.Adam8bit(model.parameters(), lr=0.001, betas=(0.9, 0.995))

@@ -390,14 +227,14 @@ To load a Transformers model in 4-bit, set ``load_in_4bit=true`` in ``BitsAndByt
 .. code-block:: python

   from transformers import AutoModelForCausalLM, BitsAndBytesConfig
-
+   
   base_model_name = "NousResearch/Llama-2-7b-hf"
   quantization_config = BitsAndBytesConfig(load_in_4bit=True)
   bnb_model_4bit = AutoModelForCausalLM.from_pretrained(
           base_model_name, 
           device_map="auto", 
           quantization_config=quantization_config)
-
+   
   # Check the memory footprint with get_memory_footprint method
   print(bnb_model_4bit.get_memory_footprint())

@@ -406,9 +243,9 @@ To load a model in 8-bit for inference, use the ``load_in_8bit`` option.
 .. code-block:: python

   from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-
+   
   base_model_name = "NousResearch/Llama-2-7b-hf"
-
+   
   tokenizer = AutoTokenizer.from_pretrained(base_model_name)
   quantization_config = BitsAndBytesConfig(load_in_8bit=True)
   tokenizer = AutoTokenizer.from_pretrained(base_model_name)
@@ -416,7 +253,7 @@ To load a model in 8-bit for inference, use the ``load_in_8bit`` option.
           base_model_name, 
           device_map="auto", 
           quantization_config=quantization_config)
-
+   
   prompt = "What is a large language model?"
   inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
   generated_ids = model.generate(**inputs)
--- a/docs/how-to/rocm-for-ai/inference/deploy-your-model.rst
+++ b/docs/how-to/rocm-for-ai/inference/deploy-your-model.rst
@@ -16,7 +16,8 @@ ROCm supports vLLM and Hugging Face TGI as major LLM-serving frameworks.
 Serving using vLLM
 ==================

-vLLM is a fast and easy-to-use library for LLM inference and serving. AMD is actively working with the vLLM team to improve performance and support the latest ROCm versions.
+vLLM is a fast and easy-to-use library for LLM inference and serving. vLLM officially supports ROCm versions 5.7 and
+6.0. AMD is actively working with the vLLM team to improve performance and support later ROCm versions.

 See the `GitHub repository <https://github.com/vllm-project/vllm>`_ and `official vLLM documentation
 <https://docs.vllm.ai/>`_ for more information.
@@ -30,9 +31,9 @@ vLLM installation
 vLLM supports two ROCm-capable installation methods. Refer to the official documentation use the following links.

 -  `Build from source with Docker
-   <https://docs.vllm.ai/en/latest/getting_started/installation/gpu.html?device=rocm#build-image-from-source>`_ (recommended)
+   <https://docs.vllm.ai/en/latest/getting_started/amd-installation.html#build-from-source-docker-rocm>`_ (recommended)

-  `Build from source <https://docs.vllm.ai/en/latest/getting_started/installation/gpu.html?device=rocm#build-wheel-from-source>`_
+-  `Build from source <https://docs.vllm.ai/en/latest/getting_started/amd-installation.html#build-from-source-rocm>`_

 vLLM walkthrough
 ----------------
--- a/docs/how-to/rocm-for-ai/inference/index.rst
+++ b/docs/how-to/rocm-for-ai/inference/index.rst
@@ -20,8 +20,6 @@ training, fine-tuning, and inference. It leverages popular machine learning fram

 - :doc:`LLM inference frameworks <llm-inference-frameworks>`

- :doc:`vLLM inference performance testing <vllm-benchmark>`
-
- :doc:`PyTorch inference performance testing <pytorch-inference-benchmark>`
+- :doc:`Performance testing <vllm-benchmark>`

 - :doc:`Deploying your model <deploy-your-model>`
--- a/docs/how-to/rocm-for-ai/inference/install.rst
+++ b/docs/how-to/rocm-for-ai/inference/install.rst
@@ -30,7 +30,7 @@ ROCm supports multiple :doc:`installation methods <rocm-install-on-linux:install

 * :doc:`Using the AMDGPU installer <rocm-install-on-linux:install/amdgpu-install>`

-* :ref:`Multi-version installation <rocm-install-on-linux:installation-types>`
+* :ref:`Multi-version installation <rocm-install-on-linux:installation-types>`.

 .. grid:: 1

@@ -59,8 +59,4 @@ images with the framework pre-installed.

 * :doc:`JAX for ROCm <rocm-install-on-linux:install/3rd-party/jax-install>`

-Next steps
-==========
-
-After installing ROCm and your desired ML libraries -- and before running AI workloads -- conduct system health benchmarks
-to test the optimal performance of your AMD hardware. See :doc:`system-health-check` to get started.
+The sections that follow in :doc:`Training a model <../training/train-a-model>` are geared for a ROCm with PyTorch installation.
--- a/docs/how-to/rocm-for-ai/inference/llm-inference-frameworks.rst
+++ b/docs/how-to/rocm-for-ai/inference/llm-inference-frameworks.rst
@@ -36,7 +36,7 @@ Installing vLLM

      git clone https://github.com/vllm-project/vllm.git
      cd vllm
-      docker build -f docker/Dockerfile.rocm -t vllm-rocm .
+      docker build -f Dockerfile.rocm -t vllm-rocm .

 .. tab-set::

--- a/docs/how-to/rocm-for-ai/inference/pytorch-inference-benchmark.rst
+++ b/docs/how-to/rocm-for-ai/inference/pytorch-inference-benchmark.rst
@@ -1,172 +0,0 @@
-.. meta::
-   :description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the
-                 ROCm PyTorch Docker image.
-   :keywords: model, MAD, automation, dashboarding, validate, pytorch
-
-*************************************
-PyTorch inference performance testing
-*************************************
-
-.. _pytorch-inference-benchmark-docker:
-
-.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/pytorch-inference-benchmark-models.yaml
-
-   {% set unified_docker = data.pytorch_inference_benchmark.unified_docker.latest %}
-   {% set model_groups = data.pytorch_inference_benchmark.model_groups %}
-
-   The `ROCm PyTorch Docker <https://hub.docker.com/r/rocm/pytorch/tags>`_ image offers a prebuilt,
-   optimized environment for testing model inference performance on AMD Instinct™ MI300X series
-   accelerators. This guide demonstrates how to use the AMD Model Automation and Dashboarding (MAD)
-   tool with the ROCm PyTorch container to test inference performance on various models efficiently.
-
-   .. _pytorch-inference-benchmark-available-models:
-
-   Supported models
-   ================
-
-   .. raw:: html
-
-      <div id="vllm-benchmark-ud-params-picker" class="container-fluid">
-        <div class="row">
-          <div class="col-2 me-2 model-param-head">Model</div>
-          <div class="row col-10">
-   {% for model_group in model_groups %}
-            <div class="col-6 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
-   {% endfor %}
-          </div>
-        </div>
-
-        <div class="row mt-1" style="display: none;">
-          <div class="col-2 me-2 model-param-head">Model variant</div>
-          <div class="row col-10">
-   {% for model_group in model_groups %}
-      {% set models = model_group.models %}
-      {% for model in models %}
-            <div class="col-12 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
-      {% endfor %}
-   {% endfor %}
-          </div>
-        </div>
-      </div>
-
-   {% for model_group in model_groups %}
-      {% for model in model_group.models %}
-
-   .. container:: model-doc {{model.mad_tag}}
-
-      .. note::
-
-         See the `{{ model.model }} model card on Hugging Face <{{ model.url }}>`_ to learn more about your selected model.
-         Some models require access authorization before use via an external license agreement through a third party.
-
-      {% endfor %}
-   {% endfor %}
-
-   System validation
-   =================
-
-   Before running AI workloads, it's important to validate that your AMD hardware is configured
-   correctly and performing optimally.
-
-   To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
-   might hang until the periodic balancing is finalized. For more information,
-   see the :ref:`system validation steps <rocm-for-ai-system-optimization>`.
-
-   .. code-block:: shell
-
-      # disable automatic NUMA balancing
-      sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
-      # check if NUMA balancing is disabled (returns 0 if disabled)
-      cat /proc/sys/kernel/numa_balancing
-      0
-
-   To test for optimal performance, consult the recommended :ref:`System health benchmarks
-   <rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
-   system's configuration.
-
-   Pull the Docker image
-   =====================
-
-   .. container:: model-doc pyt_chai1_inference
-
-      Use the following command to pull the `ROCm PyTorch Docker image <https://hub.docker.com/layers/rocm/pytorch/rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0_triton_llvm_reg_issue/images/sha256-b736a4239ab38a9d0e448af6d4adca83b117debed00bfbe33846f99c4540f79b>`_ from Docker Hub.
-
-      .. code-block:: shell
-
-         docker pull rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0_triton_llvm_reg_issue
-
-      .. note::
-
-         The Chai-1 benchmark uses a specifically selected Docker image using ROCm 6.2.3 and PyTorch 2.3.0 to address an accuracy issue.
-
-   .. container:: model-doc pyt_clip_inference
-
-      Use the following command to pull the `ROCm PyTorch Docker image <https://hub.docker.com/layers/rocm/pytorch/latest/images/sha256-05b55983e5154f46e7441897d0908d79877370adca4d1fff4899d9539d6c4969>`_ from Docker Hub.
-
-      .. code-block:: shell
-
-         docker pull rocm/pytorch:latest
-
-   .. _pytorch-benchmark-get-started:
-
-   Benchmarking
-   ============
-
-   .. _pytorch-inference-benchmark-mad:
-
-   {% for model_group in model_groups %}
-      {% for model in model_group.models %}
-
-   .. container:: model-doc {{model.mad_tag}}
-
-      To simplify performance testing, the ROCm Model Automation and Dashboarding
-      (`<https://github.com/ROCm/MAD>`__) project provides ready-to-use scripts and configuration.
-      To start, clone the  MAD repository to a local directory and install the required packages on the
-      host machine.
-
-      .. code-block:: shell
-
-         git clone https://github.com/ROCm/MAD
-         cd MAD
-         pip install -r requirements.txt
-
-      Use this command to run the performance benchmark test on the `{{model.model}} <{{ model.url }}>`_ model
-      using one GPU with the ``{{model.precision}}`` data type on the host machine.
-
-      .. code-block:: shell
-
-         export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
-         python3 tools/run_models.py --tags {{model.mad_tag}} --keep-model-dir --live-output --timeout 28800
-
-      MAD launches a Docker container with the name
-      ``container_ci-{{model.mad_tag}}``. The latency and throughput reports of the
-      model are collected in ``perf.csv``.
-
-      .. note::
-
-         For improved performance, consider enabling TunableOp. By default,
-         ``{{model.mad_tag}}`` runs with TunableOp disabled (see
-         `<https://github.com/ROCm/MAD/blob/develop/models.json>`__). To enable
-         it, edit the default run behavior in the ``tools/run_models.py``-- update the model's
-         run ``args`` by changing ``--tunableop off`` to ``--tunableop on``.
-
-         Enabling TunableOp triggers a two-pass run -- a warm-up followed by the performance-collection run.
-         Although this might increase the initial training time, it can result in a performance gain.
-
-      {% endfor %}
-   {% endfor %}
-
-Further reading
-===============
-
- To learn more about system settings and management practices to configure your system for
-  MI300X accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
-
- To learn how to run LLM models from Hugging Face or your model, see
-  :doc:`Running models from Hugging Face <hugging-face-models>`.
-
- To learn how to optimize inference on LLMs, see
-  :doc:`Inference optimization <../inference-optimization/index>`.
-
- To learn how to fine-tune LLMs, see
-  :doc:`Fine-tuning LLMs <../fine-tuning/index>`.
--- a/docs/how-to/rocm-for-ai/inference/vllm-benchmark.rst
+++ b/docs/how-to/rocm-for-ai/inference/vllm-benchmark.rst
@@ -3,9 +3,9 @@
                 ROCm vLLM Docker image.
   :keywords: model, MAD, automation, dashboarding, validate

-**********************************
-vLLM inference performance testing
-**********************************
+********************************************************
+LLM inference performance testing on AMD Instinct MI300X
+********************************************************

 .. _vllm-benchmark-unified-docker:

@@ -16,7 +16,7 @@ vLLM inference performance testing

   The `ROCm vLLM Docker <{{ unified_docker.docker_hub_url }}>`_ image offers
   a prebuilt, optimized environment for validating large language model (LLM)
-   inference performance on AMD Instinct™ MI300X series accelerators. This ROCm vLLM
+   inference performance on AMD Instinct™ MI300X series accelerators. This ROCm vLLM
   Docker image integrates vLLM and PyTorch tailored specifically for MI300X series
   accelerators and includes the following components:

@@ -34,7 +34,7 @@ vLLM inference performance testing

   .. _vllm-benchmark-available-models:

-   Supported models
+   Available models
   ================

   .. raw:: html
@@ -111,37 +111,35 @@ vLLM inference performance testing
   For information on experimental features and known issues related to ROCm optimization efforts on vLLM,
   see the developer's guide at `<https://github.com/ROCm/vllm/blob/main/docs/dev-docker/README.md>`__.

-   System validation
-   =================
+   Getting started
+   ===============

-   Before running AI workloads, it's important to validate that your AMD hardware is configured
-   correctly and performing optimally.
+   Use the following procedures to reproduce the benchmark results on an
+   MI300X accelerator with the prebuilt vLLM Docker image.

-   To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
-   might hang until the periodic balancing is finalized. For more information,
-   see the :ref:`system validation steps <rocm-for-ai-system-optimization>`.
+   .. _vllm-benchmark-get-started:

-   .. code-block:: shell
+   1. Disable NUMA auto-balancing.

-      # disable automatic NUMA balancing
-      sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
-      # check if NUMA balancing is disabled (returns 0 if disabled)
-      cat /proc/sys/kernel/numa_balancing
-      0
+      To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
+      might hang until the periodic balancing is finalized. For more information,
+      see :ref:`AMD Instinct MI300X system optimization <mi300x-disable-numa>`.

-   To test for optimal performance, consult the recommended :ref:`System health benchmarks
-   <rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
-   system's configuration.
+      .. code-block:: shell

-   Pull the Docker image
-   =====================
+         # disable automatic NUMA balancing
+         sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
+         # check if NUMA balancing is disabled (returns 0 if disabled)
+         cat /proc/sys/kernel/numa_balancing
+         0

-   Download the `ROCm vLLM Docker image <{{ unified_docker.docker_hub_url }}>`_.
-   Use the following command to pull the Docker image from Docker Hub.
+   2. Download the `ROCm vLLM Docker image <{{ unified_docker.docker_hub_url }}>`_.

-   .. code-block:: shell
+      Use the following command to pull the Docker image from Docker Hub.

-      docker pull {{ unified_docker.pull_tag }}
+      .. code-block:: shell
+
+         docker pull {{ unified_docker.pull_tag }}

   Benchmarking
   ============
@@ -185,25 +183,6 @@ vLLM inference performance testing
            to collect latency and throughput performance data, you can also change the benchmarking
            parameters. See the standalone benchmarking tab for more information.

-            {% if model.tunableop %}
-
-            .. note::
-
-               For improved performance, consider enabling :ref:`PyTorch TunableOp <mi300x-tunableop>`.
-               TunableOp automatically explores different implementations and configurations of certain PyTorch
-               operators to find the fastest one for your hardware.
-
-               By default, ``{{model.mad_tag}}`` runs with TunableOp disabled
-               (see
-               `<https://github.com/ROCm/MAD/blob/develop/models.json>`__). To
-               enable it, edit the default run behavior in the ``models.json``
-               configuration before running inference -- update the model's run
-               ``args`` by changing ``--tunableop off`` to ``--tunableop on``.
-
-               Enabling TunableOp triggers a two-pass run -- a warm-up followed by the performance-collection run.
-
-            {% endif %}
-
         .. tab-item:: Standalone benchmarking

            Run the vLLM benchmark tool independently by starting the
@@ -278,7 +257,7 @@ vLLM inference performance testing

            * Latency benchmark

-              Use this command to benchmark the latency of the {{model.model}} model on eight GPUs with ``{{model.precision}}`` precision.
+              Use this command to benchmark the latency of the {{model.model}} model on eight GPUs with the ``{{model.precision}}`` data type.

              .. code-block::

@@ -288,11 +267,11 @@ vLLM inference performance testing

            * Throughput benchmark

-              Use this command to benchmark the throughput of the {{model.model}} model on eight GPUs with ``{{model.precision}}`` precision.
+              Use this command to benchmark the throughput of the {{model.model}} model on eight GPUs with the ``{{model.precision}}`` data type.

              .. code-block:: shell

-                 ./vllm_benchmark_report.sh -s throughput -m {{model.model_repo}} -g 8 -d {{model.precision}}
+                 ./vllm_benchmark_report.sh -s throughput -m {{model.model_repo}} -g 8 -d {{model.precision}}

              Find the throughput report at ``./reports_{{model.precision}}_vllm_rocm{{unified_docker.rocm_version}}/summary/{{model.model_repo.split('/', 1)[1] if '/' in model.model_repo else model.model_repo}}_throughput_report.csv``.

@@ -352,18 +331,11 @@ for benchmarking, see the version-specific documentation.
     - PyTorch version
     - Resources

-   * - 6.3.1
-     - 0.7.3
-     - 2.7.0
-     - 
-       * `Documentation <https://rocm.docs.amd.com/en/docs-6.3.3/how-to/rocm-for-ai/inference/vllm-benchmark.html>`_
-       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.7.3_20250325/images/sha256-25245924f61750b19be6dcd8e787e46088a496c1fe17ee9b9e397f3d84d35640>`_
-
   * - 6.3.1
     - 0.6.6
     - 2.7.0
     - 
-       * `Documentation <https://rocm.docs.amd.com/en/docs-6.3.2/how-to/rocm-for-ai/inference/vllm-benchmark.html>`_
+       * `Documentation <https://rocm.docs.amd.com/en/docs-6.3.2/how-to/rocm-for-ai/inference/vllm-benchmark.html>`_
       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_mi300_ubuntu22.04_py3.12_vllm_0.6.6/images/sha256-9a12ef62bbbeb5a4c30a01f702c8e025061f575aa129f291a49fbd02d6b4d6c9>`_

   * - 6.2.1
--- a/docs/how-to/rocm-for-ai/system-health-check.rst
+++ b/docs/how-to/rocm-for-ai/system-health-check.rst
@@ -1,104 +0,0 @@
-.. meta::
-   :description: System health checks with RVS, RCCL tests, BabelStream, and TransferBench to validate AMD hardware performance running AI workloads.
-   :keywords: gpu, accelerator, system, health, validation, bench, perf, performance, rvs, rccl, babel, mi300x, mi325x, flops, bandwidth, rbt, training, inference
-
-.. _rocm-for-ai-system-health-bench:
-
-************************
-System health benchmarks
-************************
-
-Before running AI workloads, it is important to validate that your AMD hardware is configured correctly and is performing optimally. This topic outlines several system health benchmarks you can use to test key aspects like GPU compute capabilities (FLOPS), memory bandwidth, and interconnect performance. Many of these tests are part of the ROCm Validation Suite (RVS).
-
-ROCm Validation Suite (RVS) tests
-=================================
-
-RVS provides a collection of tests, benchmarks, and qualification tools, each
-targeting a specific subsystem of the system under test. It includes tests for
-GPU stress and memory bandwidth.
-
-.. _healthcheck-install-rvs:
-
-Install ROCm Validation Suite
-----------------------------
-
-To get started, install RVS. For example, on an Ubuntu system with ROCm already
-installed, run the following command:
-
-.. code-block:: shell
-
-   sudo apt update
-   sudo apt install rocm-validation-suite
-
-See the `ROCm Validation Suite installation instructions <https://rocm.docs.amd.com/projects/ROCmValidationSuite/en/latest/install/installation.html>`_,
-and `System validation tests <https://instinct.docs.amd.com/projects/system-acceptance/en/latest/mi300x/system-validation.html#system-validation-tests>`_
-in the Instinct documentation for more detailed instructions.
-
-Benchmark, stress, and qualification tests
------------------------------------------
-
-The GPU stress test runs various GEMM computations as workloads to stress the GPU FLOPS performance and check whether it
-meets the configured target GFLOPS.
-
-Run the benchmark, stress, and qualification tests included with RVS. See the `Benchmark, stress, qualification
-<https://instinct.docs.amd.com/projects/system-acceptance/en/latest/mi300x/system-validation.html#benchmark-stress-qualification>`_
-section of the Instinct documentation for usage instructions.
-
-BabelStream test
----------------
-
-BabelStream is a synthetic GPU benchmark based on the STREAM benchmark for
-CPUs, measuring memory transfer rates to and from global device memory.
-BabelStream tests are included with the RVS package as part of the `BABEL module
-<https://rocm.docs.amd.com/projects/ROCmValidationSuite/en/latest/conceptual/rvs-modules.html#babel-benchmark-test-babel-module>`_.
-
-For more information, see `Performance benchmarking
-<https://instinct.docs.amd.com/projects/system-acceptance/en/latest/mi300x/performance-bench.html#babelstream-benchmarking-results>`_
-in the Instinct documentation.
-
-RCCL tests
-==========
-
-The ROCm Communication Collectives Library (RCCL) enables efficient multi-GPU
-communication. The `<https://github.com/ROCm/rccl-tests>`__ suite benchmarks
-the performance and verifies the correctness of these collective operations.
-This helps ensure optimal scaling for multi-accelerator tasks.
-
-1. To get started, build RCCL-tests using the official instructions in the README at
-   `<https://github.com/ROCm/rccl-tests?tab=readme-ov-file#build>`__ or use the
-   following commands:
-
-   .. code-block:: shell
-
-      git clone https://github.com/ROCm/rccl-tests.git
-      cd rccl-tests
-      make
-
-2. Run the suggested RCCL tests -- see `RCCL benchmarking
-   <https://instinct.docs.amd.com/projects/system-acceptance/en/latest/mi300x/performance-bench.html#rccl-benchmarking-results>`_
-   in the Instinct performance benchmarking documentation for instructions.
-
-TransferBench test
-==================
-
-TransferBench is a standalone utility for benchmarking simultaneous data
-transfer performance between various devices in the system, including
-CPU-to-GPU and GPU-to-GPU (peer-to-peer). This helps identify potential
-bottlenecks in data movement between the host system and the GPUs, or between
-GPUs, which can impact end-to-end latency.
-
-.. _healthcheck-install-transferbench:
-
-1. To get started, use the instructions in the `TransferBench documentation
-   <https://rocm.docs.amd.com/projects/TransferBench/en/latest/install/install.html#install-transferbench>`_
-   or use the following commands:
-
-   .. code:: shell
-
-      git clone https://github.com/ROCm/TransferBench.git
-      cd TransferBench
-      CC=hipcc make
-
-2. Run the suggested TransferBench tests -- see `TransferBench benchmarking
-   <https://instinct.docs.amd.com/projects/system-acceptance/en/latest/mi300x/performance-bench.html#transferbench-benchmarking-results>`_
-   in the Instinct performance benchmarking documentation for instructions.
--- a/docs/how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.rst
+++ b/docs/how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.rst
@@ -12,7 +12,7 @@ ROCm is an optimized fork of the upstream
 `<https://github.com/AI-Hypercomputer/maxtext>`__ enabling efficient AI workloads
 on AMD MI300X series accelerators.

-The MaxText for ROCm training Docker (``rocm/jax-training:maxtext-v25.5``) image
+The MaxText for ROCm training Docker (``rocm/jax-training:maxtext-v25.4``) image
 provides a prebuilt environment for training on AMD Instinct MI300X and MI325X accelerators,
 including essential components like JAX, XLA, ROCm libraries, and MaxText utilities.
 It includes the following software components:
@@ -20,15 +20,15 @@ It includes the following software components:
 +--------------------------+--------------------------------+
 | Software component       | Version                        |
 +==========================+================================+
-| ROCm                     | 6.3.4                          |
+| ROCm                     | 6.3.0                          |
 +--------------------------+--------------------------------+
-| JAX                      | 0.4.35                         |
+| JAX                      | 0.4.31                         |
 +--------------------------+--------------------------------+
-| Python                   | 3.10.12                        |
+| Python                   | 3.10                           |
 +--------------------------+--------------------------------+
-| Transformer Engine       | 1.12.0.dev0+b8b92dc            |
+| Transformer Engine       | 1.12.0.dev0+f81a3eb            |
 +--------------------------+--------------------------------+
-| hipBLASLt                | 0.13.0-ae9c477a                |
+| hipBLASLt                | git78ec8622                    |
 +--------------------------+--------------------------------+

 Supported features and models
@@ -48,8 +48,6 @@ MaxText provides the following key features to train large language models effic

 The following models are pre-optimized for performance on AMD Instinct MI300X series accelerators.

-* Llama 3.3 70B
-
 * Llama 3.1 8B

 * Llama 3.1 70B
@@ -79,18 +77,11 @@ across different input sequences. Support for packed input format is planned for
 System validation
 =================

-Before running AI workloads, it's important to validate that your AMD hardware is configured
-correctly and performing optimally.
-
-If you have already validated your system settings, including aspects like NUMA auto-balancing, you
-can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
-optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
+If you have already validated your system settings, including NUMA
+auto-balancing, skip this step. Otherwise, complete the :ref:`system validation
+and optimization steps <train-a-model-system-validation>` to set up your system
 before starting training.

-To test for optimal performance, consult the recommended :ref:`System health benchmarks
-<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
-system's configuration.
-
 Environment setup
 =================

@@ -124,7 +115,7 @@ with RDMA, skip ahead to :ref:`amd-maxtext-download-docker`.

   a. Master address

-      Change ``localhost`` to the master node's resolvable hostname or IP address:
+      Change ``localhost`` to the master node's resolvable hostname or IP address:

      .. code-block:: bash

@@ -182,22 +173,20 @@ with RDMA, skip ahead to :ref:`amd-maxtext-download-docker`.

 .. _amd-maxtext-download-docker:

-Pull the Docker image
---------------------
+Download the Docker image
+-------------------------

 1. Use the following command to pull the Docker image from Docker Hub.

   .. code-block:: shell

-      docker pull rocm/jax-training:maxtext-v25.5
+      docker pull rocm/jax-training:maxtext-v25.4

-2. Use the following command to launch the Docker container. Note that the benchmarking scripts
-   used in the :ref:`following section <amd-maxtext-get-started>` automatically launch the Docker container
-   and execute the benchmark.
+2. Run the Docker container.

   .. code-block:: shell

-      docker run -it --device /dev/dri --device /dev/kfd --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME/.ssh:/root/.ssh --shm-size 128G --name maxtext_training rocm/jax-training:maxtext-v25.5
+      docker run -it --device /dev/dri --device /dev/kfd --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME/.ssh:/root/.ssh --shm-size 128G --name maxtext_training rocm/jax-training:maxtext-v25.4

 .. _amd-maxtext-get-started:

@@ -230,9 +219,7 @@ Single node training benchmarking examples

  Run the single node training benchmark:

-  .. code-block:: shell
-
-     IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama2_7b.sh
+  IMAGE="rocm/jax-training:maxtext-v25.4" bash ./llama2_7b.sh

 * Example 2: Single node training with Llama 2 70B

@@ -246,7 +233,7 @@ Single node training benchmarking examples

  .. code-block:: shell

-     IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama2_70b.sh
+     IMAGE="rocm/jax-training:maxtext-v25.4" bash ./llama2_70b.sh

 * Example 3: Single node training with Llama 3 8B

@@ -260,7 +247,7 @@ Single node training benchmarking examples

  .. code-block:: shell

-     IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama3_8b.sh
+     IMAGE="rocm/jax-training:maxtext-v25.4" bash ./llama3_8b.sh

 * Example 4: Single node training with Llama 3 70B

@@ -274,23 +261,9 @@ Single node training benchmarking examples

  .. code-block:: shell

-     IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama3_70b.sh
+     IMAGE="rocm/jax-training:maxtext-v25.4" bash ./llama3_70b.sh

-* Example 5: Single node training with Llama 3.3 70B
-
-  Download the benchmarking script:
-
-  .. code-block:: shell
-
-     wget https://raw.githubusercontent.com/ROCm/maxtext/refs/heads/main/benchmarks/gpu-rocm/llama3.3_70b.sh
-
-  Run the single node training benchmark:
-
-  .. code-block:: shell
-
-     IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama3.3_70b.sh
-
-* Example 6: Single node training with DeepSeek V2 16B
+* Example 5: Single node training with DeepSeek V2 16B

  Download the benchmarking script:

@@ -302,7 +275,7 @@ Single node training benchmarking examples

  .. code-block:: shell

-     IMAGE="rocm/jax-training:maxtext-v25.5" bash ./deepseek_v2_16b.sh
+     IMAGE="rocm/jax-training:maxtext-v25.4" bash ./deepseek_v2_16b.sh

  .. note::

@@ -370,26 +343,3 @@ own cluster setup.
  .. code-block:: shell

     sbatch -N <num_nodes> llama3_70b_multinode.sh
-
-Previous versions
-=================
-
-This table lists previous versions of the ROCm JAX MaxText Docker image for training
-performance testing. For detailed information about available models for
-benchmarking, see the version-specific documentation.
-
-.. list-table::
-   :header-rows: 1
-   :stub-columns: 1
-
-   * - Image version
-     - ROCm version
-     - JAX version
-     - Resources
-
-   * - 25.4
-     - 6.3.0
-     - 0.4.31
-     - 
-       * `Documentation <https://rocm.docs.amd.com/en/docs-6.3.3/how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.html>`_
-       * `Docker Hub <https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.4/images/sha256-fb3eb71cd74298a7b3044b7130cf84113f14d518ff05a2cd625c11ea5f6a7b01>`_
--- a/docs/how-to/rocm-for-ai/training/benchmark-docker/megatron-lm.rst
+++ b/docs/how-to/rocm-for-ai/training/benchmark-docker/megatron-lm.rst
@@ -103,18 +103,11 @@ popular AI models.
 System validation
 =================

-Before running AI workloads, it's important to validate that your AMD hardware is configured
-correctly and performing optimally.
-
-If you have already validated your system settings, including aspects like NUMA auto-balancing, you
-can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
-optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
+If you have already validated your system settings, including NUMA
+auto-balancing, skip this step. Otherwise, complete the :ref:`system validation
+and optimization steps <train-a-model-system-validation>` to set up your system
 before starting training.

-To test for optimal performance, consult the recommended :ref:`System health benchmarks
-<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
-system's configuration.
-
 .. _mi300x-amd-megatron-lm-training:

 Environment setup
--- a/docs/how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry.rst
+++ b/docs/how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry.rst
@@ -1,175 +0,0 @@
-.. meta::
-   :description: How to train a model using LLM Foundry for ROCm.
-   :keywords: ROCm, AI, LLM, train, PyTorch, torch, Llama, flux, tutorial, docker
-
-******************************************
-Training MPT-30B with LLM Foundry and ROCm
-******************************************
-
-MPT-30B is a 30-billion parameter decoder-style transformer-based model from
-the Mosaic Pretrained Transformer (MPT) family -- learn more about it in
-MosaicML's research blog `MPT-30B: Raising the bar for open-source foundation
-models <https://www.databricks.com/blog/mpt-30b>`_.
-
-ROCm and `<https://github.com/ROCm/MAD>`__ provide a pre-configured training
-environment for the MPT-30B model using the ``rocm/pytorch-training:v25.5``
-base `Docker image <https://hub.docker.com/layers/rocm/pytorch-training/v25.5/images/sha256-d47850a9b25b4a7151f796a8d24d55ea17bba545573f0d50d54d3852f96ecde5>`_
-and the `LLM Foundry <https://github.com/mosaicml/llm-foundry>`_ framework.
-This environment packages the following software components to train
-on AMD Instinct MI300X series accelerators:
-
-+--------------------------+--------------------------------+
-| Software component       | Version                        |
-+==========================+================================+
-| ROCm                     | 6.3.4                          |
-+--------------------------+--------------------------------+
-| PyTorch                  | 2.7.0a0+git6374332             |
-+--------------------------+--------------------------------+
-| Flash Attention          | 3.0.0.post1                    |
-+--------------------------+--------------------------------+
-
-Using this image, you can build, run, and test the training process
-for MPT-30B with access to detailed logs and performance metrics.
-
-System validation
-=================
-
-Before running AI workloads, it's important to validate that your AMD hardware is configured
-correctly and performing optimally.
-
-If you have already validated your system settings, including aspects like NUMA auto-balancing, you
-can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
-optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
-before starting training.
-
-To test for optimal performance, consult the recommended :ref:`System health benchmarks
-<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
-system's configuration.
-
-Getting started
-===============
-
-The following procedures help you set up the training environment in a
-reproducible Docker container. This training environment is tailored for
-training MPT-30B using LLM Foundry and the specific model configurations outlined.
-Other configurations and run conditions outside those described in this
-document are not validated.
-
-.. tab-set::
-
-   .. tab-item:: MAD-integrated benchmarking
-
-      On your host machine, clone the ROCm Model Automation and Dashboarding
-      (`<https://github.com/ROCm/MAD>`__) repository to a local directory and
-      install the required packages.
-
-      .. code-block:: shell
-
-         git clone https://github.com/ROCm/MAD
-         cd MAD
-         pip install -r requirements.txt
-
-      Use this command to initiate the MPT-30B training benchmark.
-
-      .. code-block:: shell
-
-         python3 tools/run_models.py --tags pyt_mpt30b_training --keep-model-dir --live-output --clean-docker-cache
-
-      .. tip::
-
-         If you experience data download failures, set the
-         ``MAD_SECRETS_HFTOKEN`` variable to your Hugging Face access token. See
-         `User access tokens <https://huggingface.co/docs/hub/security-tokens>`_
-         for details.
-
-         .. code-block:: shell
-
-            export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
-
-      .. note::
-
-         For improved performance (training throughput), consider enabling TunableOp.
-         By default, ``pyt_mpt30b_training`` runs with TunableOp disabled. To enable it,
-         run ``tools/run_models.py`` with the ``--tunableop on`` argument or edit the
-         ``models.json`` configuration before running training.
-
-         Although this might increase the initial training time, it can result in a performance gain.
-
-   .. tab-item:: Standalone benchmarking
-
-      To set up the training environment, clone the
-      `<https://github.com/ROCm/MAD>`__ repo and build the Docker image. In
-      this snippet, the image is named ``mosaic_mpt30_image``.
-
-      .. code-block:: shell
-
-         git clone https://github.com/ROCm/MAD
-         cd MAD
-
-         docker build --build-arg MAD_SYSTEM_GPU_ARCHITECTURE=gfx942 -f docker/pyt_mpt30b_training.ubuntu.amd.Dockerfile -t mosaic_mpt30_image .
-
-      Start a ``mosaic_mpt30_image`` container using the following command.
-
-      .. code-block:: shell
-
-         docker run -it --device=/dev/kfd --device=/dev/dri --group-add=video --ipc=host --shm-size=8G mosaic_mpt30_image
-
-      In the Docker container, clone the `<https://github.com/ROCm/MAD>`__
-      repository and navigate to the benchmark scripts directory at
-      ``/workspace/MAD/scripts/pyt_mpt30b_training``.
-
-      .. code-block:: shell
-
-         git clone https://github.com/ROCm/MAD
-         cd MAD/scripts/pyt_mpt30b_training
-
-      To initiate the training process, use the following command. This script uses the hyperparameters defined in
-      ``mpt-30b-instruct.yaml``.
-
-      .. code-block:: shell
-
-         source run.sh
-
-      .. note::
-
-         For improved performance (training throughput), consider enabling TunableOp.
-         To enable it, add the ``--tunableop on`` flag.
-
-         .. code-block:: shell
-
-            source run.sh --tunableop on
-
-         Although this might increase the initial training time, it can result in a performance gain.
-
-Interpreting the output
-=======================
-
-The training output will be displayed in the terminal and simultaneously saved
-to the ``output.txt`` file in the current directory. Key performance metrics will
-also be extracted and appended to the ``perf_pyt_mpt30b_training.csv`` file.
-
-Key performance metrics include:
-
- Training logs: Real-time display of loss metrics, accuracy, and training progress.
-
- Model checkpoints: Periodically saved model snapshots for potential resume or evaluation.
-
- Performance metrics: Detailed summaries of training speed and training loss metrics.
-
-  - Performance (throughput/samples_per_sec)
-
-    Overall throughput, measuring the total samples processed per second. Higher values indicate better hardware utilization.
-
-  - Performance per device (throughput/samples_per_sec)
-
-    Throughput on a per-device basis, showing how each GPU or CPU is performing.
-
-  - Language Cross Entropy (metrics/train/LanguageCrossEntropy)
-
-    Measures prediction accuracy. Lower cross entropy suggests the model’s output is closer to the expected distribution.
-
-  - Training loss (loss/train/total)
-
-    Overall training loss. A decreasing trend indicates the model is learning effectively.
-
-
--- a/docs/how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.rst
+++ b/docs/how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.rst
@@ -9,8 +9,7 @@ Training a model with PyTorch for ROCm
 PyTorch is an open-source machine learning framework that is widely used for
 model training with GPU-optimized components for transformer-based models.

-The `PyTorch for ROCm training Docker <https://hub.docker.com/layers/rocm/pytorch-training/v25.5/images/sha256-d47850a9b25b4a7151f796a8d24d55ea17bba545573f0d50d54d3852f96ecde5>`_
-(``rocm/pytorch-training:v25.5``) image
+The PyTorch for ROCm training Docker (``rocm/pytorch-training:v25.4``) image
 provides a prebuilt optimized environment for fine-tuning and pretraining a
 model on AMD Instinct MI325X and MI300X accelerators. It includes the following
 software components to accelerate training workloads:
@@ -18,19 +17,19 @@ software components to accelerate training workloads:
 +--------------------------+--------------------------------+
 | Software component       | Version                        |
 +==========================+================================+
-| ROCm                     | 6.3.4                          |
+| ROCm                     | 6.3.0                          |
 +--------------------------+--------------------------------+
 | PyTorch                  | 2.7.0a0+git637433              |
 +--------------------------+--------------------------------+
 | Python                   | 3.10                           |
 +--------------------------+--------------------------------+
-| Transformer Engine       | 1.12.0.dev0+25a33da            |
+| Transformer Engine       | 1.11                           |
 +--------------------------+--------------------------------+
 | Flash Attention          | 3.0.0                          |
 +--------------------------+--------------------------------+
-| hipBLASLt                | git53b53bf                     |
+| hipBLASLt                | git258a2162                    |
 +--------------------------+--------------------------------+
-| Triton                   | 3.2.0                          |
+| Triton                   | 3.1                            |
 +--------------------------+--------------------------------+

 .. _amd-pytorch-training-model-support:
@@ -40,8 +39,6 @@ Supported models

 The following models are pre-optimized for performance on the AMD Instinct MI325X and MI300X accelerators.

-* Llama 3.3 70B
-
 * Llama 3.1 8B

 * Llama 3.1 70B
@@ -77,358 +74,314 @@ popular AI models.
 System validation
 =================

-Before running AI workloads, it's important to validate that your AMD hardware is configured
-correctly and performing optimally.
-
-If you have already validated your system settings, including aspects like NUMA auto-balancing, you
-can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
-optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
+If you have already validated your system settings, including NUMA
+auto-balancing, skip this step. Otherwise, complete the :ref:`system validation
+and optimization steps <train-a-model-system-validation>` to set up your system
 before starting training.

-To test for optimal performance, consult the recommended :ref:`System health benchmarks
-<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
-system's configuration.
+Environment setup
+=================

 This Docker image is optimized for specific model configurations outlined
 below. Performance can vary for other training workloads, as AMD 
 doesn’t validate configurations and run conditions outside those described.

-Benchmarking
-============
+Download the Docker image
+-------------------------

-Once the setup is complete, choose between two options to start benchmarking:
+1. Use the following command to pull the Docker image from Docker Hub.

-.. tab-set::
+   .. code-block:: shell

-   .. tab-item:: MAD-integrated benchmarking
+      docker pull rocm/pytorch-training:v25.4

-      Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
-      directory and install the required packages on the host machine.
+2. Run the Docker container.

-      .. code-block:: shell
+   .. code-block:: shell

-         git clone https://github.com/ROCm/MAD
-         cd MAD
-         pip install -r requirements.txt
+      docker run -it --device /dev/dri --device /dev/kfd --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME:$HOME -v  $HOME/.ssh:/root/.ssh --shm-size 64G --name training_env rocm/pytorch-training:v25.4

-      For example, use this command to run the performance benchmark test on the Llama 3.1 8B model
-      using one GPU with the float16 data type on the host machine.
+3. Use these commands if you exit the ``training_env`` container and need to return to it.

-      .. code-block:: shell
+   .. code-block:: shell

-         export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
-         python3 tools/run_models.py --tags pyt_train_llama-3.1-8b --keep-model-dir --live-output --timeout 28800
+      docker start training_env
+      docker exec -it training_env bash

-      The available models for MAD-integrated benchmarking are:
+4. In the Docker container, clone the `<https://github.com/ROCm/MAD>`__
+   repository and navigate to the benchmark scripts directory
+   ``/workspace/MAD/scripts/pytorch_train``.

-      * ``pyt_train_llama-3.3-70b``
+   .. code-block:: shell

-      * ``pyt_train_llama-3.1-8b``
+      git clone https://github.com/ROCm/MAD
+      cd MAD/scripts/pytorch_train

-      * ``pyt_train_llama-3.1-70b``
+Prepare training datasets and dependencies
+------------------------------------------

-      * ``pyt_train_flux``
+The following benchmarking examples require downloading models and datasets
+from Hugging Face. To ensure successful access to gated repos, set your
+``HF_TOKEN``.

-      MAD launches a Docker container with the name
-      ``container_ci-pyt_train_llama-3.1-8b``, for example. The latency and throughput reports of the
-      model are collected in the following path: ``~/MAD/perf.csv``.
+.. code-block:: shell

-   .. tab-item:: Standalone benchmarking
+   export HF_TOKEN=$your_personal_hugging_face_access_token

-      .. rubric:: Download the Docker image and required packages
+Run the setup script to install libraries and datasets needed for benchmarking.

-      Use the following command to pull the Docker image from Docker Hub.
+.. code-block:: shell

-      .. code-block:: shell
+   ./pytorch_benchmark_setup.sh

-         docker pull rocm/pytorch-training:v25.5
+``pytorch_benchmark_setup.sh`` installs the following libraries:

-      Run the Docker container.
+.. list-table::
+   :header-rows: 1

-      .. code-block:: shell
+   * - Library
+     - Benchmark model
+     - Reference

-         docker run -it --device /dev/dri --device /dev/kfd --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME:$HOME -v  $HOME/.ssh:/root/.ssh --shm-size 64G --name training_env rocm/pytorch-training:v25.5
+   * - ``accelerate``
+     - Llama 3.1 8B, FLUX
+     - `Hugging Face Accelerate <https://huggingface.co/docs/accelerate/en/index>`_

-      Use these commands if you exit the ``training_env`` container and need to return to it.
+   * - ``datasets``
+     - Llama 3.1 8B, 70B, FLUX
+     - `Hugging Face Datasets <https://huggingface.co/docs/datasets/v3.2.0/en/index>`_ 3.2.0

-      .. code-block:: shell
+   * - ``torchdata``
+     - Llama 3.1 70B
+     - `TorchData <https://pytorch.org/data/beta/index.html>`_

-         docker start training_env
-         docker exec -it training_env bash
+   * - ``tomli``
+     - Llama 3.1 70B
+     - `Tomli <https://pypi.org/project/tomli/>`_

-      In the Docker container, clone the `<https://github.com/ROCm/MAD>`__
-      repository and navigate to the benchmark scripts directory
-      ``/workspace/MAD/scripts/pytorch_train``.
+   * - ``tiktoken``
+     - Llama 3.1 70B
+     - `tiktoken <https://github.com/openai/tiktoken>`_

-      .. code-block:: shell
+   * - ``blobfile``
+     - Llama 3.1 70B
+     - `blobfile <https://pypi.org/project/blobfile/>`_

-         git clone https://github.com/ROCm/MAD
-         cd MAD/scripts/pytorch_train
+   * - ``tabulate``
+     - Llama 3.1 70B
+     - `tabulate <https://pypi.org/project/tabulate/>`_

-      .. rubric:: Prepare training datasets and dependencies
+   * - ``wandb``
+     - Llama 3.1 70B
+     - `Weights & Biases <https://github.com/wandb/wandb>`_

-      The following benchmarking examples require downloading models and datasets
-      from Hugging Face. To ensure successful access to gated repos, set your
-      ``HF_TOKEN``.
+   * - ``sentencepiece``
+     - Llama 3.1 70B, FLUX
+     - `SentencePiece <https://github.com/google/sentencepiece>`_ 0.2.0

-      .. code-block:: shell
+   * - ``tensorboard``
+     - Llama 3.1 70B, FLUX
+     - `TensorBoard <https://www.tensorflow.org/tensorboard>`_ 2.18.0

-         export HF_TOKEN=$your_personal_hugging_face_access_token
+   * - ``csvkit``
+     - FLUX
+     - `csvkit <https://csvkit.readthedocs.io/en/latest/>`_ 2.0.1

-      Run the setup script to install libraries and datasets needed for benchmarking.
+   * - ``deepspeed``
+     - FLUX
+     - `DeepSpeed <https://github.com/deepspeedai/DeepSpeed>`_ 0.16.2

-      .. code-block:: shell
+   * - ``diffusers``
+     - FLUX
+     - `Hugging Face Diffusers <https://huggingface.co/docs/diffusers/en/index>`_ 0.31.0

-         ./pytorch_benchmark_setup.sh
+   * - ``GitPython``
+     - FLUX
+     - `GitPython <https://github.com/gitpython-developers/GitPython>`_ 3.1.44

-      ``pytorch_benchmark_setup.sh`` installs the following libraries:
+   * - ``opencv-python-headless``
+     - FLUX
+     - `opencv-python-headless <https://pypi.org/project/opencv-python-headless/>`_ 4.10.0.84

-      .. list-table::
-         :header-rows: 1
+   * - ``peft``
+     - FLUX
+     - `PEFT <https://huggingface.co/docs/peft/en/index>`_ 0.14.0

-         * - Library
-           - Benchmark model
-           - Reference
+   * - ``protobuf``
+     - FLUX
+     - `Protocol Buffers <https://github.com/protocolbuffers/protobuf>`_ 5.29.2

-         * - ``accelerate``
-           - Llama 3.1 8B, FLUX
-           - `Hugging Face Accelerate <https://huggingface.co/docs/accelerate/en/index>`_
+   * - ``pytest``
+     - FLUX
+     - `PyTest <https://docs.pytest.org/en/stable/>`_ 8.3.4

-         * - ``datasets``
-           - Llama 3.1 8B, 70B, FLUX
-           - `Hugging Face Datasets <https://huggingface.co/docs/datasets/v3.2.0/en/index>`_ 3.2.0
+   * - ``python-dotenv``
+     - FLUX
+     - `python-dotenv <https://pypi.org/project/python-dotenv/>`_ 1.0.1

-         * - ``torchdata``
-           - Llama 3.1 70B
-           - `TorchData <https://pytorch.org/data/beta/index.html>`_
+   * - ``seaborn``
+     - FLUX
+     - `Seaborn <https://seaborn.pydata.org/>`_ 0.13.2

-         * - ``tomli``
-           - Llama 3.1 70B
-           - `Tomli <https://pypi.org/project/tomli/>`_
+   * - ``transformers``
+     - FLUX
+     - `Transformers <https://huggingface.co/docs/transformers/en/index>`_ 4.47.0

-         * - ``tiktoken``
-           - Llama 3.1 70B
-           - `tiktoken <https://github.com/openai/tiktoken>`_
+``pytorch_benchmark_setup.sh`` downloads the following models from Hugging Face:

-         * - ``blobfile``
-           - Llama 3.1 70B
-           - `blobfile <https://pypi.org/project/blobfile/>`_
+* `meta-llama/Llama-3.1-70B-Instruct <https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct>`_

-         * - ``tabulate``
-           - Llama 3.1 70B
-           - `tabulate <https://pypi.org/project/tabulate/>`_
+* `black-forest-labs/FLUX.1-dev <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_

-         * - ``wandb``
-           - Llama 3.1 70B
-           - `Weights & Biases <https://github.com/wandb/wandb>`_
+Along with the following datasets:

-         * - ``sentencepiece``
-           - Llama 3.1 70B, FLUX
-           - `SentencePiece <https://github.com/google/sentencepiece>`_ 0.2.0
+* `WikiText <https://huggingface.co/datasets/Salesforce/wikitext>`_

-         * - ``tensorboard``
-           - Llama 3.1 70 B, FLUX
-           - `TensorBoard <https://www.tensorflow.org/tensorboard>`_ 2.18.0
+* `UltraChat 200k <https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k>`_

-         * - ``csvkit``
-           - FLUX
-           - `csvkit <https://csvkit.readthedocs.io/en/latest/>`_ 2.0.1
+* `bghira/pseudo-camera-10k <https://huggingface.co/datasets/bghira/pseudo-camera-10k>`_

-         * - ``deepspeed``
-           - FLUX
-           - `DeepSpeed <https://github.com/deepspeedai/DeepSpeed>`_ 0.16.2
+Getting started
+===============

-         * - ``diffusers``
-           - FLUX
-           - `Hugging Face Diffusers <https://huggingface.co/docs/diffusers/en/index>`_ 0.31.0
+The prebuilt PyTorch with ROCm training environment allows users to quickly validate
+system performance, conduct training benchmarks, and achieve superior
+performance for models like Llama 3.1 and Llama 2. This container should not be
+expected to provide generalized performance across all training workloads. You
+can expect the container to perform in the model configurations described in
+the following section, but other configurations are not validated by AMD.

-         * - ``GitPython``
-           - FLUX
-           - `GitPython <https://github.com/gitpython-developers/GitPython>`_ 3.1.44
+Use the following instructions to set up the environment, configure the script
+to train models, and reproduce the benchmark results on MI325X and MI300X
+accelerators with the AMD PyTorch training Docker image.

-         * - ``opencv-python-headless``
-           - FLUX
-           - `opencv-python-headless <https://pypi.org/project/opencv-python-headless/>`_ 4.10.0.84
+Once your environment is set up, use the following commands and examples to start benchmarking.

-         * - ``peft``
-           - FLUX
-           - `PEFT <https://huggingface.co/docs/peft/en/index>`_ 0.14.0
+Pretraining
+-----------

-         * - ``protobuf``
-           - FLUX
-           - `Protocol Buffers <https://github.com/protocolbuffers/protobuf>`_ 5.29.2
+To start the pretraining benchmark, use the following command with the
+appropriate options. See the following list of options and their descriptions.

-         * - ``pytest``
-           - FLUX
-           - `PyTest <https://docs.pytest.org/en/stable/>`_ 8.3.4
+.. code-block:: shell

-         * - ``python-dotenv``
-           - FLUX
-           - `python-dotenv <https://pypi.org/project/python-dotenv/>`_ 1.0.1
+   ./pytorch_benchmark_report.sh -t $training_mode -m $model_repo -p $datatype -s $sequence_length

-         * - ``seaborn``
-           - FLUX
-           - `Seaborn <https://seaborn.pydata.org/>`_ 0.13.2
+Options and available models
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-         * - ``transformers``
-           - FLUX
-           - `Transformers <https://huggingface.co/docs/transformers/en/index>`_ 4.47.0
+.. list-table::
+   :header-rows: 1

-      ``pytorch_benchmark_setup.sh`` downloads the following models from Hugging Face:
+   * - Name
+     - Options
+     - Description

-      * `meta-llama/Llama-3.1-70B-Instruct <https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct>`_
+   * - ``$training_mode``
+     - ``pretrain``
+     - Benchmark pretraining

-      * `black-forest-labs/FLUX.1-dev <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
+   * -
+     - ``finetune_fw``
+     - Benchmark full weight fine-tuning (Llama 3.1 70B with BF16)

-      Along with the following datasets:
+   * -
+     - ``finetune_lora``
+     - Benchmark LoRA fine-tuning (Llama 3.1 70B with BF16)

-      * `WikiText <https://huggingface.co/datasets/Salesforce/wikitext>`_
+   * -
+     - ``HF_finetune_lora``
+     - Benchmark LoRA fine-tuning with Hugging Face PEFT (Llama 2 70B with BF16)

-      * `UltraChat 200k <https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k>`_
+   * - ``$datatype``
+     - ``FP8`` or ``BF16``
+     - Only Llama 3.1 8B supports FP8 precision.

-      * `bghira/pseudo-camera-10k <https://huggingface.co/datasets/bghira/pseudo-camera-10k>`_
+   * - ``$model_repo``
+     - ``Llama-3.1-8B``
+     - `Llama 3.1 8B <https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct>`_

-      .. rubric:: Pretraining
+   * - 
+     - ``Llama-3.1-70B``
+     - `Llama 3.1 70B <https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct>`_

-      To start the pretraining benchmark, use the following command with the
-      appropriate options. See the following list of options and their descriptions.
+   * - 
+     - ``Llama-2-70B``
+     - `Llama 2 70B <https://huggingface.co/meta-llama/Llama-2-70B>`_

-      .. code-block:: shell
+   * - 
+     - ``Flux``
+     - `FLUX.1 [dev] <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_

-         ./pytorch_benchmark_report.sh -t $training_mode -m $model_repo -p $datatype -s $sequence_length
+   * - ``$sequence_length``
+     - Sequence length for the language model.
+     - Between 2048 and 8192. 8192 by default.

-      .. list-table::
-         :header-rows: 1
+.. note::

-         * - Name
-           - Options
-           - Description
+   Occasionally, downloading the Flux dataset might fail. In the event of this
+   error, manually download it from Hugging Face at
+   `black-forest-labs/FLUX.1-dev <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
+   and save it to ``/workspace/FluxBenchmark``. This ensures that the test script can access
+   the required dataset.

-         * - ``$training_mode``
-           - ``pretrain``
-           - Benchmark pretraining
+Fine-tuning
+-----------

-         * -
-           - ``finetune_fw``
-           - Benchmark full weight fine-tuning (Llama 3.1 70B with BF16)
+To start the fine-tuning benchmark, use the following command. It will run the benchmarking example of Llama 3.1 70B
+with the WikiText dataset using the AMD fork of `torchtune <https://github.com/AMD-AIG-AIMA/torchtune>`_.

-         * -
-           - ``finetune_lora``
-           - Benchmark LoRA fine-tuning (Llama 3.1 70B with BF16)
+.. code-block:: shell

-         * -
-           - ``HF_finetune_lora``
-           - Benchmark LoRA fine-tuning with Hugging Face PEFT (Llama 2 70B with BF16)
+   ./pytorch_benchmark_report.sh -t {finetune_fw, finetune_lora} -p BF16 -m Llama-3.1-70B

-         * - ``$datatype``
-           - ``FP8`` or ``BF16``
-           - Only Llama 3.1 8B supports FP8 precision.
+Use the following command to run the benchmarking example of Llama 2 70B with the UltraChat 200k dataset using
+`Hugging Face PEFT <https://huggingface.co/docs/peft/en/index>`_.

-         * - ``$model_repo``
-           - ``Llama-3.3-70B``
-           - `Llama 3.3 70B <https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct>`_
+.. code-block:: shell

-         * - 
-           - ``Llama-3.1-8B``
-           - `Llama 3.1 8B <https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct>`_
+   ./pytorch_benchmark_report.sh -t HF_finetune_lora -p BF16 -m Llama-2-70B

-         * - 
-           - ``Llama-3.1-70B``
-           - `Llama 3.1 70B <https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct>`_
+Benchmarking examples
+---------------------

-         * - 
-           - ``Llama-2-70B``
-           - `Llama 2 70B <https://huggingface.co/meta-llama/Llama-2-70B>`_
+Here are some examples of how to use the command.

-         * - 
-           - ``Flux``
-           - `FLUX.1 [dev] <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
+* Example 1: Llama 3.1 70B with BF16 precision with `torchtitan <https://github.com/ROCm/torchtitan>`_.

-         * - ``$sequence_length``
-           - Sequence length for the language model.
-           - Between 2048 and 8192. 8192 by default.
+  .. code-block:: shell

-      .. note::
+     ./pytorch_benchmark_report.sh -t pretrain -p BF16 -m Llama-3.1-70B -s 8192

-         Occasionally, downloading the Flux dataset might fail. In the event of this
-         error, manually download it from Hugging Face at
-         `black-forest-labs/FLUX.1-dev <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
-         and save it to `/workspace/FluxBenchmark`. This ensures that the test script can access
-         the required dataset.
+* Example 2: Llama 3.1 8B with FP8 precision using Transformer Engine (TE) and Hugging Face Accelerate.

-      .. rubric:: Fine-tuning
+  .. code-block:: shell

-      To start the fine-tuning benchmark, use the following command. It will run the benchmarking example of Llama 3.1 70B
-      with the WikiText dataset using the AMD fork of `torchtune <https://github.com/AMD-AIG-AIMA/torchtune>`_.
+     ./pytorch_benchmark_report.sh -t pretrain -p FP8 -m Llama-3.1-8B -s 8192

-      .. code-block:: shell
+* Example 3: FLUX.1-dev with BF16 precision with FluxBenchmark.

-         ./pytorch_benchmark_report.sh -t {finetune_fw, finetune_lora} -p BF16 -m Llama-3.1-70B
+  .. code-block:: shell

-      Use the following command to run the benchmarking example of Llama 2 70B with the UltraChat 200k dataset using
-      `Hugging Face PEFT <https://huggingface.co/docs/peft/en/index>`_.
+     ./pytorch_benchmark_report.sh -t pretrain -p BF16 -m Flux

-      .. code-block:: shell
+* Example 4: Torchtune full weight fine-tuning with Llama 3.1 70B

-         ./pytorch_benchmark_report.sh -t HF_finetune_lora -p BF16 -m Llama-2-70B
+  .. code-block:: shell

-      .. rubric:: Benchmarking examples
+     ./pytorch_benchmark_report.sh -t finetune_fw -p BF16 -m Llama-3.1-70B

-      Here are some example commands to get started pretraining and fine-tuning with various model configurations.
+* Example 5: Torchtune LoRA fine-tuning with Llama 3.1 70B

-      * Example 1: Llama 3.1 70B with BF16 precision with `torchtitan <https://github.com/ROCm/torchtitan>`_.
+  .. code-block:: shell

-        .. code-block:: shell
+     ./pytorch_benchmark_report.sh -t finetune_lora -p BF16 -m Llama-3.1-70B

-           ./pytorch_benchmark_report.sh -t pretrain -p BF16 -m Llama-3.1-70B -s 8192
+* Example 6: Hugging Face PEFT LoRA fine-tuning with Llama 2 70B

-      * Example 2: Llama 3.1 8B with FP8 precision using Transformer Engine (TE) and Hugging Face Accelerator.
+  .. code-block:: shell

-        .. code-block:: shell
-
-           ./pytorch_benchmark_report.sh -t pretrain -p FP8 -m Llama-3.1-70B -s 8192
-
-      * Example 3: FLUX.1-dev with BF16 precision with FluxBenchmark.
-
-        .. code-block:: shell
-
-           ./pytorch_benchmark_report.sh -t pretrain -p BF16 -m Flux
-
-      * Example 4: Torchtune full weight fine-tuning with Llama 3.1 70B
-
-        .. code-block:: shell
-
-           ./pytorch_benchmark_report.sh -t finetune_fw -p BF16 -m Llama-3.1-70B
-
-      * Example 5: Torchtune LoRA fine-tuning with Llama 3.1 70B
-
-        .. code-block:: shell
-
-           ./pytorch_benchmark_report.sh -t finetune_lora -p BF16 -m Llama-3.1-70B
-
-      * Example 6: Torchtune full weight fine-tuning with Llama-3.3-70B
-
-        .. code-block:: shell
-
-           ./pytorch_benchmark_report.sh -t finetune_fw -p BF16 -m Llama-3.3-70B
-
-      * Example 7: Torchtune LoRA fine-tuning with Llama-3.3-70B
-
-        .. code-block:: shell
-
-           ./pytorch_benchmark_report.sh -t finetune_lora -p BF16 -m Llama-3.3-70B
-
-      * Example 8: Torchtune QLoRA fine-tuning with Llama-3.3-70B
-
-        .. code-block:: shell
-
-           ./pytorch_benchmark_report.sh -t finetune_qlora -p BF16 -m Llama-3.3-70B
-
-      * Example 9: Hugging Face PEFT LoRA fine-tuning with Llama 2 70B
-
-        .. code-block:: shell
-
-           ./pytorch_benchmark_report.sh -t HF_finetune_lora -p BF16 -m Llama-2-70B
+     ./pytorch_benchmark_report.sh -t HF_finetune_lora -p BF16 -m Llama-2-70B

 Previous versions
 =================
@@ -446,13 +399,6 @@ benchmarking, see the version-specific documentation.
     - PyTorch version
     - Resources

-   * - v25.4
-     - 6.3.0
-     - 2.7.0a0+git637433
-     - 
-       * `Documentation <https://rocm.docs.amd.com/en/docs-6.3.3/how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.html>`_
-       * `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-training/v25.4/images/sha256-fa98a9aa69968e654466c06f05aaa12730db79b48b113c1ab4f7a5fe6920a20b>`_
-
   * - v25.3
     - 6.3.0
     - 2.7.0a0+git637433
--- a/docs/how-to/rocm-for-ai/training/index.rst
+++ b/docs/how-to/rocm-for-ai/training/index.rst
@@ -21,12 +21,8 @@ In this guide, you'll learn about:

 - Training a model

-  - :doc:`With Megatron-LM <benchmark-docker/megatron-lm>`
+  - :doc:`Train a model with Megatron-LM <benchmark-docker/megatron-lm>`

-  - :doc:`With PyTorch <benchmark-docker/pytorch-training>`
-
-  - :doc:`With JAX MaxText <benchmark-docker/jax-maxtext>`
-
-  - :doc:`With LLM Foundry <benchmark-docker/mpt-llm-foundry>`
+  - :doc:`Train a model with PyTorch <benchmark-docker/pytorch-training>`

 - :doc:`Scaling model training <scale-model-training>`
--- a/docs/how-to/rocm-for-ai/training/prerequisite-system-validation.rst
+++ b/docs/how-to/rocm-for-ai/training/prerequisite-system-validation.rst
@@ -5,13 +5,12 @@
   :keywords: ROCm, AI, LLM, train, megatron, Llama, tutorial, docker, torch, pytorch, jax

 .. _train-a-model-system-validation:
-.. _rocm-for-ai-system-optimization:

-**********************************************************
-Prerequisite system validation before running AI workloads
-**********************************************************
+**********************************************
+Prerequisite system validation before training
+**********************************************

-Complete the following system validation and optimization steps to set up your system before starting training and inference.
+Complete the following system validation and optimization steps to set up your system before starting training.

 Disable NUMA auto-balancing
 ---------------------------
@@ -27,8 +26,7 @@ the output is ``1``, run the following command to disable NUMA auto-balancing.

   sudo sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'

-See `Disable NUMA auto-balancing <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html#disable-numa-auto-balancing>`_
-in the Instinct documentation for more information.
+See :ref:`mi300x-disable-numa` for more information.

 Hardware verification with ROCm
 -------------------------------
@@ -44,8 +42,7 @@ Run the command:

   rocm-smi --setperfdeterminism 1900

-See `Hardware verfication for ROCm <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html#hardware-verification-with-rocm>`_
-in the Instinct documentation for more information.
+See :ref:`mi300x-hardware-verification-with-rocm` for more information.

 RCCL Bandwidth Test for multi-node setups
 -----------------------------------------
--- a/docs/reference/api-libraries.md
+++ b/docs/reference/api-libraries.md
@@ -45,7 +45,6 @@
 (communication-libraries)=

 * {doc}`RCCL <rccl:index>`
-* {doc}`rocSHMEM <rocSHMEM:index>`
 :::

 :::{grid-item-card} Math
--- a/docs/reference/gpu-arch-specs.rst
+++ b/docs/reference/gpu-arch-specs.rst
@@ -296,7 +296,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 768
-          - 32
+          - 16
          - 11
          - 0
        *
@@ -314,7 +314,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 768
-          - 32
+          - 16
          - 11
          - 0
        *
@@ -332,7 +332,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 768
-          - 32
+          - 16
          - 11
          - 0
        *
@@ -350,7 +350,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 768
-          - 32
+          - 16
          - 11
          - 0
        *
@@ -368,7 +368,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 768
-          - 32
+          - 16
          - 11
          - 0
        *
@@ -386,7 +386,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 768
-          - 32
+          - 16
          - 11
          - 0
        *
@@ -404,7 +404,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 512
-          - 32
+          - 16
          - 10
          - 3
        *
@@ -422,7 +422,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 512
-          - 32
+          - 16
          - 10
          - 3
        *
@@ -440,7 +440,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 512
-          - 32
+          - 16
          - 10
          - 3
        *
@@ -519,7 +519,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 768
-          - 32
+          - 16
          - 11
          - 0
        *
@@ -537,7 +537,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 768
-          - 32
+          - 16
          - 11
          - 0
        *
@@ -555,7 +555,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 768
-          - 32
+          - 16
          - 11
          - 0
        *
@@ -573,7 +573,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 768
-          - 32
+          - 16
          - 11
          - 0
        *
@@ -591,7 +591,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 768
-          - 32
+          - 16
          - 11
          - 0
        *
@@ -609,7 +609,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 512
-          - 32
+          - 16
          - 11
          - 0
        *
@@ -627,7 +627,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 512
-          - 32
+          - 16
          - 10
          - 3
        *
@@ -645,7 +645,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 512
-          - 32
+          - 16
          - 10
          - 3
        *
@@ -663,7 +663,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 512
-          - 32
+          - 16
          - 10
          - 3
        *
@@ -681,7 +681,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 512
-          - 32
+          - 16
          - 10
          - 3
        *
@@ -699,7 +699,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 512
-          - 32
+          - 16
          - 10
          - 3
        *
@@ -717,7 +717,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 512
-          - 32
+          - 16
          - 10
          - 3
        *
@@ -735,7 +735,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 512
-          - 32
+          - 16
          - 10
          - 3
        *
@@ -753,7 +753,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 512
-          - 32
+          - 16
          - 10
          - 3
        *
@@ -771,7 +771,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 512
-          - 32
+          - 16
          - 10
          - 3
        *
@@ -789,7 +789,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 16
          - 32
          - 512
-          - 32
+          - 16
          - 10
          - 3
        *
--- a/docs/sphinx/_toc.yml.in
+++ b/docs/sphinx/_toc.yml.in
@@ -12,14 +12,14 @@ subtrees:
  - file: compatibility/compatibility-matrix.rst
    title: Compatibility matrix
    entries:
-    - url: https://rocm.docs.amd.com/projects/install-on-linux/en/${branch}/reference/system-requirements.html
+    - url: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html
      title: Linux system requirements
    - url: https://rocm.docs.amd.com/projects/install-on-windows/en/${branch}/reference/system-requirements.html
      title: Windows system requirements

 - caption: Install
  entries:
-  - url: https://rocm.docs.amd.com/projects/install-on-linux/en/${branch}/
+  - url: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/
    title: ROCm on Linux
  - url: https://rocm.docs.amd.com/projects/install-on-windows/en/${branch}/
    title: HIP SDK on Windows
@@ -36,10 +36,6 @@ subtrees:
    title: Use ROCm for AI
    subtrees:
    - entries:
-      - file: how-to/rocm-for-ai/install.rst
-        title: Installation
-      - file: how-to/rocm-for-ai/system-health-check.rst
-        title: System health benchmarks
      - file: how-to/rocm-for-ai/training/index.rst
        title: Training
        subtrees:
@@ -50,8 +46,6 @@ subtrees:
            title: Train a model with PyTorch
          - file: how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext
            title: Train a model with JAX MaxText
-          - file: how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry
-            title: Train a model with LLM Foundry
          - file: how-to/rocm-for-ai/training/scale-model-training.rst
            title: Scale model training

@@ -74,14 +68,14 @@ subtrees:
        title: Inference
        subtrees:
        - entries:
+          - file: how-to/rocm-for-ai/inference/install.rst
+            title: Installation
          - file: how-to/rocm-for-ai/inference/hugging-face-models.rst
            title: Run models from Hugging Face
          - file: how-to/rocm-for-ai/inference/llm-inference-frameworks.rst
            title: LLM inference frameworks
          - file: how-to/rocm-for-ai/inference/vllm-benchmark.rst
-            title: vLLM inference performance testing
-          - file: how-to/rocm-for-ai/inference/pytorch-inference-benchmark.rst
-            title: PyTorch inference performance testing
+            title: Performance testing
          - file: how-to/rocm-for-ai/inference/deploy-your-model.rst
            title: Deploy your model

--- a/docs/sphinx/requirements.txt
+++ b/docs/sphinx/requirements.txt
@@ -2,55 +2,54 @@
 # This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
-#    pip-compile requirements.in
+#    pip-compile docs/sphinx/requirements.in
 #
 accessible-pygments==0.0.5
    # via pydata-sphinx-theme
 alabaster==1.0.0
    # via sphinx
-appnope==0.1.4
-    # via ipykernel
 asttokens==3.0.0
    # via stack-data
-attrs==25.3.0
+attrs==25.1.0
    # via
    #   jsonschema
    #   jupyter-cache
    #   referencing
-babel==2.17.0
+babel==2.16.0
    # via
    #   pydata-sphinx-theme
    #   sphinx
-beautifulsoup4==4.13.3
+beautifulsoup4==4.12.3
    # via pydata-sphinx-theme
-breathe==4.36.0
+breathe==4.35.0
    # via rocm-docs-core
-certifi==2025.1.31
+certifi==2024.8.30
    # via requests
 cffi==1.17.1
    # via
    #   cryptography
    #   pynacl
-charset-normalizer==3.4.1
+charset-normalizer==3.4.0
    # via requests
-click==8.1.8
+click==8.1.7
    # via
    #   jupyter-cache
    #   sphinx-external-toc
 comm==0.2.2
    # via ipykernel
-cryptography==44.0.2
+cryptography==44.0.1
    # via pyjwt
-debugpy==1.8.14
+debugpy==1.8.12
    # via ipykernel
-decorator==5.2.1
+decorator==5.1.1
    # via ipython
 defusedxml==0.7.1
    # via sphinxcontrib-datatemplates
-deprecated==1.2.18
+deprecated==1.2.15
    # via pygithub
 docutils==0.21.2
    # via
+    #   breathe
    #   myst-parser
    #   pydata-sphinx-theme
    #   sphinx
@@ -58,14 +57,16 @@ exceptiongroup==1.2.2
    # via ipython
 executing==2.2.0
    # via stack-data
-fastjsonschema==2.21.1
+fastjsonschema==2.20.0
    # via
    #   nbformat
    #   rocm-docs-core
-gitdb==4.0.12
+gitdb==4.0.11
    # via gitpython
-gitpython==3.1.44
+gitpython==3.1.43
    # via rocm-docs-core
+greenlet==3.1.1
+    # via sqlalchemy
 idna==3.10
    # via requests
 imagesize==1.4.1
@@ -76,7 +77,7 @@ importlib-metadata==8.6.1
    #   myst-nb
 ipykernel==6.29.5
    # via myst-nb
-ipython==8.35.0
+ipython==8.31.0
    # via
    #   ipykernel
    #   myst-nb
@@ -116,9 +117,9 @@ mdit-py-plugins==0.4.2
    # via myst-parser
 mdurl==0.1.2
    # via markdown-it-py
-myst-nb==1.2.0
+myst-nb==1.1.2
    # via rocm-docs-core
-myst-parser==4.0.1
+myst-parser==4.0.0
    # via myst-nb
 nbclient==0.10.2
    # via
@@ -134,17 +135,16 @@ nest-asyncio==1.6.0
 packaging==24.2
    # via
    #   ipykernel
-    #   pydata-sphinx-theme
    #   sphinx
 parso==0.8.4
    # via jedi
 pexpect==4.9.0
    # via ipython
-platformdirs==4.3.7
+platformdirs==4.3.6
    # via jupyter-core
 prompt-toolkit==3.0.50
    # via ipython
-psutil==7.0.0
+psutil==6.1.1
    # via ipykernel
 ptyprocess==0.7.0
    # via pexpect
@@ -152,19 +152,19 @@ pure-eval==0.2.3
    # via stack-data
 pycparser==2.22
    # via cffi
-pydata-sphinx-theme==0.15.4
+pydata-sphinx-theme==0.16.0
    # via
    #   rocm-docs-core
    #   sphinx-book-theme
-pygithub==2.6.1
+pygithub==2.5.0
    # via rocm-docs-core
-pygments==2.19.1
+pygments==2.18.0
    # via
    #   accessible-pygments
    #   ipython
    #   pydata-sphinx-theme
    #   sphinx
-pyjwt[crypto]==2.10.1
+pyjwt[crypto]==2.10.0
    # via pygithub
 pynacl==1.5.0
    # via pygithub
@@ -178,7 +178,7 @@ pyyaml==6.0.2
    #   rocm-docs-core
    #   sphinx-external-toc
    #   sphinxcontrib-datatemplates
-pyzmq==26.4.0
+pyzmq==26.2.0
    # via
    #   ipykernel
    #   jupyter-client
@@ -192,13 +192,13 @@ requests==2.32.3
    #   sphinx
 rocm-docs-core==1.18.2
    # via -r requirements.in
-rpds-py==0.24.0
+rpds-py==0.22.3
    # via
    #   jsonschema
    #   referencing
 six==1.17.0
    # via python-dateutil
-smmap==5.0.2
+smmap==5.0.1
    # via gitdb
 snowballstemmer==2.2.0
    # via sphinx
@@ -220,7 +220,7 @@ sphinx==8.1.3
    #   sphinx-sitemap
    #   sphinxcontrib-datatemplates
    #   sphinxcontrib-runcmd
-sphinx-book-theme==1.1.4
+sphinx-book-theme==1.1.3
    # via rocm-docs-core
 sphinx-copybutton==0.5.2
    # via rocm-docs-core
@@ -228,7 +228,7 @@ sphinx-design==0.6.1
    # via rocm-docs-core
 sphinx-external-toc==1.0.1
    # via rocm-docs-core
-sphinx-notfound-page==1.1.0
+sphinx-notfound-page==1.0.4
    # via rocm-docs-core
 sphinx-reredirects==0.1.6
    # via -r requirements.in
@@ -250,13 +250,13 @@ sphinxcontrib-runcmd==0.2.0
    # via sphinxcontrib-datatemplates
 sphinxcontrib-serializinghtml==2.0.0
    # via sphinx
-sqlalchemy==2.0.40
+sqlalchemy==2.0.37
    # via jupyter-cache
 stack-data==0.6.3
    # via ipython
 tabulate==0.9.0
    # via jupyter-cache
-tomli==2.2.1
+tomli==2.1.0
    # via sphinx
 tornado==6.4.2
    # via
@@ -272,22 +272,21 @@ traitlets==5.14.3
    #   matplotlib-inline
    #   nbclient
    #   nbformat
-typing-extensions==4.13.2
+typing-extensions==4.12.2
    # via
-    #   beautifulsoup4
    #   ipython
    #   myst-nb
    #   pydata-sphinx-theme
    #   pygithub
    #   referencing
    #   sqlalchemy
-urllib3==2.4.0
+urllib3==2.2.3
    # via
    #   pygithub
    #   requests
 wcwidth==0.2.13
    # via prompt-toolkit
-wrapt==1.17.2
+wrapt==1.17.0
    # via deprecated
 zipp==3.21.0
    # via importlib-metadata
--- a/docs/what-is-rocm.rst
+++ b/docs/what-is-rocm.rst
@@ -10,7 +10,7 @@ ROCm is a software stack, composed primarily of open-source software, that
 provides the tools for programming AMD Graphics Processing Units (GPUs), from
 low-level kernels to high-level end-user applications.

-.. image:: data/rocm-software-stack-6_4_0.jpg
+.. image:: data/rocm-software-stack-6_3_2.jpg
  :width: 800
  :alt: AMD's ROCm software stack and enabling technologies.
  :align: center
@@ -52,7 +52,6 @@ Communication
  :header: "Component", "Description"

  ":doc:`RCCL <rccl:index>`", "Standalone library that provides multi-GPU and multi-node collective communication primitives"
-  ":doc:`rocSHMEM <rocSHMEM:index>`", "An intra-kernel networking library that provides GPU-centric networking through an OpenSHMEM-like interface"

 Math
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
--- a/tools/rocm-build/ROCm.mk
+++ b/tools/rocm-build/ROCm.mk
@@ -12,17 +12,36 @@ RELEASE_FLAG?=-r
 ASAN_DEP:=
 ifeq (${ENABLE_ADDRESS_SANITIZER},true)
 	ASAN_DEP=lightning
-	SANITIZER_FLAG=-a
+	SANITIZER_FLAG=--address_sanitizer
+endif
+
+# Set STATIC_FLAG for static builds
+ifeq (${ENABLE_STATIC_BUILDS},true)
+	STATIC_FLAG=-s
 endif

 export INFRA_REPO:=ROCm/tools/rocm-build

+# # commands to be run at makefile read time
+# # should only output $$OUT_DIR to stdout
+# # In an ideal world this would be another target
+# define INITBUILD
+# source ${INFRA_REPO}/envsetup.sh >/dev/null 2>&1;
+# [ -w "$${ROCM_INSTALL_PATH}" ] || sudo mkdir -p -m 775 "$${ROCM_INSTALL_PATH}" ;
+# sudo chown "$$(id -u):$$(id -g)" "$${ROCM_INSTALL_PATH}" "/home/$$(id -un)" ;
+# mkdir -p ${HOME}/.ccache ;
+# echo $${OUT_DIR} ;
+# endef
+
 OUT_DIR:=$(shell . ${INFRA_REPO}/envsetup.sh >/dev/null 2>&1 ; echo $${OUT_DIR})
 ROCM_INSTALL_PATH:=$(shell . ${INFRA_REPO}/envsetup.sh >/dev/null 2>&1 ; echo $${ROCM_INSTALL_PATH})

 $(info OUT_DIR=${OUT_DIR})
 $(info ROCM_INSTALL_PATH=${ROCM_INSTALL_PATH})

+# define SILENT to be empty to see the runner invocation
+SILENT?= @
+
 # -------------------------------------------------------------------------
 # Internal stuff. Could be put in a different file to hide it.
 # Internal macros, they need to be defined before being used.
@@ -47,7 +66,8 @@ endif
 # It should not be needed.
 define adddep =
 $(strip $(call peval,components+= $(1) $(2))
-$(foreach comp,$(strip $2),$(call peval,${OUT_DIR}/logs/${1}: ${OUT_DIR}/logs/${comp}))
+$(call peval,$(1)_DEPS += $(2))
+$(foreach comp,$(strip $2),$(call peval,${OUT_DIR}/logs/${1}.txt: ${OUT_DIR}/logs/${comp}.txt))
 )
 endef
 # End of internal stuff that is needed at the start of the file
@@ -83,27 +103,29 @@ $(call adddep,rocm-core,${ASAN_DEP})
 $(call adddep,rocm-gdb,dbgapi)
 $(call adddep,rocminfo,${ASAN_DEP} rocr)
 $(call adddep,rocprofiler-register,${ASAN_DEP})
-$(call adddep,rocprofiler-sdk,${ASAN_DEP} rocr aqlprofile opencl_on_rocclr hip_on_rocclr comgr)
+$(call adddep,rocprofiler-sdk,${ASAN_DEP} rocr aqlprofile opencl_on_rocclr hip_on_rocclr comgr rccl rocdecode)
 $(call adddep,rocprofiler-systems,${ASAN_DEP} hipcc rocr hip_on_rocclr rocm_smi_lib rocprofiler roctracer rocprofiler-sdk)
 $(call adddep,rocprofiler,${ASAN_DEP} rocr roctracer aqlprofile opencl_on_rocclr hip_on_rocclr comgr)
 $(call adddep,rocprofiler-compute,${ASAN_DEP})
 $(call adddep,rocr,${ASAN_DEP} lightning rocm_smi_lib devicelibs rocprofiler-register)
 $(call adddep,rocr_debug_agent,${ASAN_DEP} hip_on_rocclr rocr dbgapi)
+$(call adddep,rocrsamples,lightning devicelibs rocr )
 $(call adddep,roctracer,${ASAN_DEP} rocr hip_on_rocclr)

+
 # rocm-dev points to all possible last finish components of Stage1 build.
 rocm-dev-components :=amd_smi_lib aqlprofile comgr dbgapi devicelibs hip_on_rocclr hipcc hipify_clang \
 	lightning rocprofiler-compute opencl_on_rocclr openmp_extras rocm_bandwidth_test rocm_smi_lib \
 	rocm-cmake rocm-core rocm-gdb rocminfo rocprofiler-register rocprofiler-sdk rocprofiler-systems \
-	rocprofiler rocr rocr_debug_agent roctracer
-$(call adddep,rocm-dev,$(filter-out ${NOBUILD} kernel_ubuntu,${rocm-dev-components}))
+	rocprofiler rocr rocr_debug_agent rocrsamples roctracer
+$(call adddep,rocm-dev,$(filter-out ${NOBUILD},${rocm-dev-components}))

 $(call adddep,amdmigraphx,hip_on_rocclr half rocblas miopen-hip lightning hipcc hiptensor)
 $(call adddep,composable_kernel,lightning hipcc hip_on_rocclr rocm-cmake)
 $(call adddep,half,rocm-cmake)
 $(call adddep,hipblas-common,lightning)
 $(call adddep,hipblas,hip_on_rocclr rocblas rocsolver lightning hipcc)
-$(call adddep,hipblaslt,hip_on_rocclr openmp_extras lightning hipcc hipblas-common rocm-dev)
+$(call adddep,hipblaslt,hip_on_rocclr openmp_extras lightning hipcc hipblas-common roctracer)
 $(call adddep,hipcub,hip_on_rocclr rocprim lightning hipcc)
 $(call adddep,hipfft,hip_on_rocclr openmp_extras rocfft rocrand hiprand lightning hipcc)
 $(call adddep,hipfort,rocblas hipblas rocsparse hipsparse rocfft hipfft rocrand hiprand rocsolver hipsolver lightning hipcc)
@@ -113,26 +135,34 @@ $(call adddep,hipsparse,hip_on_rocclr rocsparse lightning hipcc)
 $(call adddep,hipsparselt,hip_on_rocclr hipsparse lightning hipcc openmp_extras)
 $(call adddep,hiptensor,hip_on_rocclr composable_kernel lightning hipcc)
 $(call adddep,miopen-deps,lightning hipcc)
-$(call adddep,miopen-hip,composable_kernel half hip_on_rocclr miopen-deps hipblas hipblaslt rocrand roctracer lightning hipcc)
+$(call adddep,miopen-hip,rocm-core composable_kernel half hip_on_rocclr miopen-deps hipblas hipblaslt rocrand roctracer lightning hipcc)
 $(call adddep,mivisionx,amdmigraphx miopen-hip rpp lightning hipcc)
 $(call adddep,rccl,rocm-core hip_on_rocclr rocr lightning hipcc rocm_smi_lib hipify_clang)
-$(call adddep,rdc,rocm_smi_lib rocprofiler rocmvalidationsuite)
+$(call adddep,rdc,amd_smi_lib rocprofiler-sdk rocm_smi_lib rocprofiler  rocmvalidationsuite)
 $(call adddep,rocalution,rocblas rocsparse rocrand lightning hipcc)
-$(call adddep,rocblas,hip_on_rocclr openmp_extras lightning hipcc hipblaslt)
+$(call adddep,rocblas,rocminfo hip_on_rocclr openmp_extras lightning hipcc hipblaslt)
 $(call adddep,rocal,mivisionx)
-$(call adddep,rocdecode,hip_on_rocclr lightning hipcc amdmigraphx)
+$(call adddep,rocdecode,hip_on_rocclr lightning hipcc)
 $(call adddep,rocfft,hip_on_rocclr rocrand hiprand lightning hipcc openmp_extras)
-$(call adddep,rocjpeg,hip_on_rocclr lightning hipcc rocm-dev)
+$(call adddep,rocjpeg,hip_on_rocclr lightning hipcc)
 $(call adddep,rocmvalidationsuite,hip_on_rocclr rocr hipblas hiprand hipblaslt rocm-core lightning hipcc rocm_smi_lib)
 $(call adddep,rocprim,hip_on_rocclr lightning hipcc)
 $(call adddep,rocrand,hip_on_rocclr lightning hipcc)
+$(call adddep,rocshmem,rccl )
 $(call adddep,rocsolver,hip_on_rocclr rocblas rocsparse rocprim lightning hipcc)
 $(call adddep,rocsparse,hip_on_rocclr rocprim lightning hipcc)
 $(call adddep,rocthrust,hip_on_rocclr rocprim lightning hipcc)
 $(call adddep,rocwmma,hip_on_rocclr rocblas lightning hipcc rocm-cmake rocm_smi_lib)
 $(call adddep,rpp,half lightning hipcc openmp_extras)
 $(call adddep,transferbench,hip_on_rocclr lightning hipcc)
+ifneq ($(filter rocm-dev upload-rocm-dev, ${MAKECMDGOALS}),)
+	components = $(rocm-dev-components)
+endif
+$(call adddep,rocm,$(filter-out ${NOBUILD} rocm,${components}))

+ifeq ($(DISTRO_NAME),rhel)
+    WHL_GEN :=
+endif

 # -------------------------------------------------------------------------
 # The rest of the file is internal
@@ -165,7 +195,7 @@ ifeq (${toplevel},)
 define toplevel =

 # The "target" make, this builds the package if it is out of date
-T_$1: ${OUT_DIR}/logs/$1 FRC
+T_$1: ${OUT_DIR}/logs/$1.txt FRC
 	:              $1 built

 # The "upload" for $1, it uploads the packages for $1 to the central storage
@@ -176,25 +206,26 @@ U_$1: T_$1 FRC
 # The "clean" for $1, it just marks the target as not existing so it will be built
 # in the future.
 C_$1: FRC
-	rm -f ${OUT_DIR}/logs/$1 ${OUT_DIR}/logs/$1.repackaged
+	rm -f ${OUT_DIR}/logs/$1.txt ${OUT_DIR}/logs/$1.repackaged

-# parallel build {
-${OUT_DIR}/logs/$1: | ${OUT_DIR}/logs
-ifneq ($(wildcard ${OUT_DIR}/logs/$1.repackaged),)
+# parallel build
+${OUT_DIR}/logs/$1.txt: | ${OUT_DIR}/logs
+ifneq ($(wildcard ${OUT_DIR}/logs/$1.repackaged),) # {
 	@echo  Skipping build of $1 as it has already been repackaged
-	cat $$@.repackaged > $$@
-	rm -f $$@.repackaged
+	cat $${@:.txt=.repackaged} > $$@
+	rm -f $${@:.txt=.repackaged}
 else # } {
 	@echo  $1 started due to $$? | sed "s:${OUT_DIR}/logs/::g"
 # Build in a subshell so we get the time output
 # Pass in jobserver info using the RMAKE variable
-	${RMAKE}@( if set -x && source $${INFRA_REPO}/envsetup.sh && \
-	rm -f $$@.errors $$@ $$@.repackaged && \
-	$${INFRA_REPO}/build_$1.sh -c && \
-	time bash -x $${INFRA_REPO}/build_$1.sh $${RELEASE_FLAG} $${SANITIZER_FLAG} && $${INFRA_REPO}/post_inst_pkg.sh "$1" ; \
-	then mv $$@.inprogress $$@ ; \
-	else mv $$@.inprogress $$@.errors ; echo Error in $1 >&2 ; exit 1 ;\
-	fi ) > $$@.inprogress 2>&1
+# Allow project-specific flags, e.g., ROCMBUILD_lightning.
+	${RMAKE}${SILENT}( if set -x && source $${INFRA_REPO}/envsetup.sh && \
+	rm -f $${@:$1.txt=1.Errors.$1.txt} $$@ $${@:.txt=.repackaged} && \
+	$${INFRA_REPO}/runner $1 $${RELEASE_FLAG} $${SANITIZER_FLAG} $${STATIC_FLAG} ${ROCMBUILD_$1}; \
+	then mv $${@:$1.txt=2.Inprogress.$1.txt} $$@ ; \
+	else mv $${@:$1.txt=2.Inprogress.$1.txt} $${@:$1.txt=1.Errors.$1.txt} ;\
+		echo Error in $1 >&2 ; exit 1 ;\
+	fi ) > $${@:$1.txt=2.Inprogress.$1.txt} 2>&1
 endif # }

 # end of toplevel macro
@@ -227,22 +258,45 @@ upload-rocm-dev: $(addprefix U_,$(filter-out ${NOBUILD},${components}))
 rocm-dev: $(addprefix T_,$(filter-out ${NOBUILD},${components}))
 	@echo rocm-dev built

+ifeq ($(DISTRO_NAME),almalinux)
+	@sudo chmod -R 777 "/home/builder"
+endif
+
+# This code is broken. It stops us exiting a container and
+# starting a new one and continuing the build. The intent
+# is to have run-once code.
 ${OUT_DIR}/logs:
 	sudo mkdir -p -m 775 "${ROCM_INSTALL_PATH}" && \
 	sudo chown -R "$(shell id -u):$(shell id -g)" "/opt"
-	sudo chown -R "$(shell id -u):$(shell id -g)" "/home/$(shell id -un)"
+	sudo chown "$(shell id -u):$(shell id -g)" "/home/$(shell id -un)"
 	mkdir -p "${@}"
 	mkdir -p ${HOME}/.ccache

 ##help clean: remove the output directory and recreate it
 clean:
 	[ -n "${OUT_DIR}" ] && rm -rf "${OUT_DIR}"
-	mkdir -p ${OUT_DIR}/logs
+#	mkdir -p ${OUT_DIR}/logs

 .SECONDARY: ${components:%=${OUT_DIR}/logs/%}

 .PHONY: all clean repack help list_components

+# get_all_deps: Recursively get all dependencies for a given component.
+# Usage: $(call get_all_deps,component_name,)
+# - component_name: The name of the component to get dependencies for.
+# - The second parameter is an internal parameter used to track already
+#   processed components to avoid circular dependencies.
+define get_all_deps
+$(if $(filter $(1),$(2)),,\
+	$(sort $(1) $(foreach d,$($(1)_DEPS),$(call get_all_deps,$d,$(1) $(2))))
+)
+endef
+
+##help deps_<component>: output the dependencies for <component>
+deps_%:
+	@echo "=== Dependencies for [$*] ==="
+	@echo "$(filter-out $*,$(call get_all_deps,$*,))"
+
 ##help list_components: output the list of components
 ##help : Hint make list_components | paste - - - | column -t
 list_components:
--- a/tools/rocm-build/build_amd_smi_lib.sh
+++ b/tools/rocm-build/build_amd_smi_lib.sh
@@ -1,4 +1,3 @@
-#!/bin/bash

 source "$(dirname "${BASH_SOURCE}")/compute_utils.sh"

@@ -10,7 +9,9 @@ printUsage() {
    echo "  -c,  --clean              Removes all amd_smi build artifacts"
    echo "  -r,  --release            Build non-debug version amd_smi (default is debug)"
    echo "  -a,  --address_sanitizer  Enable address sanitizer"
-    echo "  -s,  --static             Build static lib (.a).  build instead of dynamic/shared(.so) "
+    echo "  -s,  --static             Component/Build does not support static builds just accepting this param & ignore. No effect of the param on this build"
+    echo "  -w,  --wheel              Creates python wheel package of amd-smi. 
+                                      It needs to be used along with -r option"
    echo "  -o,  --outdir <pkg_type>  Print path of output directory containing packages of type referred to by pkg_type"
    echo "  -p,  --package <type>     Specify packaging format"
    echo "  -h,  --help               Prints this help"
@@ -25,7 +26,6 @@ printUsage() {
 PROJ_NAME="amdsmi"
 PACKAGE_ROOT="$(getPackageRoot)"
 TARGET="build"
-
 PACKAGE_LIB=$(getLibPath)
 PACKAGE_INCLUDE="$(getIncludePath)"
 AMDSMI_BUILD_DIR=$(getBuildPath $PROJ_NAME)
@@ -42,7 +42,7 @@ SHARED_LIBS="ON"
 CLEAN_OR_OUT=0;
 PKGTYPE="deb"

-VALID_STR=`getopt -o hcraso:p: --long help,clean,release,static,address_sanitizer,outdir:,package: -- "$@"`
+VALID_STR=`getopt -o hcraswo:p: --long help,clean,release,static,wheel,address_sanitizer,outdir:,package: -- "$@"`
 eval set -- "$VALID_STR"

 while true ;
@@ -60,6 +60,8 @@ do
                ADDRESS_SANITIZER=true ; shift ;;
        (-s | --static)
                ack_and_skip_static ;;
+        (-w | --wheel)
+                WHEEL_PACKAGE=true ; shift ;;
        (-o | --outdir)
                TARGET="outdir"; PKGTYPE=$2 ; OUT_DIR_SPECIFIED=1 ; ((CLEAN_OR_OUT|=2)) ; shift 2 ;;
        (-p | --package)
@@ -135,7 +137,7 @@ verifyEnvSetup

 case $TARGET in
    (clean) clean_amdsmi ;;
-    (build) build_amdsmi ;;
+    (build) build_amdsmi; build_wheel "$AMDSMI_BUILD_DIR" "$PROJ_NAME" ;;
    (outdir) print_output_directory ;;
    (*) die "Invalid target $TARGET" ;;
 esac
--- a/tools/rocm-build/build_amdmigraphx.sh
+++ b/tools/rocm-build/build_amdmigraphx.sh
@@ -1,41 +1,42 @@
 #!/bin/bash

 set -ex
-source "$(dirname "${BASH_SOURCE[0]}")/compute_helper.sh"
+source "$(dirname "${BASH_SOURCE[0]}")/compute_utils.sh"

 set_component_src AMDMIGraphX

 build_amdmigraphx() {
    echo "Start build"

+    if [ "${ENABLE_STATIC_BUILDS}" == "true" ]; then
+        ack_and_skip_static
+    fi
+
    cd $COMPONENT_SRC

    if ! command -v rbuild &> /dev/null; then
        pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
    fi

+    # Remove CK
+    xargs -d '\n' -a ${OUT_DIR}/ck.files rm -- || true
+
    if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
         set_asan_env_vars
         set_address_sanitizer_on
    fi

-    if [ -n "$GPU_ARCHS" ]; then
-        GPU_TARGETS="$GPU_ARCHS"
-    else
-        GPU_TARGETS="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx1102;gfx942;gfx1200;gfx1201"
-    fi
    init_rocm_common_cmake_params

    mkdir -p ${BUILD_DIR} && rm -rf ${BUILD_DIR}/* && mkdir -p ${HOME}/amdmigraphx && rm -rf ${HOME}/amdmigraphx/*
    rbuild package -d "${HOME}/amdmigraphx" -B "${BUILD_DIR}" \
-        --cxx="${ROCM_PATH}/llvm/bin/clang++" \
-        --cc="${ROCM_PATH}/llvm/bin/clang" \
+        --cxx="$(set_build_variables __CLANG++__)" \
+        --cc="$(set_build_variables __CLANG__)" \
        "${rocm_math_common_cmake_params[@]}" \
        -DCMAKE_MODULE_LINKER_FLAGS="-Wl,--enable-new-dtags,--build-id=sha1,--rpath,$ROCM_LIB_RPATH" \
-	    -DGPU_TARGETS="${GPU_TARGETS}" \
        -DCMAKE_INSTALL_RPATH=""

-    mkdir -p $PACKAGE_DIR && cp ${BUILD_DIR}/*.${PKGTYPE} $PACKAGE_DIR
+    copy_if "${PKGTYPE}" "${CPACKGEN:-"DEB;RPM"}" "${PACKAGE_DIR}" "${BUILD_DIR}"/*."${PKGTYPE}"
    cd $BUILD_DIR && cmake --build . -- install -j${PROC}

    show_build_cache_stats
@@ -50,7 +51,7 @@ clean_amdmigraphx() {
 stage2_command_args "$@"

 case $TARGET in
-    build) build_amdmigraphx ;;
+    build) build_amdmigraphx; build_wheel ;;
    outdir) print_output_directory ;;
    clean) clean_amdmigraphx ;;
    *) die "Invalid target $TARGET" ;;
--- a/tools/rocm-build/build_clang-ocl.sh
+++ b/tools/rocm-build/build_clang-ocl.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+
+source "$(dirname "${BASH_SOURCE}")/compute_utils.sh"
+
+printUsage() {
+    echo
+    echo "Usage: $(basename "${BASH_SOURCE}") [-c|-r|-h] [makeopts]"
+    echo
+    echo "Options:"
+    echo "  -c,  --clean              Removes all clang-ocl build artifacts"
+    echo "  -r,  --release            Build non-debug version clang-ocl (default is debug)"
+    echo "  -a,  --address_sanitizer  Enable address sanitizer"
+    echo "  -o,  --outdir <pkg_type>  Print path of output directory containing packages of
+                                      type referred to by pkg_type"
+    echo "  -h,  --help               Prints this help"
+    echo "  -s,  --static             Supports static CI by accepting this param & not bailing out. No effect of the param though"
+    echo
+
+    return 0
+}
+
+
+TARGET="build"
+CLANG_OCL_DEST="$(getBinPath)"
+CLANG_OCL_SRC_ROOT="$CLANG_OCL_ROOT"
+CLANG_OCL_BUILD_DIR="$(getBuildPath clang-ocl)"
+
+MAKEARG="$DASH_JAY"
+PACKAGE_ROOT="$(getPackageRoot)"
+PACKAGE_UTILS="$(getUtilsPath)"
+CLANG_OCL_PACKAGE_DEB="$PACKAGE_ROOT/deb/clang-ocl"
+CLANG_OCL_PACKAGE_RPM="$PACKAGE_ROOT/rpm/clang-ocl"
+BUILD_TYPE="Debug"
+SHARED_LIBS="ON"
+CLEAN_OR_OUT=0;
+MAKETARGET="deb"
+PKGTYPE="deb"
+
+
+VALID_STR=`getopt -o hcraso:g: --long help,clean,release,clean,static,address_sanitizer,outdir:,gpu_list: -- "$@"`
+eval set -- "$VALID_STR"
+
+while true ;
+do
+    case "$1" in
+        (-h | --help)
+                printUsage ; exit 0;;
+        (-c | --clean)
+                TARGET="clean" ; ((CLEAN_OR_OUT|=1)) ; shift ;;
+        (-r | --release)
+                MAKEARG="$MAKEARG BUILD_TYPE=rel" ;  BUILD_TYPE="Release" ; shift ;;
+        (-a | --address_sanitizer)
+                set_asan_env_vars
+                set_address_sanitizer_on ; shift ;;
+        (-s | --static)
+                SHARED_LIBS="OFF" ; shift ;;
+        (-o | --outdir)
+                TARGET="outdir"; PKGTYPE=$2 ; OUT_DIR_SPECIFIED=1 ; ((CLEAN_OR_OUT|=2)) ; shift 2 ;;
+        (-g | --gpu_list )
+                GPU_LIST=$2; shift 2 ;;
+        --)     shift; break;;
+        (*)
+                echo " This should never come but just incase : UNEXPECTED ERROR Parm : [$1] ">&2 ; exit 20;;
+    esac
+
+done
+
+RET_CONFLICT=1
+check_conflicting_options $CLEAN_OR_OUT $PKGTYPE $MAKETARGET
+if [ $RET_CONFLICT -ge 30 ]; then
+   print_vars $API_NAME $TARGET $BUILD_TYPE $SHARED_LIBS $CLEAN_OR_OUT $PKGTYPE $MAKETARGET
+   exit $RET_CONFLICT
+fi
+
+clean_clang-ocl() {
+    echo "Removing clang-ocl"
+    rm -rf $CLANG_OCL_DEST/clang-ocl
+    rm -rf $CLANG_OCL_BUILD_DIR
+    rm -rf $CLANG_OCL_PACKAGE_DEB
+    rm -rf $CLANG_OCL_PACKAGE_RPM
+}
+
+build_clang-ocl() {
+    if [ ! -d "$CLANG_OCL_BUILD_DIR" ]; then
+        mkdir -p $CLANG_OCL_BUILD_DIR
+        pushd $CLANG_OCL_BUILD_DIR
+
+        if [ -e $PACKAGE_ROOT/lib/bitcode/opencl.amdgcn.bc ]; then
+            BC_DIR="$ROCM_INSTALL_PATH/lib"
+        else
+            BC_DIR="$ROCM_INSTALL_PATH/amdgcn/bitcode"
+        fi
+
+        cmake \
+            $(rocm_cmake_params) \
+            -DDISABLE_CHECKS="ON" \
+            -DCLANG_BIN="$ROCM_INSTALL_PATH/llvm/bin" \
+            -DBITCODE_DIR="$BC_DIR" \
+	    $(rocm_common_cmake_params) \
+            -DCPACK_SET_DESTDIR="OFF" \
+            $CLANG_OCL_SRC_ROOT
+
+        echo "Making clang-ocl:"
+        cmake --build . -- $MAKEARG
+        cmake --build . -- $MAKEARG install
+        cmake --build . -- $MAKEARG package
+        popd
+    fi
+
+    copy_if DEB "${CPACKGEN:-"DEB;RPM"}" "$CLANG_OCL_PACKAGE_DEB" $CLANG_OCL_BUILD_DIR/rocm-clang-ocl*.deb
+    copy_if RPM "${CPACKGEN:-"DEB;RPM"}" "$CLANG_OCL_PACKAGE_RPM" $CLANG_OCL_BUILD_DIR/rocm-clang-ocl*.rpm
+}
+
+
+print_output_directory() {
+     case ${PKGTYPE} in
+         ("deb")
+             echo ${CLANG_OCL_PACKAGE_DEB};;
+         ("rpm")
+             echo ${CLANG_OCL_PACKAGE_RPM};;
+         (*)
+             echo "Invalid package type \"${PKGTYPE}\" provided for -o" >&2; exit 1;;
+     esac
+     exit
+}
+
+case $TARGET in
+    (clean) clean_clang-ocl ;;
+    (build) build_clang-ocl ;;
+   (outdir) print_output_directory ;;
+        (*) die "Invalid target $TARGET" ;;
+esac
+
+echo "Operation complete"
+exit 0
+
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
chiranjeevipattigidi	3337b4fd2a	Update packages - remove broken packages	2025-05-21 08:10:47 -04:00
cpattigi	a069100d47	Update envsetup.sh HIP_ON_ROCclr_ROOT path to hip and remove aqlprofiletest	2025-05-20 08:13:44 -04:00
ammallya	542d7813ce	Removing aqlprofiletest	2025-04-14 15:26:24 -07:00
ammallya	bc1ffe4fcb	bypass tests	2025-04-14 13:41:34 -07:00
ammallya	09997c68bb	Removing kfd test	2025-04-14 12:55:13 -07:00
ammallya	42bc3501ac	Merge pull request #4623 from ammallya/roc-6.4.x Rebasing branch 6.4.x	2025-04-14 11:42:06 -07:00
Daniel Su	ec36f39665	Ex CI: fix comparison in aggregatePipeline test condition (#4622 )	2025-04-14 12:22:29 -04:00
Peter Park	310864e653	fix link to Dockerfile.rocm (#4573 )	2025-04-14 10:10:03 -04:00
Pratik Basyal	330aa6f23d	640 known issue GitHub link update in develop (#4617 ) * Date updated * GitHub issue links added	2025-04-11 21:13:10 -04:00
Peter Park	eb090b8788	Fix word (#4600 )	2025-04-11 18:07:08 -04:00
Pratik Basyal	af18a170bc	Blog link update to 6.4.0 release notes #4596 Blog link update to 6.4.0 release notes	2025-04-11 17:48:42 -04:00
Peter Park	8b423430a4	Fix markdownlint errors (#4597 ) * deal with markdownlint errors * fix mdlint errors in ubuntu24/README.md	2025-04-11 17:32:52 -04:00
Daniel Su	21b22f89f5	Ex CI: update to ROCm 6.4.0 (#4598 )	2025-04-11 17:32:27 -04:00
Joseph Macaranas	87b9fac022	External CI: Boilerplate code for aggregate build pipeline (#4496 ) - Add knobs to toggle aggregate build options. - Aggregate build pipeline will pull ROCm dependencies from earlier in the same pipeline. - Changing build pool of some components for more compute power. - Deleting deprecated component. - Add Ninja to dependency compilation in MIOpen. - Add retries to wget for MIOpen CK build case. --------- Co-authored-by: Daniel Su <danielsu@amd.com>	2025-04-11 17:13:58 -04:00
Peter Park	656db2bc84	Update KMD versions in compat matrix (#4594 ) * update KMD versions in compat matrix * update historical compat matrix	2025-04-11 16:48:21 -04:00
Ameya Keshava Mallya	295e1e2998	Updating scripts for 6.4	2025-04-11 20:44:44 +00:00
Pratik Basyal	6770798faf	Link update (#4591 )	2025-04-11 16:08:58 -04:00