Update docs/compatibility/ml-compatibility/pytorch-compatibility.rst

Co-authored-by: Jeff Daily <jeff.daily@amd.com>
WIP
2026-01-11 07:38:17 -05:00 · 2025-04-25 20:45:27 +02:00 · 2025-04-25 14:43:24 +02:00 · 2025-04-25 14:43:24 +02:00
146 changed files with 5313 additions and 14072 deletions
--- a/.azuredevops/ci-builds/mathlibs-trigger.yml
+++ b/.azuredevops/ci-builds/mathlibs-trigger.yml
@@ -1,33 +0,0 @@
-variables:
- group: common
- template: /.azuredevops/variables-global.yml@pipelines_repo
-
-parameters:
- name: pipelinesRepoRef
-  type: string
-  default: refs/heads/develop
- name: librariesRepoRef
-  type: string
-  default: refs/heads/develop
-
-resources:
-  repositories:
-  - repository: pipelines_repo
-    type: github
-    endpoint: ROCm
-    name: ROCm/ROCm
-    ref: ${{ parameters.pipelinesRepoRef }}
-  - repository: libraries_repo
-    type: github
-    endpoint: ROCm
-    name: ROCm/rocm-libraries
-    ref: ${{ parameters.librariesRepoRef }}
-
-trigger: none
-pr: none
-
-jobs:
-  - template: /.azuredevops/ci-builds/mathlibs.yml@pipelines_repo
-    parameters:
-      checkoutRepo: libraries_repo
-      buildDependsOn: false
--- a/.azuredevops/ci-builds/mathlibs.yml
+++ b/.azuredevops/ci-builds/mathlibs.yml
@@ -1,38 +0,0 @@
-# entrypoint for kicking off a unified build of the mathlibs
-# this template is designed to be called by another pipeline (llvm, clr, etc.)
-# `buildDependsOn` will need to be set when calling this template
-# passes a `unifiedBuild` param to downstream pipelines, which will prevent duplicate jobs
-# logic needs to be added in individual mathlib pipelines for handling `unifiedBuild`
-
-parameters:
- name: checkoutRepo
-  type: string
-  default: monorepo
- name: buildDependsOn
-  type: object
-  default: false
- name: downstreamComponentMatrix
-  type: object
-  default:
-    - rocRAND:
-      name: rocRAND
-      sparseCheckoutDir: projects/rocrand
-    - rocPRIM:
-      name: rocPRIM
-      sparseCheckoutDir: projects/rocprim
-    - hipBLAS-common:
-      name: hipBLAS-common
-      sparseCheckoutDir: projects/hipblas-common
-    # - composable_kernel:
-    #   name: composable_kernel
-    #   sparseCheckoutDir: projects/composablekernel
-
-jobs:
- ${{ each component in parameters.downstreamComponentMatrix }}:
-  - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
-    parameters:
-      checkoutRepo: ${{ parameters.checkoutRepo }}
-      sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
-      buildDependsOn: ${{ parameters.buildDependsOn }}
-      triggerDownstreamJobs: true
-      unifiedBuild: true
--- a/.azuredevops/components/HIP.yml
+++ b/.azuredevops/components/HIP.yml
@@ -20,7 +20,7 @@ parameters:
    - ocl-icd-libopencl1
    - ocl-icd-opencl-dev
    - opencl-headers
-    - zlib1g-dev
+    - python3-pip
 - name: pipModules
  type: object
  default:
@@ -41,148 +41,118 @@ parameters:
 # any changes for clr should just trigger HIP pipeline
 # similarly for hipother repo, for Nvidia backend

- name: jobMatrix
-  type: object
-  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }
-
 # HIP with AMD backend
 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: hip_clr_combined_${{ job.os }}_amd
-    pool:
-      vmImage: 'ubuntu-22.04'
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        pipModules: ${{ parameters.pipModules }}
-        packageManager: ${{ job.packageManager }}
-  # checkout triggering repo (either HIP or clr)
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-  # if this is triggered by HIP repo, matching repo is clr
-  # if this is triggered by clr repo, matching repo is HIP
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: matching_repo
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: hipother_repo
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-      parameters:
-        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmDependenciesAMD }}
-        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        os: ${{ job.os }}
-  # compile clr
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        componentName: clr
-        cmakeBuildDir: '$(Build.SourcesDirectory)/clr/build'
-        cmakeSourceDir: '$(Build.SourcesDirectory)/clr'
-        os: ${{ job.os }}
-        useAmdclang: false
-        extraBuildFlags: >-
-          -DHIP_COMMON_DIR=$(Build.SourcesDirectory)/HIP
-          -DHIP_PLATFORM=amd
-          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-          -DROCM_PATH=$(Agent.BuildDirectory)/rocm
-          -DHIPCC_BIN_DIR=$(Agent.BuildDirectory)/rocm/bin
-          -DCLR_BUILD_HIP=ON
-          -DCLR_BUILD_OCL=ON
-          -GNinja
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        artifactName: amd
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        artifactName: amd
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-    #   parameters:
-    #     aptPackages: ${{ parameters.aptPackages }}
-    #     pipModules: ${{ parameters.pipModules }}
-    #     environment: amd
+- job: hip_clr_combined_amd  # builds clr (HIP + OpenCL) for the AMD backend; artifactName is "amd"
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  pool:
+    vmImage: ${{ variables.BASE_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+      pipModules: ${{ parameters.pipModules }}
+  # check out the triggering repo (either HIP or clr)
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  # if this run was triggered by the HIP repo, the matching repo is clr;
+  # if it was triggered by the clr repo, the matching repo is HIP
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: matching_repo
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: hipother_repo  # NOTE(review): no AMD build flag below references hipother - confirm it is needed here
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+    parameters:
+      checkoutRef: ${{ parameters.checkoutRef }}
+      dependencyList: ${{ parameters.rocmDependenciesAMD }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
+  # configure and build clr with both HIP and OpenCL enabled, using the Ninja generator
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      componentName: clr
+      cmakeBuildDir: 'clr/build'  # NOTE(review): relative path; how build-cmake.yml resolves it is not visible here - confirm
+      extraBuildFlags: >-
+        -DHIP_COMMON_DIR=$(Build.SourcesDirectory)/HIP
+        -DHIP_PLATFORM=amd
+        -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
+        -DROCM_PATH=$(Agent.BuildDirectory)/rocm
+        -DHIPCC_BIN_DIR=$(Agent.BuildDirectory)/rocm/bin
+        -DCLR_BUILD_HIP=ON
+        -DCLR_BUILD_OCL=ON
+        -GNinja
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+    parameters:
+      artifactName: amd
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+    parameters:
+      artifactName: amd
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+  # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+  #   parameters:
+  #     aptPackages: ${{ parameters.aptPackages }}
+  #     pipModules: ${{ parameters.pipModules }}
+  #     environment: amd

 # HIP with Nvidia backend
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: hip_clr_combined_${{ job.os }}_nvidia
-    pool:
-      vmImage: 'ubuntu-22.04'
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        pipModules: ${{ parameters.pipModules }}
-        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-  # checkout triggering repo (either HIP or clr)
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-  # if this is triggered by HIP repo, matching repo is clr
-  # if this is triggered by clr repo, matching repo is HIP
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: matching_repo
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: hipother_repo
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-      parameters:
-        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmDependenciesNvidia }}
-        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        os: ${{ job.os }}
-    - script: 'ls -1R $(Agent.BuildDirectory)/rocm'
-      displayName: 'Artifact listing'
-  # compile clr
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        componentName: clr
-        cmakeBuildDir: '$(Build.SourcesDirectory)/clr/build'
-        cmakeSourceDir: '$(Build.SourcesDirectory)/clr'
-        os: ${{ job.os }}
-        useAmdclang: false
-        extraBuildFlags: >-
-          -DHIP_COMMON_DIR=$(Build.SourcesDirectory)/HIP
-          -DHIP_PLATFORM=nvidia
-          -DHIPCC_BIN_DIR=$(Agent.BuildDirectory)/rocm/bin
-          -DCLR_BUILD_HIP=ON
-          -DCLR_BUILD_OCL=OFF
-          -DHIPNV_DIR=$(Build.SourcesDirectory)/hipother/hipnv
-          -GNinja
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        artifactName: nvidia
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-    #   parameters:
-    #     aptPackages: ${{ parameters.aptPackages }}
-    #     pipModules: ${{ parameters.pipModules }}
-    #     environment: nvidia
+- job: hip_clr_combined_nvidia  # builds clr (HIP only, OpenCL off) for the Nvidia backend; artifactName is "nvidia"
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  pool:
+    vmImage: ${{ variables.BASE_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+      pipModules: ${{ parameters.pipModules }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  # check out the triggering repo (either HIP or clr)
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  # if this run was triggered by the HIP repo, the matching repo is clr;
+  # if it was triggered by the clr repo, the matching repo is HIP
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: matching_repo
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: hipother_repo  # its hipnv directory is passed to the build as HIPNV_DIR below
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+    parameters:
+      checkoutRef: ${{ parameters.checkoutRef }}
+      dependencyList: ${{ parameters.rocmDependenciesNvidia }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
+  - script: 'ls -1R $(Agent.BuildDirectory)/rocm'  # recursive listing of the rocm directory, written to the build log
+    displayName: 'Artifact listing'
+  # configure and build clr for HIP_PLATFORM=nvidia, using the Ninja generator
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      componentName: clr
+      cmakeBuildDir: 'clr/build'  # NOTE(review): relative path; how build-cmake.yml resolves it is not visible here - confirm
+      extraBuildFlags: >-
+        -DHIP_COMMON_DIR=$(Build.SourcesDirectory)/HIP
+        -DHIP_PLATFORM=nvidia
+        -DHIPCC_BIN_DIR=$(Agent.BuildDirectory)/rocm/bin
+        -DCLR_BUILD_HIP=ON
+        -DCLR_BUILD_OCL=OFF
+        -DHIPNV_DIR=$(Build.SourcesDirectory)/hipother/hipnv
+        -GNinja
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml  # NOTE(review): no manifest.yml step precedes this upload - confirm intended
+    parameters:
+      artifactName: nvidia
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+  # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+  #   parameters:
+  #     aptPackages: ${{ parameters.aptPackages }}
+  #     pipModules: ${{ parameters.pipModules }}
+  #     environment: nvidia
--- a/.azuredevops/components/HIPIFY.yml
+++ b/.azuredevops/components/HIPIFY.yml
@@ -1,7 +1,4 @@
 parameters:
- name: componentName
-  type: string
-  default: HIPIFY
 - name: checkoutRepo
  type: string
  default: 'self'
@@ -16,140 +13,112 @@ parameters:
 - name: aptPackages
  type: object
  default:
-    - cuda-toolkit-12-9
-    - libcudnn9-dev-cuda-12
-    - libnuma-dev
-    - mesa-common-dev
+    - cmake
    - ninja-build
-    - python-is-python3
+    - libnuma-dev
    - python3-dev
    - python3-pip
- name: pipModules
-  type: object
-  default:
-    - lit
- name: rocmDependencies
-  type: object
-  default:
-    - llvm-project
-
- name: jobMatrix
-  type: object
-  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }
-    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+    - python-is-python3
+    - mesa-common-dev
+    - ccache
+    - cuda-toolkit
+    - cudnn

 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.os }}
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    pool:
-      ${{ if eq(job.os, 'ubuntu2404') }}:
-        name: rocm-ci_medium_build_pool_2404
-      ${{ else }}:
-        name: ${{ variables.MEDIUM_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    workspace:
-      clean: all
-    steps:
-    - task: Bash@3
-      displayName: 'Register CUDA packages'
-      inputs:
-        targetType: inline
-        ${{ if eq(job.os, 'ubuntu2204') }}:
-          script: |
-            wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
-            sudo dpkg -i cuda-keyring_1.1-1_all.deb
-            sudo rm -f cuda-keyring_1.1-1_all.deb
-            sudo apt update
-        ${{ if eq(job.os, 'almalinux8') }}:
-          script: |
-            sudo dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        pipModules: ${{ parameters.pipModules }}
-        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
-    - task: Bash@3
-      displayName: Add lit to PATH
-      inputs:
-        targetType: inline
-        script: |
-          site_packages=$(python3 -m site --user-base)/bin
-          sudo ln -sf $site_packages/bin/lit $(Pipeline.Workspace)/llvm-lit
-          echo "##vso[task.prependpath]$site_packages"
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-      parameters:
-        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmDependencies }}
-        os: ${{ job.os }}
-        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-    # cutensor is not available from apt or dnf
-    - task: Bash@3
-      displayName: 'Download and install cutensor'
-      inputs:
-        targetType: inline
-        script: |
-          wget -q --show-progress https://developer.download.nvidia.com/compute/cutensor/redist/libcutensor/linux-x86_64/libcutensor-linux-x86_64-2.2.0.0-archive.tar.xz
-          tar -xvJf libcutensor-linux-x86_64-*.tar.xz
-          mkdir -p $(Pipeline.Workspace)/cutensor
-          cp -r libcutensor-linux-x86_64-*/* $(Pipeline.Workspace)/cutensor/
-    - task: Bash@3
-      displayName: 'List downloaded CUDA files'
-      inputs:
-        targetType: inline
-        script: ls -la1R /usr/local/cuda-12.9
-    # script: cp $(Pipeline.Workspace)/llvm-project/llvm/build/bin/FileCheck $(Pipeline.Workspace)/llvm/bin
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        componentName: ${{ parameters.componentName }}
-        os: ${{ job.os }}
-        consolidateBuildAndInstall: true
-        extraBuildFlags: >-
-          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;/usr/local/cuda/targets/x86_64-linux/lib
-          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
-          -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
-          -DHIPIFY_CLANG_TESTS=ON
-          -DCMAKE_BUILD_TYPE=Release
-          -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.9
-          -DCUDA_DNN_ROOT_DIR=/usr/local/cuda-12.9
-          -DCUDA_CUB_ROOT_DIR=/usr/local/cuda-12.9/targets/x86_64-linux/include/cub
-          -DCUDA_TENSOR_ROOT_DIR=$(Pipeline.Workspace)/cutensor/
-        multithreadFlag: -- -j32
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        os: ${{ job.os }}
-    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-    #  parameters:
-    #    componentName: HIPIFY
-    #    testDir: $(Build.SourcesDirectory)/build
-    #    testExecutable: make
-    #    testParameters: -j 32 test-hipify
-    #    testPublishResults: false
-    #    os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    - ${{ if eq(job.os, 'ubuntu2204') }}:
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          environment: combined
-          registerCUDAPackages: true
-          extraCopyDirectories:
-            - llvm-project
+- job: HIPIFY  # builds upstream LLVM/clang from source, then builds and tests HIPIFY against it
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  - name: UPSTREAM_LLVM_GIT_URL  # also written literally in extraEnvVars at the end of this job; keep both in sync
+    value: https://github.com/llvm/llvm-project.git
+  - name: UPSTREAM_LLVM_TAG  # also written literally in extraEnvVars at the end of this job; keep both in sync
+    value: llvmorg-18.1.2
+  pool: ${{ variables.MEDIUM_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - task: Bash@3
+    displayName: 'Register CUDA packages'  # installs NVIDIA's cuda-keyring .deb for ubuntu2204, then refreshes apt
+    inputs:
+      targetType: inline
+      script: |
+        wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+        sudo dpkg -i cuda-keyring_1.1-1_all.deb
+        sudo rm -f cuda-keyring_1.1-1_all.deb
+        sudo apt update
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  - task: Bash@3
+    displayName: git clone upstream llvm-project  # shallow clone (--depth=1) of the pinned tag into $(Pipeline.Workspace)
+    inputs:
+      targetType: inline
+      script: git clone $(UPSTREAM_LLVM_GIT_URL) --depth=1 --branch $(UPSTREAM_LLVM_TAG) --recurse-submodules
+      workingDirectory: $(Pipeline.Workspace)
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  - script: |
+      mkdir -p $(CCACHE_DIR)
+      echo "##vso[task.prependpath]/usr/lib/ccache:/usr/local/cuda/bin"
+    displayName: Update path for cuda and ccache  # creates the ccache dir; prepends the ccache and CUDA bin dirs to PATH
+  - task: Cache@2
+    displayName: Ccache caching
+    inputs:
+      key: HIPIFY | $(Agent.OS) | "$(UPSTREAM_LLVM_TAG)"
+      path: $(CCACHE_DIR)
+      restoreKeys: HIPIFY | $(Agent.OS)  # fallback key prefix tried when the tag-specific key has no cache hit
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      componentName: upstream-llvm  # Release build of LLVM with clang enabled, tests excluded, ccache as compiler launcher
+      cmakeBuildDir: $(Pipeline.Workspace)/llvm-project/llvm/build
+      installDir: $(Pipeline.Workspace)/llvm
+      extraBuildFlags: >-
+        -DCMAKE_BUILD_TYPE=Release
+        -DLLVM_ENABLE_PROJECTS=clang
+        -DLLVM_INCLUDE_TESTS=OFF
+        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+        -DCMAKE_C_COMPILER_LAUNCHER=ccache
+        -GNinja
+  - task: Bash@3
+    displayName: python install lit  # runs lit's setup.py from the cloned LLVM tree under sudo
+    inputs:
+      targetType: inline
+      script: sudo python3 $(Pipeline.Workspace)/llvm-project/llvm/utils/lit/setup.py install
+  - task: Bash@3
+    displayName: install FileCheck  # copies FileCheck from the LLVM build tree into the install dir's bin directory
+    inputs:
+      targetType: inline
+      script: cp $(Pipeline.Workspace)/llvm-project/llvm/build/bin/FileCheck $(Pipeline.Workspace)/llvm/bin
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      componentName: HIPIFY  # built with clang tests on, against the LLVM install dir and the system CUDA toolkit
+      extraBuildFlags: >-
+        -DHIPIFY_CLANG_TESTS=ON
+        -DCMAKE_BUILD_TYPE=Release
+        -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda/targets/x86_64-linux
+        -DCUDA_DNN_ROOT_DIR=/usr/local/cuda/targets/x86_64-linux
+        -DCMAKE_PREFIX_PATH=$(Pipeline.Workspace)/llvm;/usr/local/cuda/targets/x86_64-linux/lib
+        -DLLVM_EXTERNAL_LIT=$(Pipeline.Workspace)/llvm-project/llvm/build/bin/llvm-lit
+      multithreadFlag: -- -j32
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+    parameters:
+      componentName: HIPIFY
+      testDir: $(Build.SourcesDirectory)/build
+      testExecutable: make  # presumably test.yml runs "make test-hipify" inside testDir - confirm against the template
+      testParameters: test-hipify
+      testPublishResults: false
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+      environment: combined
+      registerCUDAPackages: true
+      extraCopyDirectories:
+        - llvm-project
+      extraEnvVars:  # repeats the UPSTREAM_LLVM_* job variables as literals; update both places together
+        - UPSTREAM_LLVM_GIT_URL:::https://github.com/llvm/llvm-project.git
+        - UPSTREAM_LLVM_TAG:::llvmorg-18.1.2
--- a/.azuredevops/components/MIOpen.yml
+++ b/.azuredevops/components/MIOpen.yml
@@ -16,7 +16,6 @@ parameters:
    - cmake
    - jq
    - libdrm-dev
-    - libmsgpack-dev
    - libsqlite3-dev
    - libstdc++-12-dev
    - ninja-build
@@ -215,7 +214,7 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
      parameters:
        componentName: MIOpen
-        testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex "test_rnn_seq_api|GPU_Conv2dTuningAsm_FP32"'
+        testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex test_rnn_seq_api'
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
--- a/.azuredevops/components/MIVisionX.yml
+++ b/.azuredevops/components/MIVisionX.yml
@@ -43,20 +43,18 @@ parameters:
 - name: rocmDependencies
  type: object
  default:
-    - AMDMIGraphX
-    - clr
-    - half
-    - hipBLAS-common
-    - hipBLASLt
-    - llvm-project
-    - MIOpen
-    - rocBLAS
-    - rocDecode
    - rocm-cmake
+    - llvm-project
+    - ROCR-Runtime
+    - clr
    - rocminfo
    - rocprofiler-register
-    - ROCR-Runtime
+    - half
+    - rocBLAS
+    - MIOpen
+    - AMDMIGraphX
    - rpp
+    - rocDecode
 - name: rocmTestDependencies
  type: object
  default:
@@ -92,7 +90,8 @@ jobs:
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
-    pool: ${{ variables.MEDIUM_BUILD_POOL }}
+    pool:
+      vmImage: ${{ variables.BASE_BUILD_POOL }}
    workspace:
      clean: all
    steps:
--- a/.azuredevops/components/ROCR-Runtime.yml
+++ b/.azuredevops/components/ROCR-Runtime.yml
@@ -20,6 +20,7 @@ parameters:
    - libnuma-dev
    - ninja-build
    - pkg-config
+    - python3-pip
 - name: rocmDependencies
  type: object
  default:
@@ -35,65 +36,51 @@ parameters:
 - name: jobMatrix
  type: object
  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+      - gfx942:  # label key with no value; the visible test-job template reads only job.target
+        target: gfx942
+      - gfx90a:  # label key with no value; the visible test-job template reads only job.target
+        target: gfx90a

 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ROCR_Runtime_build_${{ job.os }}
-    pool:
-      vmImage: 'ubuntu-22.04'
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-      parameters:
-        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmDependencies }}
-        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        os: ${{ job.os }}
-        useAmdclang: false
-        extraBuildFlags: >-
-          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-          -DBUILD_SHARED_LIBS=ON
-          -DCMAKE_BUILD_TYPE=Release
-          -GNinja
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-    #   parameters:
-    #     aptPackages: ${{ parameters.aptPackages }}
+- job: ROCR_Runtime_build  # single build job; every ROCR_Runtime_test_<target> job below depends on it
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  pool:
+    vmImage: ${{ variables.BASE_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+    parameters:
+      checkoutRef: ${{ parameters.checkoutRef }}
+      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      extraBuildFlags: >-  # Release build of shared libraries with Ninja; rocm dir under Agent.BuildDirectory on the prefix path
+        -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
+        -DBUILD_SHARED_LIBS=ON
+        -DCMAKE_BUILD_TYPE=Release
+        -GNinja
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+  # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+  #   parameters:
+  #     aptPackages: ${{ parameters.aptPackages }}

 - ${{ each job in parameters.jobMatrix.testJobs }}:
-  - job: ROCR_Runtime_test_${{ job.os }}_${{ job.target }}
-    dependsOn: ROCR_Runtime_build_${{ job.os }}
+  - job: ROCR_Runtime_test_${{ job.target }}
+    dependsOn: ROCR_Runtime_build
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
@@ -110,7 +97,6 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
    - task: Bash@3
      displayName: Install libhwloc5
      inputs:
@@ -121,15 +107,12 @@ jobs:
          sudo apt install -y --allow-downgrades ./libhwloc5_1.11.12-3_amd64.deb ./libhwloc-dev_1.11.12-3_amd64.deb
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-      parameters:
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmTestDependencies }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
@@ -138,13 +121,11 @@ jobs:
        runRocminfo: false
    - task: Bash@3
      displayName: Build kfdtest
+      continueOnError: true
      inputs:
        targetType: 'inline'
        workingDirectory: $(Build.SourcesDirectory)/libhsakmt/tests/kfdtest
        script: |
-          if [ -e /opt/rh/gcc-toolset-14/enable ]; then
-            source /opt/rh/gcc-toolset-14/enable
-          fi
          mkdir build && cd build
          cmake -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm ..
          make
@@ -154,16 +135,13 @@ jobs:
        testExecutable: BIN_DIR=$(Build.SourcesDirectory)/libhsakmt/tests/kfdtest/build ./run_kfdtest.sh
        testParameters: '-p core --gtest_output=xml:./test_output.xml --gtest_color=yes'
        testDir: $(Build.SourcesDirectory)/libhsakmt/tests/kfdtest/scripts
-        os: ${{ job.os }}
    - task: Bash@3
      displayName: Build rocrtst
+      continueOnError: true
      inputs:
        targetType: 'inline'
        workingDirectory: $(Build.SourcesDirectory)/rocrtst/suites/test_common
        script: |
-          if [ -e /opt/rh/gcc-toolset-14/enable ]; then
-            source /opt/rh/gcc-toolset-14/enable
-          fi
          BASE_CLANG_DIR=$(Agent.BuildDirectory)/rocm/llvm/lib/clang
          export NEWEST_CLANG_VER=$(ls -1 $BASE_CLANG_DIR | sort -V | tail -n 1)
          mkdir build && cd build
@@ -181,7 +159,6 @@ jobs:
        testExecutable: ./rocrtst64
        testParameters: '--gtest_filter="-rocrtstNeg.Memory_Negative_Tests:rocrtstFunc.Memory_Max_Mem" --gtest_output=xml:./test_output.xml --gtest_color=yes'
        testDir: $(Build.SourcesDirectory)/rocrtst/suites/test_common/build/${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
--- a/.azuredevops/components/ROCdbgapi.yml
+++ b/.azuredevops/components/ROCdbgapi.yml
@@ -15,6 +15,7 @@ parameters:
  default:
    - cmake
    - ninja-build
+    - python3-pip
 - name: rocmDependencies
  type: object
  default:
@@ -23,57 +24,37 @@ parameters:
    - rocminfo
    - ROCR-Runtime

- name: jobMatrix
-  type: object
-  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }
-
 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ROCdbgapi_build_${{ job.os }}
-    pool:
-      vmImage: 'ubuntu-22.04'
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-      parameters:
-        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmDependencies }}
-        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        os: ${{ job.os }}
-        useAmdclang: false
-        extraBuildFlags: >-
-          -DCMAKE_BUILD_TYPE=Release
-          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-          -GNinja
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-    #   parameters:
-    #     aptPackages: ${{ parameters.aptPackages }}
+- job: ROCdbgapi
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  pool:
+    vmImage: ${{ variables.BASE_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+    parameters:
+      checkoutRef: ${{ parameters.checkoutRef }}
+      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      extraBuildFlags: >-
+        -DCMAKE_BUILD_TYPE=Release
+        -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
+        -GNinja
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+  # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+  #   parameters:
+  #     aptPackages: ${{ parameters.aptPackages }}
--- a/.azuredevops/components/ROCgdb.yml
+++ b/.azuredevops/components/ROCgdb.yml
@@ -1,7 +1,4 @@
 parameters:
- name: componentName
-  type: string
-  default: ROCgdb
 - name: checkoutRepo
  type: string
  default: 'self'
@@ -18,7 +15,6 @@ parameters:
  type: object
  default:
    - bison
-    - cmake
    - dejagnu
    - flex
    - libbabeltrace-dev
@@ -26,10 +22,8 @@ parameters:
    - libgmp-dev
    - liblzma-dev
    - libmpfr-dev
-    - ncurses-dev
    - pkg-config
-    - python3-dev
-    - python3-pip
+    - ncurses-dev
    - texinfo
    - zlib1g-dev
 - name: rocmDependencies
@@ -45,53 +39,45 @@ parameters:
 - name: jobMatrix
  type: object
  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }
-    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+    buildTestJobs:
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.os }}
+- ${{ each job in parameters.jobMatrix.buildTestJobs }}:
+  - job: ROCgdb_build_test_${{ job.target }}
+    condition:
+      and(
+        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
+      )
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
    - name: PKG_CONFIG_PATH
      value: $(Agent.BuildDirectory)/rocm/share/pkgconfig
-    pool:
-      ${{ if eq(job.os, 'ubuntu2404') }}:
-        name: rocm-ci_medium_build_pool_2404
-      ${{ else }}:
-        name: ${{ variables.MEDIUM_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
+    pool: ${{ job.target }}_test_pool
    workspace:
      clean: all
    steps:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-      parameters:
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
-        os: ${{ job.os }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-autotools.yml
      parameters:
-        os: ${{ job.os }}
        configureFlags: >-
          --program-prefix=roc
          --enable-64-bit-bfd
@@ -114,74 +100,7 @@ jobs:
          LDFLAGS="-Wl,--enable-new-dtags,-rpath=$(Agent.BuildDirectory)/rocm/lib"
        makeCallPrefix: LD_RUN_PATH='${ORIGIN}/../lib'
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        componentName: ${{ parameters.componentName }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        componentName: ${{ parameters.componentName }}
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-
- ${{ each job in parameters.jobMatrix.testJobs }}:
-  - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
-    dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}
-    condition:
-      and(succeeded(),
-        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
-        eq(${{ parameters.aggregatePipeline }}, False)
-      )
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    - name: PKG_CONFIG_PATH
-      value: $(Agent.BuildDirectory)/rocm/share/pkgconfig
-    pool: ${{ job.target }}_test_pool
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-      parameters:
-        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmDependencies }}
-        os: ${{ job.os }}
-        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-autotools.yml
-      parameters:
-        os: ${{ job.os }}
-        configureFlags: >-
-          --program-prefix=roc
-          --enable-64-bit-bfd
-          --enable-targets="x86_64-linux-gnu,amdgcn-amd-amdhsa"
-          --disable-ld
-          --disable-gas
-          --disable-gdbserver
-          --disable-sim
-          --enable-tui
-          --disable-gdbtk
-          --disable-shared
-          --disable-gprofng
-          --with-expat
-          --with-system-zlib
-          --without-guile
-          --with-babeltrace
-          --with-lzma
-          --with-python=python3
-          --with-rocm-dbgapi=$(Agent.BuildDirectory)/rocm
-          LDFLAGS="-Wl,--enable-new-dtags,-rpath=$(Agent.BuildDirectory)/rocm/lib"
-        makeCallPrefix: LD_RUN_PATH='${ORIGIN}/../lib'
    - task: Bash@3
      displayName: Setup test environment
      inputs:
@@ -190,15 +109,14 @@ jobs:
          # Assuming that /opt is no longer persistent across runs, test environments are fully ephemeral
          sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
          echo "##vso[task.prependpath]/opt/rocm/bin"
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
    - task: Bash@3
      displayName: check-gdb
      continueOnError: true
      inputs:
        targetType: inline
-        script: |
-          ${{ iif(eq(job.os, 'almalinux8'), 'source /opt/rh/gcc-toolset-14/enable', '') }}
-          make check-gdb TESTS=gdb.rocm/simple.exp
+        script: make check-gdb TESTS=gdb.rocm/simple.exp
        workingDirectory: $(Build.SourcesDirectory)
    - task: Bash@3
      displayName: print gdb log
--- a/.azuredevops/components/ROCmValidationSuite.yml
+++ b/.azuredevops/components/ROCmValidationSuite.yml
@@ -27,7 +27,6 @@ parameters:
  type: object
  default:
    - amdsmi
-    - aomp
    - clr
    - hipBLAS-common
    - hipBLASLt
@@ -44,7 +43,6 @@ parameters:
  type: object
  default:
    - amdsmi
-    - aomp
    - clr
    - hipBLAS-common
    - hipBLASLt
@@ -110,7 +108,6 @@ jobs:
          -DROCM_PATH=$(Agent.BuildDirectory)/rocm
          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-          -DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/llvm/include
          -DCPACK_PACKAGING_INSTALL_PREFIX=$(Build.BinariesDirectory)
          -GNinja
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
--- a/.azuredevops/components/Tensile.yml
+++ b/.azuredevops/components/Tensile.yml
@@ -1,29 +1,10 @@
 parameters:
- name: componentName
-  type: string
-  default: Tensile
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
-# monorepo related parameters
- name: sparseCheckoutDir
-  type: string
-  default: ''
- name: triggerDownstreamJobs
-  type: boolean
-  default: false
- name: downstreamAggregateNames
-  type: string
-  default: ''
- name: buildDependsOn
-  type: object
-  default: null
- name: unifiedBuild
-  type: boolean
-  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -32,6 +13,7 @@ parameters:
 - name: aptPackages
  type: object
  default:
+    - python3-pip
    - cmake
    - libmsgpack-dev
    - libboost-program-options-dev
@@ -56,97 +38,75 @@ parameters:
 - name: jobMatrix
  type: object
  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.os }}
-    ${{ if parameters.buildDependsOn }}:
-      dependsOn: ${{ parameters.buildDependsOn[job.target] }}
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    - name: ROCM_PATH
-      value: $(Agent.BuildDirectory)/rocm
-    pool:
-      vmImage: ${{ variables.BASE_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        pipModules: ${{ parameters.pipModules }}
-        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-      parameters:
-        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmDependencies }}
-        os: ${{ job.os }}
-        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        os: ${{ job.os }}
-    - task: Bash@3
-      displayName: Create wheel file
-      inputs:
-        targetType: inline
-        script: python3 setup.py bdist_wheel
-        workingDirectory: $(Agent.BuildDirectory)/s
-    - task: Bash@3
-      displayName: Rename wheel file with job OS
-      inputs:
-        targetType: inline
-        workingDirectory: $(Agent.BuildDirectory)/s
-        script: |
-          wheelFile=$(find "$(Agent.BuildDirectory)/s/dist" -type f -name "*.whl" | head -n 1)
-          newWheelFile="$(basename "$wheelFile" .whl)-${{ job.os }}.whl"
-          mv "$wheelFile" "$(dirname "$wheelFile")/$newWheelFile"
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
-      parameters:
-        sourceDir: $(Agent.BuildDirectory)/s/dist
-        contentsString: '*.whl'
-        targetDir: $(Build.ArtifactStagingDirectory)
-        clean: false
-    - task: PublishPipelineArtifact@1
-      displayName: 'wheel file Publish'
-      retryCountOnTaskFailure: 3
-      inputs:
-        targetPath: $(Build.ArtifactStagingDirectory)
-    - task: Bash@3
-      displayName: Save pipeline artifact file names
-      inputs:
-        workingDirectory: $(Pipeline.Workspace)
-        targetType: inline
-        script: |
-          whlFile=$(find "$(Build.ArtifactStagingDirectory)" -type f -name "*.whl" | head -n 1)
-          if [ -n "$whlFile" ]; then
-            echo $(basename "$whlFile") >> pipelineArtifacts.txt
-          fi
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-    #   parameters:
-    #     aptPackages: ${{ parameters.aptPackages }}
-    #     pipModules: ${{ parameters.pipModules }}
+- job: Tensile_build
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  - name: ROCM_PATH
+    value: $(Agent.BuildDirectory)/rocm
+  pool:
+    vmImage: ${{ variables.BASE_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+      pipModules: ${{ parameters.pipModules }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+    parameters:
+      checkoutRef: ${{ parameters.checkoutRef }}
+      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+  - task: Bash@3
+    displayName: Create wheel file
+    inputs:
+      targetType: inline
+      script: python3 setup.py bdist_wheel
+      workingDirectory: $(Build.SourcesDirectory)
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
+    parameters:
+      sourceDir: $(Build.SourcesDirectory)/dist
+      contentsString: '*.whl'
+      targetDir: $(Build.ArtifactStagingDirectory)
+      clean: false
+  - task: PublishPipelineArtifact@1
+    displayName: 'wheel file Publish'
+    retryCountOnTaskFailure: 3
+    inputs:
+      targetPath: $(Build.ArtifactStagingDirectory)
+  - task: Bash@3
+    displayName: Save pipeline artifact file names
+    inputs:
+      workingDirectory: $(Pipeline.Workspace)
+      targetType: inline
+      script: |
+        whlFile=$(find "$(Build.ArtifactStagingDirectory)" -type f -name "*.whl" | head -n 1)
+        if [ -n "$whlFile" ]; then
+          echo $(basename "$whlFile") >> pipelineArtifacts.txt
+        fi
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+  # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+  #   parameters:
+  #     aptPackages: ${{ parameters.aptPackages }}
+  #     pipModules: ${{ parameters.pipModules }}

 - ${{ each job in parameters.jobMatrix.testJobs }}:
-  - job: Tensile_test_${{ job.os }}_${{ job.target }}
+  - job: Tensile_test_${{ job.target }}
    timeoutInMinutes: 180
-    dependsOn: Tensile_build_${{ job.os }}
+    dependsOn: Tensile_build
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
@@ -166,23 +126,20 @@ jobs:
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
-        packageManager: ${{ job.packageManager }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - task: DownloadPipelineArtifact@2
      displayName: 'Download Pipeline Wheel Files'
      inputs:
-        itemPattern: '**/*${{ job.os }}*.whl'
+        itemPattern: '**/*.whl'
        targetPath: $(Agent.BuildDirectory)
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
-        os: ${{ job.os }}
        gpuTarget: ${{ job.target }}
    - task: Bash@3
      displayName: pip install
@@ -207,7 +164,7 @@ jobs:
      inputs:
        targetType: inline
        script: tox run -v -e ci -- -m pre_checkin
-        workingDirectory: $(Agent.BuildDirectory)/s
+        workingDirectory: $(Build.SourcesDirectory)
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
--- a/.azuredevops/components/amdsmi.yml
+++ b/.azuredevops/components/amdsmi.yml
@@ -16,66 +16,50 @@ parameters:
    - cmake
    - libdrm-dev
    - ninja-build
+    - python3-pip
    - pkg-config

 - name: jobMatrix
  type: object
  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: ubuntu2404, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: amdsmi_build_${{ job.os }}
-    pool:
-      ${{ if eq(job.os, 'ubuntu2404') }}:
-        vmImage: 'ubuntu-24.04'
-      ${{ else }}:
-        vmImage: 'ubuntu-22.04'
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        os: ${{ job.os }}
-        useAmdclang: false
-        extraBuildFlags: >-
-          -DBUILD_TESTS=ON
-          -GNinja
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-    #   parameters:
-    #     aptPackages: ${{ parameters.aptPackages }}
+- job: amdsmi_build
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  pool:
+    vmImage: ${{ variables.BASE_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      extraBuildFlags: >-
+        -DBUILD_TESTS=ON
+        -GNinja
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+  # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+  #   parameters:
+  #     aptPackages: ${{ parameters.aptPackages }}

 - ${{ each job in parameters.jobMatrix.testJobs }}:
-  - job: amdsmi_test_${{ job.os }}_${{ job.target }}
-    dependsOn: amdsmi_build_${{ job.os }}
+  - job: amdsmi_test_${{ job.target }}
+    dependsOn: amdsmi_build
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
@@ -92,11 +76,8 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-      parameters:
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
      parameters:
        runRocminfo: false
@@ -104,9 +85,8 @@ jobs:
      parameters:
        componentName: amdsmi
        testDir: '$(Agent.BuildDirectory)'
-        testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst'
+        testExecutable: './rocm/share/amd_smi/tests/amdsmitst'
        testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
--- a/.azuredevops/components/aomp.yml
+++ b/.azuredevops/components/aomp.yml
@@ -1,7 +1,4 @@
 parameters:
- name: componentName
-  type: string
-  default: aomp
 - name: checkoutRepo
  type: string
  default: 'self'
@@ -18,187 +15,170 @@ parameters:
 - name: aptPackages
  type: object
  default:
-    - bison
-    - ccache
    - cmake
-    - flex
-    - gawk
-    - git
-    - mesa-common-dev
+    - python3-pip
    - ninja-build
-    - libbabeltrace-dev
-    - libbison-dev
+    - pkg-config
+    - libpci-dev
+    - libnuma-dev
+    - libffi-dev
+    - git
+    - libopenmpi-dev
+    - gawk
+    - mesa-common-dev
+    - libtool
    - libdrm-amdgpu1
    - libdrm-dev
    - libdw-dev
-    - libffi-dev
-    - libgmp-dev
-    - liblzma-dev
-    - libmpfr-dev
-    - libncurses5-dev
-    - libnuma-dev
-    - libopenmpi-dev
-    - libpci-dev
+    - libgtest-dev
+    - libsystemd-dev
    - libssl-dev
    - libstdc++-12-dev
-    - libsystemd-dev
-    - libtool
+    - ccache
+    - libgmp-dev
+    - libmpfr-dev
+    - texinfo
+    - libbison-dev
+    - bison
+    - flex
+    - libbabeltrace-dev
+    - libncurses5-dev
+    - liblzma-dev
+    - python3-setuptools
+    - python3-dev
    - libudev-dev
    - parallel
-    - pkg-config
-    - python3-dev
-    - python3-pip
-    - python3-setuptools
-    - texinfo
+  # Referencing comment snippet.
+  #
+  # snippet from https://github.com/ROCm/aomp/blob/aomp-dev/bin/build_aomp.sh#L131-L134
+  #
+  # For ROCM build (AOMP_STANDALONE_BUILD=0) the components roct, rocr,
+  # libdevice, project, comgr, rocminfo, hipamd, rocdbgapi, rocgdb,
+  # roctracer, rocprofiler, rocm_smi_lib, and amdsmi should be found
+  # in ROCM in /opt/rocm.  The ROCM build only needs these components:
 - name: rocmDependencies
  type: object
  default:
-    - llvm-project
-    - ROCR-Runtime
- name: rocmTestDependencies
-  type: object
-  default:
+    - amdsmi
    - clr
    - llvm-project
+    - ROCdbgapi
+    - ROCgdb
+    - rocm-cmake
    - rocm-core
    - rocminfo
-    - ROCR-Runtime
+    - rocm_smi_lib
+    - rocprofiler
    - rocprofiler-register
+    - rocprofiler-sdk
+    - ROCR-Runtime
+    - roctracer

 - name: jobMatrix
  type: object
  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.os }}
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    pool:
-      ${{ if eq(job.os, 'ubuntu2404') }}:
-        name: rocm-ci_medium_build_pool_2404
-      ${{ else }}:
-        name: ${{ variables.MEDIUM_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    # checkout the repos tied to openmp-extras, plus llvm-project
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: aomp-extras_repo
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: flang_repo
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: llvm-project_repo
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
-      parameters:
-        dependencyList:
-          - gtest
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-      parameters:
-        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmDependencies }}
-        os: ${{ job.os }}
-        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        os: ${{ job.os }}
-        useAmdclang: false
-        componentName: extras
-        cmakeBuildDir: '$(Build.SourcesDirectory)/aomp-extras/build'
-        cmakeSourceDir: '$(Build.SourcesDirectory)/aomp-extras'
-        installDir: '$(Build.BinariesDirectory)/llvm'
-        extraBuildFlags: >-
-          -DLLVM_DIR=$(Agent.BuildDirectory)/rocm/llvm
-          -DCMAKE_BUILD_TYPE=Release
-          -DAOMP_STANDALONE_BUILD=0
-          -DAOMP_VERSION_STRING=9.99.99
-          -GNinja
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        os: ${{ job.os }}
-        componentName: openmp
-        cmakeBuildDir: '$(Build.SourcesDirectory)/llvm-project/openmp/build'
-        cmakeSourceDir: '$(Build.SourcesDirectory)/llvm-project/openmp'
-        installDir: '$(Build.BinariesDirectory)/llvm'
-        extraBuildFlags: >-
-          -DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Build.BinariesDirectory)"
-          -DCMAKE_BUILD_TYPE=Release
-          -DOPENMP_TEST_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
-          -DOPENMP_TEST_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
-          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
-          -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
-          -DOPENMP_ENABLE_LIBOMPTARGET=1
-          -DLIBOMP_COPY_EXPORTS=OFF
-          -DLIBOMP_OMPD_SUPPORT=ON
-          -DCMAKE_SKIP_INSTALL_RPATH=TRUE
-          -DLLVM_MAIN_INCLUDE_DIR=$(Build.SourcesDirectory)/llvm-project/llvm/include
-          -DLIBOMP_FORTRAN_MODULES_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/flang
-          -DLIBOMP_MODULES_INSTALL_PATH=$(Build.BinariesDirectory)/llvm/include/flang/
-        multithreadFlag: -- -j32
-    - task: Bash@3
-      displayName: 'ROCm symbolic link'
-      inputs:
-        targetType: inline
-        script: sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        os: ${{ job.os }}
-        componentName: offload
-        cmakeBuildDir: '$(Build.SourcesDirectory)/llvm-project/offload/build'
-        cmakeSourceDir: '$(Build.SourcesDirectory)/llvm-project/offload'
-        installDir: '$(Build.BinariesDirectory)/llvm'
-        extraBuildFlags: >-
-          -DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Build.BinariesDirectory)"
-          -DCMAKE_BUILD_TYPE=Release
-          -DOPENMP_TEST_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
-          -DOPENMP_TEST_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
-          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
-          -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
-          -DCMAKE_SKIP_INSTALL_RPATH=TRUE
-          -DLLVM_MAIN_INCLUDE_DIR=$(Build.SourcesDirectory)/llvm-project/llvm/include
-          -DLIBOMPTARGET_LLVM_INCLUDE_DIRS=$(Build.SourcesDirectory)/llvm-project/llvm/include
-          -DCMAKE_EXE_LINKER_FLAGS="-L$(Agent.BuildDirectory)/rocm/llvm/lib"
-          -DCMAKE_SHARED_LINKER_FLAGS="-L$(Agent.BuildDirectory)/rocm/llvm/lib"
-          -GNinja
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        componentName: ${{ parameters.componentName }}
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        componentName: ${{ parameters.componentName }}
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    - ${{ if eq(job.os, 'ubuntu2204') }}:
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
+- job: aomp
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  pool: ${{ variables.MEDIUM_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  # Check out the repos tied to openmp-extras, plus llvm-project.
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: aomp-extras_repo
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: flang_repo
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: llvm-project_repo
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+    parameters:
+      checkoutRef: ${{ parameters.checkoutRef }}
+      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      componentName: extras
+      cmakeBuildDir: '$(Build.SourcesDirectory)/aomp-extras/build'
+      installDir: '$(Build.BinariesDirectory)/llvm'
+      extraBuildFlags: >-
+        -DLLVM_DIR=$(Agent.BuildDirectory)/rocm/llvm
+        -DCMAKE_BUILD_TYPE=Release
+        -DAOMP_STANDALONE_BUILD=0
+        -DAOMP_VERSION_STRING=9.99.99
+        -GNinja
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      componentName: openmp
+      cmakeBuildDir: '$(Build.SourcesDirectory)/llvm-project/openmp/build'
+      installDir: '$(Build.BinariesDirectory)/llvm'
+      extraBuildFlags: >-
+        -DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Build.BinariesDirectory)"
+        -DCMAKE_BUILD_TYPE=Release
+        -DOPENMP_TEST_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
+        -DOPENMP_TEST_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
+        -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
+        -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
+        -DOPENMP_ENABLE_LIBOMPTARGET=1
+        -DLIBOMP_COPY_EXPORTS=OFF
+        -DLIBOMP_OMPT_SUPPORT=ON
+        -DLIBOMP_OMPD_SUPPORT=ON
+        -DCMAKE_SKIP_INSTALL_RPATH=TRUE
+        -DLLVM_MAIN_INCLUDE_DIR=$(Build.SourcesDirectory)/llvm-project/llvm/include
+        -DLIBOMP_FORTRAN_MODULES_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/flang
+        -DLIBOMP_MODULES_INSTALL_PATH=$(Build.BinariesDirectory)/llvm/include/flang/
+        -GNinja
+  - task: Bash@3
+    displayName: 'ROCm symbolic link'
+    inputs:
+      targetType: inline
+      script: sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      componentName: offload
+      cmakeBuildDir: '$(Build.SourcesDirectory)/llvm-project/offload/build'
+      installDir: '$(Build.BinariesDirectory)/llvm'
+      extraBuildFlags: >-
+        -DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Build.BinariesDirectory)"
+        -DCMAKE_BUILD_TYPE=Release
+        -DOPENMP_TEST_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
+        -DOPENMP_TEST_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
+        -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
+        -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
+        -DCMAKE_SKIP_INSTALL_RPATH=TRUE
+        -DLLVM_MAIN_INCLUDE_DIR=$(Build.SourcesDirectory)/llvm-project/llvm/include
+        -DLIBOMPTARGET_LLVM_INCLUDE_DIRS=$(Build.SourcesDirectory)/llvm-project/llvm/include
+        -DCMAKE_EXE_LINKER_FLAGS="-L$(Agent.BuildDirectory)/rocm/llvm/lib"
+        -DCMAKE_SHARED_LINKER_FLAGS="-L$(Agent.BuildDirectory)/rocm/llvm/lib"
+        -GNinja
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}

 - ${{ each job in parameters.jobMatrix.testJobs }}:
-  - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
-    dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}
+  - job: aomp_test_${{ job.target }}
+    dependsOn: aomp
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
@@ -215,16 +195,12 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-      parameters:
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmTestDependencies }}
-        os: ${{ job.os }}
+        dependencyList: ${{ parameters.rocmDependencies }}
    - task: Bash@3
      displayName: ROCm symbolic link
      inputs:
@@ -236,7 +212,7 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: aomp-extras_repo
-    # these copy steps are from the aomp prototype script for test prep
+  # these copy steps are from the aomp prototype script for test prep
    - task: CopyFiles@2
      displayName: 'Copy AOMP contents'
      inputs:
--- a/.azuredevops/components/copyHIP.yml
+++ b/.azuredevops/components/copyHIP.yml
@@ -1,42 +1,34 @@
 parameters:
- name: jobMatrix
-  type: object
-  default:
-    copyJobs:
-      - { os: ubuntu2204, backend: amd }
-      - { os: almalinux8, backend: amd }
-      - { os: ubuntu2204, backend: nvidia }
-      - { os: almalinux8, backend: nvidia }
+- name: checkoutRepo
+  type: string
+  default: 'self'
+- name: checkoutRef
+  type: string
+  default: ''

 # hip and clr are tightly-coupled
 # run this same template for both repos
 # any changes for clr should just trigger HIP pipeline
 jobs:
- ${{ each job in parameters.jobMatrix.copyJobs }}:
-  - job: hip_clr_combined_${{ job.os }}_${{ job.backend }}
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    pool:
-      vmImage: ${{ variables.BASE_BUILD_POOL }}
-    workspace:
-      clean: all
-    steps:
-  # checkout nothing, just copy artifacts from triggering HIP job
-  # and then publish for this clr job or for this hipother job to maintain latest
-    - checkout: none
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-download.yml
-      parameters:
-        componentName: HIP
-        pipelineId: $(HIP_PIPELINE_ID)
-        fileFilter: ${{ job.os }}*${{ job.backend }}
-    - task: Bash@3
-      displayName: Copy HIP artifacts
-      inputs:
-        targetType: inline
-        script: cp -a $(Agent.BuildDirectory)/rocm/* $(Build.BinariesDirectory)/
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+- job: hip_clr_combined
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  pool:
+    vmImage: ${{ variables.BASE_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  # Check out nothing: just copy the artifacts from the triggering HIP job,
+  # then republish them for this clr or hipother job so its latest stays current.
+  - checkout: none
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-download.yml
+    parameters:
+      componentName: HIP
+      pipelineId: $(HIP_PIPELINE_ID)
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
+    parameters:
+      sourceDir: $(Agent.BuildDirectory)/rocm
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
--- a/.azuredevops/components/hipBLAS-common.yml
+++ b/.azuredevops/components/hipBLAS-common.yml
@@ -1,29 +1,10 @@
 parameters:
- name: componentName
-  type: string
-  default: hipBLAS-common
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
-# monorepo related parameters
- name: sparseCheckoutDir
-  type: string
-  default: ''
- name: triggerDownstreamJobs
-  type: boolean
-  default: false
- name: downstreamAggregateNames
-  type: string
-  default: ''
- name: buildDependsOn
-  type: object
-  default: null
- name: unifiedBuild
-  type: boolean
-  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -33,103 +14,54 @@ parameters:
  type: object
  default:
    - cmake
-    - git
    - ninja-build
+    - git
    - wget
+    - python3-pip
 - name: rocmDependencies
  type: object
  default:
-    - clr
-    - llvm-project
    - rocm-cmake
-    - rocminfo
+    - llvm-project
    - ROCR-Runtime
-
- name: jobMatrix
-  type: object
-  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }
-# - name: downstreamComponentMatrix
-#   type: object
-#   default:
-#     - hipBLASLt:
-#       name: hipBLASLt
-#       sparseCheckoutDir: projects/hipblaslt
-#       skipUnifiedBuild: 'false'
-#       buildDependsOn:
-#         - hipBLAS_common_build
+    - clr
+    - rocminfo

 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: hipBLAS_common_build_${{ job.os }}
-    ${{ if parameters.buildDependsOn }}:
-      dependsOn:
-        - ${{ each build in parameters.buildDependsOn }}:
-          - ${{ build }}_${{ job.os }}
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    - name: ROCM_PATH
-      value: $(Agent.BuildDirectory)/rocm
-    pool:
-      vmImage: ${{ variables.BASE_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-      parameters:
-        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmDependencies }}
-        os: ${{ job.os }}
-        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        ${{ if parameters.triggerDownstreamJobs }}:
-            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        os: ${{ job.os }}
-        extraBuildFlags: >-
-          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-          -GNinja
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        componentName: ${{ parameters.componentName }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        os: ${{ job.os }}
-        componentName: ${{ parameters.componentName }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-    #   parameters:
-    #     aptPackages: ${{ parameters.aptPackages }}
-    #     extraEnvVars:
-    #       - ROCM_PATH:::/home/user/workspace/rocm
-
-# - ${{ if parameters.triggerDownstreamJobs }}:
-#   - ${{ each component in parameters.downstreamComponentMatrix }}:
-#     - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
-#       - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
-#         parameters:
-#           checkoutRepo: ${{ parameters.checkoutRepo }}
-#           sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
-#           buildDependsOn: ${{ component.buildDependsOn }}
-#           downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
-#           triggerDownstreamJobs: true
-#           unifiedBuild: ${{ parameters.unifiedBuild }}
+- job: hipBLAS_common
+  variables:
+  - group: common
+  - name: ROCM_PATH
+    value: $(Agent.BuildDirectory)/rocm
+  - template: /.azuredevops/variables-global.yml
+  pool:
+    vmImage: ${{ variables.BASE_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+    parameters:
+      checkoutRef: ${{ parameters.checkoutRef }}
+      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      extraBuildFlags: >-
+        -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
+        -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
+        -GNinja
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+  # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+  #   parameters:
+  #     aptPackages: ${{ parameters.aptPackages }}
+  #     extraEnvVars:
+  #       - ROCM_PATH:::/home/user/workspace/rocm
--- a/.azuredevops/components/hipBLASLt.yml
+++ b/.azuredevops/components/hipBLASLt.yml
@@ -1,29 +1,10 @@
 parameters:
- name: componentName
-  type: string
-  default: hipBLASLt
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
-# monorepo related parameters
- name: sparseCheckoutDir
-  type: string
-  default: ''
- name: triggerDownstreamJobs
-  type: boolean
-  default: false
- name: downstreamAggregateNames
-  type: string
-  default: ''
- name: buildDependsOn
-  type: object
-  default: null
- name: unifiedBuild
-  type: boolean
-  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -32,8 +13,6 @@ parameters:
 - name: aptPackages
  type: object
  default:
-    - ccache
-    - gfortran
    - git
    - libdrm-dev
    - libmsgpack-dev
@@ -41,6 +20,9 @@ parameters:
    - ninja-build
    - python3-pip
    - python3-venv
+    - gfortran
+    - libblas-dev
+    - ccache
 - name: pipModules
  type: object
  default:
@@ -55,7 +37,6 @@ parameters:
    - hipBLAS-common
    - llvm-project
    - rocminfo
-    - rocm-cmake
    - rocm_smi_lib
    - rocprofiler-register
    - ROCR-Runtime
@@ -77,37 +58,20 @@ parameters:
  type: object
  default:
    buildJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
-      # - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1030 }
-      # - { os: almalinux8, packageManager: dnf, target: gfx942 }
-      # - { os: almalinux8, packageManager: dnf, target: gfx90a }
-      # - { os: almalinux8, packageManager: dnf, target: gfx1201 }
-      # - { os: almalinux8, packageManager: dnf, target: gfx1100 }
-      # - { os: almalinux8, packageManager: dnf, target: gfx1030 }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
-# - name: downstreamComponentMatrix
-#   type: object
-#   default:
-#     - rocBLAS:
-#       name: rocBLAS
-#       sparseCheckoutDir: projects/rocblas
-#       skipUnifiedBuild: 'false'
-#       buildDependsOn:
-#         - hipBLASLt_build
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
+  - job: hipBLASLt_build_${{ job.target }}
    timeoutInMinutes: 300
-    ${{ if parameters.buildDependsOn }}:
-      dependsOn:
-        - ${{ each build in parameters.buildDependsOn }}:
-          - ${{ build }}_${{ job.os }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
@@ -122,10 +86,6 @@ jobs:
    - name: DAY_STRING
      value: $[format('{0:ddMMyyyy}', pipeline.startTime)]
    pool: ${{ variables.ULTRA_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
    workspace:
      clean: all
    steps:
@@ -133,22 +93,17 @@ jobs:
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
-        packageManager: ${{ job.packageManager }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
-        os: ${{ job.os }}
        gpuTarget: ${{ job.target }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        ${{ if parameters.triggerDownstreamJobs }}:
-          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
    - task: Bash@3
      displayName: Add ROCm binaries to PATH
      inputs:
@@ -156,20 +111,22 @@ jobs:
        script: |
          echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
          echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
-    # hipBLASLt has a script for gtest and lapack
-    # https://github.com/ROCm/hipBLASLt/blob/develop/deps/CMakeLists.txt
-    # $(Agent.BuildDirectory)/deps is a temporary folder for the build process
-    # $(Agent.BuildDirectory)/s/deps is part of the hipBLASLt repo
-    - task: Bash@3
-      displayName: Build and install external dependencies
-      inputs:
-        targetType: inline
-        script: |
-          mkdir -p $(Agent.BuildDirectory)/deps
-          cd $(Agent.BuildDirectory)/deps
-          cmake -DCMAKE_POSITION_INDEPENDENT_CODE=ON $(Agent.BuildDirectory)/s/deps
-          make
-          sudo make install
+    # Build and install gtest, lapack, hipBLAS-common
+    # $(Pipeline.Workspace)/deps is a temporary folder for the build process
+    # $(Pipeline.Workspace)/s/deps is part of the hipBLASLt repo
+    - script: mkdir -p $(Pipeline.Workspace)/deps
+      displayName: Create temp folder for external dependencies
+    # hipBLASLt already has a CMake script for external deps, so we can just run that
+    # https://github.com/ROCm/hipBLASLt/blob/develop/deps/CMakeLists.txt
+    - script: cmake $(Pipeline.Workspace)/s/deps
+      displayName: Configure hipBLASLt external dependencies
+      workingDirectory: $(Pipeline.Workspace)/deps
+    - script: make
+      displayName: Build hipBLASLt external dependencies
+      workingDirectory: $(Pipeline.Workspace)/deps
+    - script: sudo make install
+      displayName: Install hipBLASLt external dependencies
+      workingDirectory: $(Pipeline.Workspace)/deps
    - script: |
        mkdir -p $(CCACHE_DIR)
        echo "##vso[task.prependpath]/usr/lib/ccache"
@@ -177,117 +134,93 @@ jobs:
    - task: Cache@2
      displayName: Ccache caching
      inputs:
-        key: hipBLASLt | ${{ job.os }} | ${{ job.target }} | $(DAY_STRING) | $(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
+        key: hipBLASLt | $(Agent.OS) | ${{ job.target }} | $(DAY_STRING) | $(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
        path: $(CCACHE_DIR)
        restoreKeys: |
-          hipBLASLt | ${{ job.os }} | ${{ job.target }} | $(DAY_STRING)
-          hipBLASLt | ${{ job.os }} | ${{ job.target }}
-          hipBLASLt | ${{ job.os }}
+          hipBLASLt | $(Agent.OS) | ${{ job.target }} | $(DAY_STRING)
+          hipBLASLt | $(Agent.OS) | ${{ job.target }}
+          hipBLASLt | $(Agent.OS)
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
-        os: ${{ job.os }}
+        # NOTE(review): Tensile_LOGIC and Tensile_CPU_THREADS below are passed with empty values; confirm that is intended.
        extraBuildFlags: >-
+          -DCMAKE_BUILD_TYPE=Release
          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-          -DCMAKE_INCLUDE_PATH=$(Agent.BuildDirectory)/rocm/llvm/include
          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
          -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
          -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
          -DCMAKE_C_COMPILER_LAUNCHER=ccache
          -DAMDGPU_TARGETS=${{ job.target }}
+          -DTensile_LOGIC=
+          -DTensile_CPU_THREADS=
+          -DTensile_LIBRARY_FORMAT=msgpack
          -DBUILD_CLIENTS_TESTS=ON
          -GNinja
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
-        os: ${{ job.os }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
-        os: ${{ job.os }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    - ${{ if eq(job.os, 'ubuntu2204') }}:
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          pipModules: ${{ parameters.pipModules }}
-          gpuTarget: ${{ job.target }}
-          extraPaths: /home/user/workspace/rocm/llvm/bin:/home/user/workspace/rocm/bin
-          installLatestCMake: true
-          extraEnvVars:
-            - HIP_ROCCLR_HOME:::/home/user/workspace/rocm
-            - TENSILE_ROCM_ASSEMBLER_PATH:::/home/user/workspace/rocm/llvm/bin/amdclang
-            - TENSILE_ROCM_OFFLOAD_BUNDLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang-offload-bundler
-            - ROCM_PATH:::/home/user/workspace/rocm
-          extraCopyDirectories:
-            - deps
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        pipModules: ${{ parameters.pipModules }}
+        gpuTarget: ${{ job.target }}
+        extraPaths: /home/user/workspace/rocm/llvm/bin:/home/user/workspace/rocm/bin
+        installLatestCMake: true
+        extraEnvVars:
+          - HIP_ROCCLR_HOME:::/home/user/workspace/rocm
+          - TENSILE_ROCM_ASSEMBLER_PATH:::/home/user/workspace/rocm/llvm/bin/amdclang
+          - TENSILE_ROCM_OFFLOAD_BUNDLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang-offload-bundler
+          - ROCM_PATH:::/home/user/workspace/rocm
+        extraCopyDirectories:
+          - deps

- ${{ if eq(parameters.unifiedBuild, False) }}:
-  - ${{ each job in parameters.jobMatrix.testJobs }}:
-    - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
-      timeoutInMinutes: 300
-      dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
-      condition:
-        and(succeeded(),
-          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
-          eq(${{ parameters.aggregatePipeline }}, False)
-        )
-      variables:
-      - group: common
-      - template: /.azuredevops/variables-global.yml
-      - name: ROCM_PATH
-        value: $(Agent.BuildDirectory)/rocm
-      pool: ${{ job.target }}_test_pool
-      workspace:
-        clean: all
-      steps:
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          pipModules: ${{ parameters.pipModules }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-        parameters:
-          preTargetFilter: ${{ parameters.componentName }}
-          os: ${{ job.os }}
-          gpuTarget: ${{ job.target }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-        parameters:
-          os: ${{ job.os }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-        parameters:
-          checkoutRef: ${{ parameters.checkoutRef }}
-          dependencyList: ${{ parameters.rocmTestDependencies }}
-          os: ${{ job.os }}
-          gpuTarget: ${{ job.target }}
-          ${{ if parameters.triggerDownstreamJobs }}:
-            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-        parameters:
-          componentName: ${{ parameters.componentName }}
-          os: ${{ job.os }}
-          testDir: '$(Agent.BuildDirectory)/rocm/bin'
-          testExecutable: './hipblaslt-test'
-          testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes --gtest_filter=*pre_checkin*'
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          pipModules: ${{ parameters.pipModules }}
-          environment: test
-          gpuTarget: ${{ job.target }}
-
-# - ${{ if parameters.triggerDownstreamJobs }}:
-#   - ${{ each component in parameters.downstreamComponentMatrix }}:
-#     - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
-#       - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
-#         parameters:
-#           checkoutRepo: ${{ parameters.checkoutRepo }}
-#           sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
-#           buildDependsOn: ${{ component.buildDependsOn }}
-#           downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
-#           triggerDownstreamJobs: true
-#           unifiedBuild: ${{ parameters.unifiedBuild }}
+- ${{ each job in parameters.jobMatrix.testJobs }}:
+  - job: hipBLASLt_test_${{ job.target }}
+    timeoutInMinutes: 300
+    dependsOn: hipBLASLt_build_${{ job.target }}
+    condition:
+      and(succeeded(),
+        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
+      )
+    variables:
+    - group: common
+    - template: /.azuredevops/variables-global.yml
+    - name: ROCM_PATH
+      value: $(Agent.BuildDirectory)/rocm
+    pool: ${{ job.target }}_test_pool
+    workspace:
+      clean: all
+    steps:
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        pipModules: ${{ parameters.pipModules }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+      parameters:
+        gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        checkoutRef: ${{ parameters.checkoutRef }}
+        dependencyList: ${{ parameters.rocmTestDependencies }}
+        gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+      parameters:
+        componentName: hipBLASLt
+        testDir: '$(Agent.BuildDirectory)/rocm/bin'
+        testExecutable: './hipblaslt-test'
+        testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes --gtest_filter=*pre_checkin*'
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        pipModules: ${{ parameters.pipModules }}
+        environment: test
+        gpuTarget: ${{ job.target }}
--- a/.azuredevops/components/hipCUB.yml
+++ b/.azuredevops/components/hipCUB.yml
@@ -1,29 +1,10 @@
 parameters:
- name: componentName
-  type: string
-  default: hipCUB
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
-# monorepo related parameters
- name: sparseCheckoutDir
-  type: string
-  default: ''
- name: triggerDownstreamJobs
-  type: boolean
-  default: false
- name: downstreamAggregateNames
-  type: string
-  default: ''
- name: buildDependsOn
-  type: object
-  default: null
- name: unifiedBuild
-  type: boolean
-  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -33,8 +14,9 @@ parameters:
  type: object
  default:
    - cmake
-    - git
    - ninja-build
+    - libgtest-dev
+    - git
    - python3-pip
 - name: rocmDependencies
  type: object
@@ -51,143 +33,103 @@ parameters:
    - llvm-project
    - rocminfo
    - rocPRIM
-    - rocprofiler-register
    - ROCR-Runtime
+    - rocprofiler-register

 - name: jobMatrix
  type: object
  default:
    buildJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1030 }
-      - { os: almalinux8, packageManager: dnf, target: gfx942 }
-      - { os: almalinux8, packageManager: dnf, target: gfx90a }
-      - { os: almalinux8, packageManager: dnf, target: gfx1201 }
-      - { os: almalinux8, packageManager: dnf, target: gfx1100 }
-      - { os: almalinux8, packageManager: dnf, target: gfx1030 }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
-    ${{ if parameters.buildDependsOn }}:
-      dependsOn:
-        - ${{ each build in parameters.buildDependsOn }}:
-          - ${{ build }}_${{ job.os }}_${{ job.target }}
+  - job: hipCUB_build_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
    pool: ${{ variables.MEDIUM_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
    workspace:
      clean: all
    steps:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
-      parameters:
-        dependencyList:
-          - gtest
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        ${{ if parameters.triggerDownstreamJobs }}:
-          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
-        os: ${{ job.os }}
-        consolidateBuildAndInstall: true
        extraBuildFlags: >-
          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
          -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
-          -DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor"
-          -DBUILD_BENCHMARK=ON
+          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
          -DBUILD_TEST=ON
          -DAMDGPU_TARGETS=${{ job.target }}
          -GNinja
-        extraCxxFlags: -Wno-deprecated-declarations
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    - ${{ if eq(job.os, 'ubuntu2204') }}:
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        gpuTarget: ${{ job.target }}

- ${{ if eq(parameters.unifiedBuild, False) }}:
-  - ${{ each job in parameters.jobMatrix.testJobs }}:
-    - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
-      dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
-      condition:
-        and(succeeded(),
-          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
-          eq(${{ parameters.aggregatePipeline }}, False)
-        )
-      variables:
-      - group: common
-      - template: /.azuredevops/variables-global.yml
-      pool: ${{ job.target }}_test_pool
-      workspace:
-        clean: all
-      steps:
-      - checkout: none
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          packageManager: ${{ job.packageManager }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-        parameters:
-          preTargetFilter: ${{ parameters.componentName }}
-          gpuTarget: ${{ job.target }}
-          os: ${{ job.os }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-        parameters:
-          checkoutRef: ${{ parameters.checkoutRef }}
-          dependencyList: ${{ parameters.rocmTestDependencies }}
-          gpuTarget: ${{ job.target }}
-          os: ${{ job.os }}
-          ${{ if parameters.triggerDownstreamJobs }}:
-            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-        parameters:
-          componentName: ${{ parameters.componentName }}
-          testDir: '$(Agent.BuildDirectory)/rocm/bin/hipcub'
-          os: ${{ job.os }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          environment: test
-          gpuTarget: ${{ job.target }}
+- ${{ each job in parameters.jobMatrix.testJobs }}:
+  - job: hipCUB_test_${{ job.target }}
+    dependsOn: hipCUB_build_${{ job.target }}
+    condition:
+      and(succeeded(),
+        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
+      )
+    variables:
+    - group: common
+    - template: /.azuredevops/variables-global.yml
+    pool: ${{ job.target }}_test_pool
+    workspace:
+      clean: all
+    steps:
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+      parameters:
+        gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        checkoutRef: ${{ parameters.checkoutRef }}
+        dependencyList: ${{ parameters.rocmTestDependencies }}
+        gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+      parameters:
+        componentName: hipCUB
+        testDir: '$(Agent.BuildDirectory)/rocm/bin/hipcub'
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        environment: test
+        gpuTarget: ${{ job.target }}
--- a/.azuredevops/components/hipFFT.yml
+++ b/.azuredevops/components/hipFFT.yml
@@ -1,29 +1,10 @@
 parameters:
- name: componentName
-  type: string
-  default: hipFFT
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
-# monorepo related parameters
- name: sparseCheckoutDir
-  type: string
-  default: ''
- name: triggerDownstreamJobs
-  type: boolean
-  default: false
- name: downstreamAggregateNames
-  type: string
-  default: ''
- name: buildDependsOn
-  type: object
-  default: null
- name: unifiedBuild
-  type: boolean
-  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -80,11 +61,7 @@ parameters:

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.target }}
-    ${{ if parameters.buildDependsOn }}:
-      dependsOn:
-        - ${{ each build in parameters.buildDependsOn }}:
-          - ${{ build }}_${{ job.target }} # todo: add OS
+  - job: hipFFT_build_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
@@ -102,15 +79,12 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        ${{ if parameters.triggerDownstreamJobs }}:
-          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -128,11 +102,9 @@ jobs:
          -GNinja
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -141,8 +113,8 @@ jobs:
    #     gpuTarget: ${{ job.target }}

 - ${{ each job in parameters.jobMatrix.testJobs }}:
-  - job: ${{ parameters.componentName }}_test_${{ job.target }}
-    dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
+  - job: hipFFT_test_${{ job.target }}
+    dependsOn: hipFFT_build_${{ job.target }}
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
@@ -162,7 +134,6 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
      parameters:
-        preTargetFilter: ${{ parameters.componentName }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
@@ -170,12 +141,10 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmTestDependencies }}
        gpuTarget: ${{ job.target }}
-        ${{ if parameters.triggerDownstreamJobs }}:
-          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
+        componentName: hipFFT
        testDir: '$(Agent.BuildDirectory)/rocm/bin'
        testExecutable: './hipfft-test'
        testParameters: '--test_prob 0.002 --gtest_output=xml:./test_output.xml --gtest_color=yes'
--- a/.azuredevops/components/hipRAND.yml
+++ b/.azuredevops/components/hipRAND.yml
@@ -1,29 +1,10 @@
 parameters:
- name: componentName
-  type: string
-  default: hipRAND
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
-# monorepo related parameters
- name: sparseCheckoutDir
-  type: string
-  default: ''
- name: triggerDownstreamJobs
-  type: boolean
-  default: false
- name: downstreamAggregateNames
-  type: string
-  default: ''
- name: buildDependsOn
-  type: object
-  default: null
- name: unifiedBuild
-  type: boolean
-  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -33,18 +14,18 @@ parameters:
  type: object
  default:
    - cmake
-    - git
    - ninja-build
+    - googletest
+    - git
    - python3-pip
 - name: rocmDependencies
  type: object
  default:
-    - clr
    - llvm-project
-    - rocm-cmake
+    - ROCR-Runtime
+    - clr
    - rocminfo
    - rocRAND
-    - ROCR-Runtime
 - name: rocmTestDependencies
  type: object
  default:
@@ -52,168 +33,110 @@ parameters:
    - llvm-project
    - rocminfo
    - rocprofiler-register
-    - rocRAND
    - ROCR-Runtime
+    - rocRAND

 - name: jobMatrix
  type: object
  default:
    buildJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1030 }
-      - { os: almalinux8, packageManager: dnf, target: gfx942 }
-      - { os: almalinux8, packageManager: dnf, target: gfx90a }
-      - { os: almalinux8, packageManager: dnf, target: gfx1201 }
-      - { os: almalinux8, packageManager: dnf, target: gfx1100 }
-      - { os: almalinux8, packageManager: dnf, target: gfx1030 }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
-# - name: downstreamComponentMatrix
-#   type: object
-#   default:
-#     - rocFFT:
-#       name: rocFFT
-#       sparseCheckoutDir: projects/rocfft
-#       skipUnifiedBuild: 'false'
-#       buildDependsOn:
-#         - hipRAND_build
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
-    ${{ if parameters.buildDependsOn }}:
-      dependsOn:
-        - ${{ each build in parameters.buildDependsOn }}:
-          - ${{ build }}_${{ job.os }}_${{ job.target }}
+  - job: hipRAND_build_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
    - name: HIP_ROCCLR_HOME
      value: $(Build.BinariesDirectory)/rocm
-    pool: ${{ variables.MEDIUM_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
+    pool:
+      vmImage: ${{ variables.BASE_BUILD_POOL }}
    workspace:
      clean: all
    steps:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
-      parameters:
-        dependencyList:
-          - gtest
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        ${{ if parameters.triggerDownstreamJobs }}:
-          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
-        os: ${{ job.os }}
-        consolidateBuildAndInstall: true
        extraBuildFlags: >-
          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
          -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
          -DBUILD_TEST=ON
          -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
-          -DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor"
+          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
          -DCMAKE_BUILD_TYPE=Release
          -DAMDGPU_TARGETS=${{ job.target }}
          -GNinja
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    - ${{ if eq(job.os, 'ubuntu2204') }}:
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          gpuTarget: ${{ job.target }}
-          extraEnvVars:
-            - HIP_ROCCLR_HOME:::/home/user/workspace/rocm
+    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+    #   parameters:
+    #     aptPackages: ${{ parameters.aptPackages }}
+    #     gpuTarget: ${{ job.target }}
+    #     extraEnvVars:
+    #       - HIP_ROCCLR_HOME:::/home/user/workspace/rocm

- ${{ if eq(parameters.unifiedBuild, False) }}:
-  - ${{ each job in parameters.jobMatrix.testJobs }}:
-    - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
-      dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
-      condition:
-          and(succeeded(),
-            eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-            not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
-            eq(${{ parameters.aggregatePipeline }}, False)
-          )
-      variables:
-      - group: common
-      - template: /.azuredevops/variables-global.yml
-      pool: ${{ job.target }}_test_pool
-      workspace:
-        clean: all
-      steps:
-      - checkout: none
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          packageManager: ${{ job.packageManager }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-        parameters:
-          preTargetFilter: ${{ parameters.componentName }}
-          os: ${{ job.os }}
-          gpuTarget: ${{ job.target }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-        parameters:
-          checkoutRef: ${{ parameters.checkoutRef }}
-          dependencyList: ${{ parameters.rocmTestDependencies }}
-          gpuTarget: ${{ job.target }}
-          os: ${{ job.os }}
-          ${{ if parameters.triggerDownstreamJobs }}:
-            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-        parameters:
-          componentName: ${{ parameters.componentName }}
-          testDir: '$(Agent.BuildDirectory)/rocm/bin/hipRAND'
-          os: ${{ job.os }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          environment: test
-          gpuTarget: ${{ job.target }}
-
-# - ${{ if parameters.triggerDownstreamJobs }}:
-#   - ${{ each component in parameters.downstreamComponentMatrix }}:
-#     - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
-#       - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
-#         parameters:
-#           checkoutRepo: ${{ parameters.checkoutRepo }}
-#           sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
-#           buildDependsOn: ${{ component.buildDependsOn }}
-#           downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
-#           triggerDownstreamJobs: true
-#           unifiedBuild: ${{ parameters.unifiedBuild }}
+- ${{ each job in parameters.jobMatrix.testJobs }}:
+  - job: hipRAND_test_${{ job.target }}
+    dependsOn: hipRAND_build_${{ job.target }}
+    condition:
+        and(succeeded(),
+          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+          eq(${{ parameters.aggregatePipeline }}, False)
+        )
+    variables:
+    - group: common
+    - template: /.azuredevops/variables-global.yml
+    pool: ${{ job.target }}_test_pool
+    workspace:
+      clean: all
+    steps:
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+      parameters:
+        gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        checkoutRef: ${{ parameters.checkoutRef }}
+        dependencyList: ${{ parameters.rocmTestDependencies }}
+        gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+      parameters:
+        componentName: hipRAND
+        testDir: '$(Agent.BuildDirectory)/rocm/bin/hipRAND'
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        environment: test
+        gpuTarget: ${{ job.target }}
--- a/.azuredevops/components/hipSOLVER.yml
+++ b/.azuredevops/components/hipSOLVER.yml
@@ -92,8 +92,7 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        componentName: external
-        cmakeBuildDir: '$(Build.SourcesDirectory)/deps/build'
-        cmakeSourceDir: '$(Build.SourcesDirectory)/deps'
+        cmakeBuildDir: 'deps/build'
        installDir: '$(Pipeline.Workspace)/deps-install'
        extraBuildFlags: >-
          -DBUILD_BOOST=OFF
--- a/.azuredevops/components/llvm-project.yml
+++ b/.azuredevops/components/llvm-project.yml
@@ -14,188 +14,142 @@ parameters:
  type: object
  default:
    - cmake
+    - python3-pip
    - libnuma-dev
    - ninja-build
-    - pkg-config
    - python-is-python3
-    - python3-pip
    - zlib1g-dev
+    - pkg-config
 - name: rocmDependencies
  type: object
  default:
    - rocm-cmake

- name: jobMatrix
-  type: object
-  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: ubuntu2404, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }
-
 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: llvm_project_${{ job.os }}
-    pool:
-      ${{ if eq(job.os, 'ubuntu2404') }}:
-        name: 'rocm-ci_high_build_pool_2404' #temporarily using 'high' pool while 'ultra' is down
-      ${{ else }}:
-        name: 'rocm-ci_ultra_build_pool'
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    - name: HIP_DEVICE_LIB_PATH
-      value: '$(Build.BinariesDirectory)/amdgcn/bitcode'
-    - name: HIP_PATH
-      value: '$(Agent.BuildDirectory)/rocm'
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-      parameters:
-        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmDependencies }}
-        skipLlvmSymlink: true
-        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        componentName: rocm-llvm
-        os: ${{ job.os }}
-        useAmdclang: false
-        extraBuildFlags: >-
-          -DCMAKE_PREFIX_PATH="$(Build.BinariesDirectory)/llvm;$(Build.BinariesDirectory)"
-          -DCMAKE_BUILD_TYPE=Release
-          -DLLVM_ENABLE_PROJECTS=clang;lld;clang-tools-extra;mlir;flang
-          -DLLVM_ENABLE_RUNTIMES=compiler-rt;libunwind;libcxx;libcxxabi
-          -DCLANG_ENABLE_AMDCLANG=ON
-          -DLLVM_TARGETS_TO_BUILD=AMDGPU;X86
-          -DLIBCXX_ENABLE_SHARED=OFF
-          -DLIBCXX_ENABLE_STATIC=ON
-          -DLIBCXX_INSTALL_LIBRARY=OFF
-          -DLIBCXX_INSTALL_HEADERS=OFF
-          -DLIBCXXABI_ENABLE_SHARED=OFF
-          -DLIBCXXABI_ENABLE_STATIC=ON
-          -DLIBCXXABI_INSTALL_STATIC_LIBRARY=OFF
-          -DLLVM_BUILD_DOCS=OFF
-          -DLLVM_ENABLE_SPHINX=OFF
-          -DLLVM_ENABLE_ASSERTIONS=OFF
-          -DLLVM_ENABLE_Z3_SOLVER=OFF
-          -DLLVM_ENABLE_ZLIB=ON
-          -DCLANG_DEFAULT_LINKER=lld
-          -DCLANG_DEFAULT_RTLIB=compiler-rt
-          -DCLANG_DEFAULT_UNWINDLIB=libgcc
-          -DSANITIZER_AMDGPU=OFF
-          -DPACKAGE_VENDOR=AMD
-          -DCLANG_LINK_FLANG_LEGACY=ON
-          -DCMAKE_CXX_STANDARD=17
-          -DROCM_LLVM_BACKWARD_COMPAT_LINK=$(Build.BinariesDirectory)/llvm
-          -DROCM_LLVM_BACKWARD_COMPAT_LINK_TARGET=./lib/llvm
-          -GNinja
-        cmakeBuildDir: '$(Build.SourcesDirectory)/llvm/build'
-        cmakeSourceDir: '$(Build.SourcesDirectory)/llvm'
-        installDir: '$(Build.BinariesDirectory)/llvm'
-    # use llvm-lit to run unit tests for llvm, clang, and lld
-    - task: Bash@3
-      displayName: 'Copy llvm-lit to install directory'
-      inputs:
-        targetType: inline
-        script: |
-          cp $(Build.SourcesDirectory)/llvm/build/bin/llvm-lit $(Build.BinariesDirectory)/llvm/bin/
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-      parameters:
-        componentName: check-llvm
-        testDir: 'llvm/build'
-        testExecutable: './bin/llvm-lit'
-        testParameters: '-q --xunit-xml-output=llvm_test_output.xml --filter-out="live-debug-values-spill-tracking" ./test'
-        testOutputFile: llvm_test_output.xml
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-      parameters:
-        componentName: check-clang
-        testDir: 'llvm/build'
-        testExecutable: './bin/llvm-lit'
-        testParameters: '-q --xunit-xml-output=clang_test_output.xml ./tools/clang/test'
-        testOutputFile: clang_test_output.xml
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-      parameters:
-        componentName: check-lld
-        testDir: 'llvm/build'
-        testExecutable: './bin/llvm-lit'
-        testParameters: '-q --xunit-xml-output=lld_test_output.xml ./tools/lld/test'
-        testOutputFile: lld_test_output.xml
-        os: ${{ job.os }}
-    - task: CopyFiles@2
-      displayName: Copy FileCheck for Publishing
-      inputs:
-        CleanTargetFolder: false
-        SourceFolder: llvm/build/bin
-        Contents: FileCheck
-        TargetFolder: $(Build.BinariesDirectory)/llvm/bin
-        retryCount: 3
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        componentName: device-libs
-        os: ${{ job.os }}
-        useAmdclang: false
-        extraBuildFlags: >-
-          -DCMAKE_PREFIX_PATH="$(Build.SourcesDirectory)/llvm/build"
-          -DCMAKE_BUILD_TYPE=Release
-        cmakeBuildDir: '$(Build.SourcesDirectory)/amd/device-libs/build'
-        cmakeSourceDir: '$(Build.SourcesDirectory)/amd/device-libs'
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        componentName: comgr
-        os: ${{ job.os }}
-        useAmdclang: false
-        extraBuildFlags: >-
-          -DCMAKE_PREFIX_PATH="$(Build.SourcesDirectory)/llvm/build;$(Build.SourcesDirectory)/amd/device-libs/build"
-          -DCOMGR_DISABLE_SPIRV=1
-          -DCMAKE_BUILD_TYPE=Release
-        cmakeBuildDir: '$(Build.SourcesDirectory)/amd/comgr/build'
-        cmakeSourceDir: '$(Build.SourcesDirectory)/amd/comgr'
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-      parameters:
-        componentName: comgr
-        testParameters: '--output-on-failure --force-new-ctest-process --output-junit comgr_test_output.xml'
-        testDir: 'amd/comgr/build'
-        testOutputFile: comgr_test_output.xml
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        componentName: hipcc
-        os: ${{ job.os }}
-        useAmdclang: false
-        extraBuildFlags: >-
-          -DCMAKE_BUILD_TYPE=Release
-          -DHIPCC_BACKWARD_COMPATIBILITY=OFF
-        cmakeBuildDir: '$(Build.SourcesDirectory)/amd/hipcc/build'
-        cmakeSourceDir: '$(Build.SourcesDirectory)/amd/hipcc'
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    - ${{ if eq(job.os, 'ubuntu2204') }}:
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          environment: combined
-          extraEnvVars:
-            - HIP_DEVICE_LIB_PATH:::/home/user/workspace/bin/amdgcn/bitcode
-            - HIP_PATH:::/home/user/workspace/rocm
+- job: llvm_project
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  - name: HIP_DEVICE_LIB_PATH
+    value: '$(Build.BinariesDirectory)/amdgcn/bitcode'
+  - name: HIP_PATH
+    value: '$(Agent.BuildDirectory)/rocm'
+  pool: ${{ variables.ULTRA_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+    parameters:
+      checkoutRef: ${{ parameters.checkoutRef }}
+      dependencyList: ${{ parameters.rocmDependencies }}
+      skipLlvmSymlink: true
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      componentName: rocm-llvm
+      extraBuildFlags: >-
+        -DCMAKE_PREFIX_PATH="$(Build.BinariesDirectory)/llvm;$(Build.BinariesDirectory)"
+        -DCMAKE_BUILD_TYPE=Release
+        -DLLVM_ENABLE_PROJECTS=clang;lld;clang-tools-extra;mlir;flang
+        -DLLVM_ENABLE_RUNTIMES=compiler-rt;libunwind;libcxx;libcxxabi
+        -DCLANG_ENABLE_AMDCLANG=ON
+        -DLLVM_TARGETS_TO_BUILD=AMDGPU;X86
+        -DLIBCXX_ENABLE_SHARED=OFF
+        -DLIBCXX_ENABLE_STATIC=ON
+        -DLIBCXX_INSTALL_LIBRARY=OFF
+        -DLIBCXX_INSTALL_HEADERS=OFF
+        -DLIBCXXABI_ENABLE_SHARED=OFF
+        -DLIBCXXABI_ENABLE_STATIC=ON
+        -DLIBCXXABI_INSTALL_STATIC_LIBRARY=OFF
+        -DLLVM_BUILD_DOCS=OFF
+        -DLLVM_ENABLE_SPHINX=OFF
+        -DLLVM_ENABLE_ASSERTIONS=OFF
+        -DLLVM_ENABLE_Z3_SOLVER=OFF
+        -DLLVM_ENABLE_ZLIB=ON
+        -DCLANG_DEFAULT_LINKER=lld
+        -DCLANG_DEFAULT_RTLIB=compiler-rt
+        -DCLANG_DEFAULT_UNWINDLIB=libgcc
+        -DSANITIZER_AMDGPU=OFF
+        -DPACKAGE_VENDOR=AMD
+        -DCLANG_LINK_FLANG_LEGACY=ON
+        -DCMAKE_CXX_STANDARD=17
+        -DROCM_LLVM_BACKWARD_COMPAT_LINK=$(Build.BinariesDirectory)/llvm
+        -DROCM_LLVM_BACKWARD_COMPAT_LINK_TARGET=./lib/llvm
+        -GNinja
+      cmakeBuildDir: 'llvm/build'
+      installDir: '$(Build.BinariesDirectory)/llvm'
+# use llvm-lit to run unit tests for llvm, clang, and lld
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+    parameters:
+      componentName: check-llvm
+      testDir: 'llvm/build'
+      testExecutable: './bin/llvm-lit'
+      testParameters: '-q --xunit-xml-output=llvm_test_output.xml --filter-out="live-debug-values-spill-tracking" ./test'
+      testOutputFile: llvm_test_output.xml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+    parameters:
+      componentName: check-clang
+      testDir: 'llvm/build'
+      testExecutable: './bin/llvm-lit'
+      testParameters: '-q --xunit-xml-output=clang_test_output.xml ./tools/clang/test'
+      testOutputFile: clang_test_output.xml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+    parameters:
+      componentName: check-lld
+      testDir: 'llvm/build'
+      testExecutable: './bin/llvm-lit'
+      testParameters: '-q --xunit-xml-output=lld_test_output.xml ./tools/lld/test'
+      testOutputFile: lld_test_output.xml
+  - task: CopyFiles@2
+    displayName: Copy FileCheck for Publishing
+    inputs:
+      CleanTargetFolder: false
+      SourceFolder: llvm/build/bin
+      Contents: FileCheck
+      TargetFolder: $(Build.BinariesDirectory)/llvm/bin
+      retryCount: 3
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      componentName: device-libs
+      extraBuildFlags: >-
+        -DCMAKE_PREFIX_PATH="$(Build.SourcesDirectory)/llvm/build"
+        -DCMAKE_BUILD_TYPE=Release
+      cmakeBuildDir: 'amd/device-libs/build'
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      componentName: comgr
+      extraBuildFlags: >-
+        -DCMAKE_PREFIX_PATH="$(Build.SourcesDirectory)/llvm/build;$(Build.SourcesDirectory)/amd/device-libs/build"
+        -DCOMGR_DISABLE_SPIRV=1
+        -DCMAKE_BUILD_TYPE=Release
+      cmakeBuildDir: 'amd/comgr/build'
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+    parameters:
+      componentName: comgr
+      testParameters: '--output-on-failure --force-new-ctest-process --output-junit comgr_test_output.xml'
+      testDir: 'amd/comgr/build'
+      testOutputFile: comgr_test_output.xml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      componentName: hipcc
+      extraBuildFlags: >-
+        -DCMAKE_BUILD_TYPE=Release
+        -DHIPCC_BACKWARD_COMPATIBILITY=OFF
+      cmakeBuildDir: 'amd/hipcc/build'
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+      environment: combined
+      extraEnvVars:
+        - HIP_DEVICE_LIB_PATH:::/home/user/workspace/bin/amdgcn/bitcode
+        - HIP_PATH:::/home/user/workspace/rocm
--- a/.azuredevops/components/rccl.yml
+++ b/.azuredevops/components/rccl.yml
@@ -15,6 +15,7 @@ parameters:
  default:
    - cmake
    - git
+    - googletest
    - libboost-program-options-dev
    - libdrm-dev
    - libfftw3-dev
@@ -89,10 +90,6 @@ jobs:
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
        submoduleBehaviour: recursive
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
-      parameters:
-        dependencyList:
-          - gtest
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
@@ -104,11 +101,12 @@ jobs:
        extraBuildFlags: >-
          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc
          -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc
+          -DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
          -DCMAKE_BUILD_TYPE=Release
          -DROCM_PATH=$(Agent.BuildDirectory)/rocm
          -DBUILD_TESTS=ON
          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/rocm/share/rocm/cmake;$(Agent.BuildDirectory)/rocm/libexec/hipify
-          -DGPU_TARGETS=${{ job.target }}
+          -DAMDGPU_TARGETS=${{ job.target }}
          -GNinja
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
--- a/.azuredevops/components/rdc.yml
+++ b/.azuredevops/components/rdc.yml
@@ -105,7 +105,6 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        cmakeBuildDir: $(Build.SourcesDirectory)/grpc/build
-        cmakeSourceDir: $(Build.SourcesDirectory)/grpc
        installDir: $(Build.SourcesDirectory)/bin
        extraBuildFlags: >-
          -DgRPC_INSTALL=ON
--- a/.azuredevops/components/rocAL.yml
+++ b/.azuredevops/components/rocAL.yml
@@ -125,7 +125,6 @@ jobs:
      parameters:
        componentName: PyBind11
        cmakeBuildDir: '$(Build.SourcesDirectory)/pybind11/build'
-        cmakeSourceDir: '$(Build.SourcesDirectory)/pybind11'
        customInstallPath: false
        installEnabled: false
        extraBuildFlags: >-
@@ -142,7 +141,6 @@ jobs:
      parameters:
        componentName: RapidJSON
        cmakeBuildDir: '$(Build.SourcesDirectory)/rapidjson/build'
-        cmakeSourceDir: '$(Build.SourcesDirectory)/rapidjson'
        customInstallPath: false
        installEnabled: false
        extraBuildFlags: >-
@@ -202,6 +200,7 @@ jobs:
      value: $(Agent.BuildDirectory)/rocm/include/rocal
    pool:
      name: ${{ job.target }}_test_pool
+      demands: firstRenderDeviceAccess
    workspace:
      clean: all
    steps:
--- a/.azuredevops/components/rocBLAS.yml
+++ b/.azuredevops/components/rocBLAS.yml
@@ -1,29 +1,10 @@
 parameters:
- name: componentName
-  type: string
-  default: rocBLAS
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
-# monorepo related parameters
- name: sparseCheckoutDir
-  type: string
-  default: ''
- name: triggerDownstreamJobs
-  type: boolean
-  default: false
- name: downstreamAggregateNames
-  type: string
-  default: ''
- name: buildDependsOn
-  type: object
-  default: null
- name: unifiedBuild
-  type: boolean
-  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -83,43 +64,19 @@ parameters:
  type: object
  default:
    buildJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
-      # - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1030 }
-      # - { os: almalinux8, packageManager: dnf, target: gfx942 }
-      # - { os: almalinux8, packageManager: dnf, target: gfx90a }
-      # - { os: almalinux8, packageManager: dnf, target: gfx1201 }
-      # - { os: almalinux8, packageManager: dnf, target: gfx1100 }
-      # - { os: almalinux8, packageManager: dnf, target: gfx1030 }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
-# - name: downstreamComponentMatrix
-#   type: object
-#   default:
-#     # rocSOLVER depends on both rocBLAS and rocPRIM
-#     # for a unified build, rocBLAS will be the one to call rocSOLVER
-#     - rocSOLVER:
-#       name: rocSOLVER
-#       sparseCheckoutDir: projects/rocsolver
-#       skipUnifiedBuild: 'false'
-#       buildDependsOn:
-#         - rocBLAS_build
-#       unifiedBuild:
-#         downstreamAggregateNames: rocBLAS+rocPRIM
-#         buildDependsOn:
-#           - rocBLAS_build
-#           - rocPRIM_build
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
-    ${{ if parameters.buildDependsOn }}:
-      dependsOn:
-        - ${{ each build in parameters.buildDependsOn }}:
-          - ${{ build }}_${{ job.os }}_${{ job.target }}
+  - job: rocBLAS_build_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
@@ -132,10 +89,6 @@ jobs:
    - name: ROCM_PATH
      value: $(Agent.BuildDirectory)/rocm
    pool: ${{ variables.MEDIUM_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
    workspace:
      clean: all
    steps:
@@ -143,26 +96,19 @@ jobs:
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
-        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aocl.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
-        os: ${{ job.os }}
        gpuTarget: ${{ job.target }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        ${{ if parameters.triggerDownstreamJobs }}:
-          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
-        os: ${{ job.os }}
        extraBuildFlags: >-
          -DCMAKE_TOOLCHAIN_FILE=toolchain-linux.cmake
          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
@@ -182,94 +128,63 @@ jobs:
          -GNinja
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
-        os: ${{ job.os }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
-        os: ${{ job.os }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    - ${{ if eq(job.os, 'ubuntu2204') }}:
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          pipModules: ${{ parameters.pipModules }}
-          installAOCL: true
-          gpuTarget: ${{ job.target }}
-          extraEnvVars:
-            - HIP_ROCCLR_HOME:::/home/user/workspace/rocm
-            - TENSILE_ROCM_ASSEMBLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang
-            - TENSILE_ROCM_OFFLOAD_BUNDLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang-offload-bundler
-            - ROCM_PATH:::/home/user/workspace/rocm
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        pipModules: ${{ parameters.pipModules }}
+        installAOCL: true
+        gpuTarget: ${{ job.target }}
+        extraEnvVars:
+          - HIP_ROCCLR_HOME:::/home/user/workspace/rocm
+          - TENSILE_ROCM_ASSEMBLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang
+          - TENSILE_ROCM_OFFLOAD_BUNDLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang-offload-bundler
+          - ROCM_PATH:::/home/user/workspace/rocm

- ${{ if eq(parameters.unifiedBuild, False) }}:
-  - ${{ each job in parameters.jobMatrix.testJobs }}:
-    - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
-      dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
-      condition:
-        and(succeeded(),
-          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
-          eq(${{ parameters.aggregatePipeline }}, False)
-        )
-      variables:
-      - group: common
-      - template: /.azuredevops/variables-global.yml
-      pool: ${{ job.target }}_test_pool
-      workspace:
-        clean: all
-      steps:
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          pipModules: ${{ parameters.pipModules }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-        parameters:
-          preTargetFilter: ${{ parameters.componentName }}
-          os: ${{ job.os }}
-          gpuTarget: ${{ job.target }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-        parameters:
-          os: ${{ job.os }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-        parameters:
-          checkoutRef: ${{ parameters.checkoutRef }}
-          dependencyList: ${{ parameters.rocmTestDependencies }}
-          os: ${{ job.os }}
-          gpuTarget: ${{ job.target }}
-          ${{ if parameters.triggerDownstreamJobs }}:
-            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-        parameters:
-          componentName: ${{ parameters.componentName }}
-          os: ${{ job.os }}
-          testDir: '$(Agent.BuildDirectory)/rocm/bin'
-          testExecutable: './rocblas-test'
-          testParameters: '--yaml rocblas_smoke.yaml --gtest_output=xml:./test_output.xml --gtest_color=yes'
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          pipModules: ${{ parameters.pipModules }}
-          environment: test
-          gpuTarget: ${{ job.target }}
-
-# - ${{ if parameters.triggerDownstreamJobs }}:
-#   - ${{ each component in parameters.downstreamComponentMatrix }}:
-#     - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
-#       - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
-#         parameters:
-#           checkoutRepo: ${{ parameters.checkoutRepo }}
-#           sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
-#           triggerDownstreamJobs: true
-#           unifiedBuild: ${{ parameters.unifiedBuild }}
-#           ${{ if parameters.unifiedBuild }}:
-#             buildDependsOn: ${{ component.unifiedBuild.buildDependsOn }}
-#             downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ component.unifiedBuild.downstreamAggregateNames }}
-#           ${{ else }}:
-#             buildDependsOn: ${{ component.buildDependsOn }}
-#             downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
+- ${{ each job in parameters.jobMatrix.testJobs }}:
+  - job: rocBLAS_test_${{ job.target }}
+    dependsOn: rocBLAS_build_${{ job.target }}
+    condition:
+      and(succeeded(),
+        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
+      )
+    variables:
+    - group: common
+    - template: /.azuredevops/variables-global.yml
+    pool: ${{ job.target }}_test_pool
+    workspace:
+      clean: all
+    steps:
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        pipModules: ${{ parameters.pipModules }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+      parameters:
+        gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        checkoutRef: ${{ parameters.checkoutRef }}
+        dependencyList: ${{ parameters.rocmTestDependencies }}
+        gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+      parameters:
+        componentName: rocBLAS
+        testDir: '$(Agent.BuildDirectory)/rocm/bin'
+        testExecutable: './rocblas-test'
+        testParameters: '--yaml rocblas_smoke.yaml --gtest_output=xml:./test_output.xml --gtest_color=yes'
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        pipModules: ${{ parameters.pipModules }}
+        environment: test
+        gpuTarget: ${{ job.target }}
--- a/.azuredevops/components/rocDecode.yml
+++ b/.azuredevops/components/rocDecode.yml
@@ -1,7 +1,4 @@
 parameters:
- name: componentName
-  type: string
-  default: rocDecode
 - name: checkoutRepo
  type: string
  default: 'self'
@@ -16,28 +13,29 @@ parameters:
 - name: aptPackages
  type: object
  default:
+    - python3-pip
    - cmake
+    - ninja-build
+    - pkg-config
    - ffmpeg
    - libavcodec-dev
    - libavformat-dev
    - libavutil-dev
-    - libdrm-dev
    - libstdc++-12-dev
    - libva-amdgpu-dev
    - mesa-amdgpu-va-drivers
-    - ninja-build
-    - pkg-config
+    - libdrm-dev
    - python3-pip
 - name: rocmDependencies
  type: object
  default:
-    - clr
-    - llvm-project
    - rocm-cmake
-    - rocm-core
-    - rocminfo
-    - rocprofiler-register
+    - llvm-project
    - ROCR-Runtime
+    - clr
+    - rocminfo
+    - rocm-core
+    - rocprofiler-register
 - name: rocmTestDependencies
  type: object
  default:
@@ -50,70 +48,53 @@ parameters:
 - name: jobMatrix
  type: object
  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.os }}
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    - name: ROCM_PATH
-      value: $(Agent.BuildDirectory)/rocm
-    pool:
-      vmImage: ${{ variables.BASE_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
-        registerROCmPackages: true
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-      parameters:
-        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmDependencies }}
-        os: ${{ job.os }}
-        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        os: ${{ job.os }}
-        consolidateBuildAndInstall: true
-        extraBuildFlags: >-
-          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-          -DCMAKE_BUILD_TYPE=Release
-          -GNinja
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        componentName: ${{ parameters.componentName }}
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        componentName: ${{ parameters.componentName }}
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-    #   parameters:
-    #     aptPackages: ${{ parameters.aptPackages }}
-    #     registerROCmPackages: true
+- job: rocDecode_build
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  - name: ROCM_PATH
+    value: $(Agent.BuildDirectory)/rocm
+  pool:
+    vmImage: ${{ variables.BASE_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+      registerROCmPackages: true
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+    parameters:
+      checkoutRef: ${{ parameters.checkoutRef }}
+      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      extraBuildFlags: >-
+        -DCMAKE_BUILD_TYPE=Release
+        -GNinja
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+  # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+  #   parameters:
+  #     aptPackages: ${{ parameters.aptPackages }}
+  #     registerROCmPackages: true

 - ${{ each job in parameters.jobMatrix.testJobs }}:
-  - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
-    dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}
+  - job: rocDecode_test_${{ job.target }}
+    dependsOn: rocDecode_build
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
@@ -127,33 +108,27 @@ jobs:
      value: $(Agent.BuildDirectory)/rocm
    pool:
      name: ${{ job.target }}_test_pool
+      demands: firstRenderDeviceAccess
    workspace:
      clean: all
    steps:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
        registerROCmPackages: true
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-      parameters:
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-      parameters:
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmTestDependencies }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - task: Bash@3
      displayName: Build rocDecode tests
      inputs:
        targetType: inline
        script: |
-          ${{ iif(eq(job.os, 'almalinux8'), 'source /opt/rh/gcc-toolset-14/enable', '') }}
          mkdir rocDecode-tests
          cd rocDecode-tests
          cmake $(Agent.BuildDirectory)/rocm/share/rocdecode/test
@@ -162,7 +137,6 @@ jobs:
      parameters:
        componentName: rocDecode
        testDir: 'rocDecode-tests'
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
--- a/.azuredevops/components/rocFFT.yml
+++ b/.azuredevops/components/rocFFT.yml
@@ -1,29 +1,10 @@
 parameters:
- name: componentName
-  type: string
-  default: rocFFT
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
-# monorepo related parameters
- name: sparseCheckoutDir
-  type: string
-  default: ''
- name: triggerDownstreamJobs
-  type: boolean
-  default: false
- name: downstreamAggregateNames
-  type: string
-  default: ''
- name: buildDependsOn
-  type: object
-  default: null
- name: unifiedBuild
-  type: boolean
-  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -78,23 +59,10 @@ parameters:
        target: gfx942
      - gfx90a:
        target: gfx90a
-# - name: downstreamComponentMatrix
-#   type: object
-#   default:
-#     - hipFFT:
-#       name: hipFFT
-#       sparseCheckoutDir: projects/hipfft
-#       skipUnifiedBuild: 'false'
-#       buildDependsOn:
-#         - rocFFT_build

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.target }}
-    ${{ if parameters.buildDependsOn }}:
-      dependsOn:
-        - ${{ each build in parameters.buildDependsOn }}:
-          - ${{ build }}_ubuntu2204_${{ job.target }} # todo: un-hardcode OS
+  - job: rocFFT_build_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
@@ -111,15 +79,12 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        ${{ if parameters.triggerDownstreamJobs }}:
-          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -136,11 +101,9 @@ jobs:
          -GNinja
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -151,8 +114,8 @@ jobs:
          - HIP_ROCCLR_HOME:::/home/user/workspace/rocm

 - ${{ each job in parameters.jobMatrix.testJobs }}:
-  - job: ${{ parameters.componentName }}_test_${{ job.target }}
-    dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
+  - job: rocFFT_test_${{ job.target }}
+    dependsOn: rocFFT_build_${{ job.target }}
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
@@ -172,7 +135,6 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
      parameters:
-        preTargetFilter: ${{ parameters.componentName }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
@@ -180,12 +142,10 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmTestDependencies }}
        gpuTarget: ${{ job.target }}
-        ${{ if parameters.triggerDownstreamJobs }}:
-          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
+        componentName: rocFFT
        testDir: '$(Agent.BuildDirectory)/rocm/bin'
        testExecutable: './rocfft-test'
        testParameters: '--test_prob 0.004 --gtest_output=xml:./test_output.xml --gtest_color=yes'
@@ -194,15 +154,3 @@ jobs:
        aptPackages: ${{ parameters.aptPackages }}
        environment: test
        gpuTarget: ${{ job.target }}
-
-# - ${{ if parameters.triggerDownstreamJobs }}:
-#   - ${{ each component in parameters.downstreamComponentMatrix }}:
-#     - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
-#       - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
-#         parameters:
-#           checkoutRepo: ${{ parameters.checkoutRepo }}
-#           sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
-#           buildDependsOn: ${{ component.buildDependsOn }}
-#           downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
-#           triggerDownstreamJobs: true
-#           unifiedBuild: ${{ parameters.unifiedBuild }}
--- a/.azuredevops/components/rocJPEG.yml
+++ b/.azuredevops/components/rocJPEG.yml
@@ -1,7 +1,4 @@
 parameters:
- name: componentName
-  type: string
-  default: rocJPEG
 - name: checkoutRepo
  type: string
  default: 'self'
@@ -47,44 +44,32 @@ parameters:
  type: object
  default:
    buildJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1030 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
-      - { os: almalinux8, packageManager: dnf, target: gfx942 }
-      - { os: almalinux8, packageManager: dnf, target: gfx90a }
-      - { os: almalinux8, packageManager: dnf, target: gfx1201 }
-      - { os: almalinux8, packageManager: dnf, target: gfx1100 }
-      - { os: almalinux8, packageManager: dnf, target: gfx1030 }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
+  - job: rocJPEG_build_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
    - name: ROCM_PATH
      value: $(Agent.BuildDirectory)/rocm
    pool:
-      ${{ if eq(job.os, 'ubuntu2404') }}:
-        name: rocm-ci_medium_build_pool_2404
-      ${{ else }}:
-        name: ${{ variables.MEDIUM_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
+      vmImage: ${{ variables.BASE_BUILD_POOL }}
    workspace:
      clean: all
    steps:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
        registerROCmPackages: true
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
@@ -95,26 +80,17 @@ jobs:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
-        os: ${{ job.os }}
-        consolidateBuildAndInstall: true
        extraBuildFlags: >-
-          -DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm"
          -DCMAKE_BUILD_TYPE=Release
          -DGPU_TARGETS=${{ job.target }}
          -GNinja
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        componentName: ${{ parameters.componentName }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
    #   parameters:
@@ -123,8 +99,8 @@ jobs:
    #     registerROCmPackages: true

 - ${{ each job in parameters.jobMatrix.testJobs }}:
-  - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
-    dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
+  - job: rocJPEG_test_${{ job.target }}
+    dependsOn: rocJPEG_build_${{ job.target }}
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
@@ -138,34 +114,29 @@ jobs:
      value: $(Agent.BuildDirectory)/rocm
    pool:
      name: ${{ job.target }}_test_pool
+      demands: firstRenderDeviceAccess
    workspace:
      clean: all
    steps:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
        registerROCmPackages: true
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
      parameters:
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-      parameters:
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmTestDependencies }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - task: Bash@3
      displayName: Build rocJPEG tests
      inputs:
        targetType: inline
        script: |
-          ${{ iif(eq(job.os, 'almalinux8'), 'source /opt/rh/gcc-toolset-14/enable', '') }}
          mkdir rocJPEG-tests
          cd rocJPEG-tests
          cmake $(Agent.BuildDirectory)/rocm/share/rocjpeg/test
@@ -174,7 +145,6 @@ jobs:
      parameters:
        componentName: rocJPEG
        testDir: 'rocJPEG-tests'
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
--- a/.azuredevops/components/rocPRIM.yml
+++ b/.azuredevops/components/rocPRIM.yml
@@ -1,29 +1,10 @@
 parameters:
- name: componentName
-  type: string
-  default: rocPRIM
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
-# monorepo related parameters
- name: sparseCheckoutDir
-  type: string
-  default: ''
- name: triggerDownstreamJobs
-  type: boolean
-  default: false
- name: downstreamAggregateNames
-  type: string
-  default: ''
- name: buildDependsOn
-  type: object
-  default: null
- name: unifiedBuild
-  type: boolean
-  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -33,17 +14,18 @@ parameters:
  type: object
  default:
    - cmake
-    - git
    - ninja-build
+    - libgtest-dev
+    - git
    - python3-pip
 - name: rocmDependencies
  type: object
  default:
-    - clr
-    - llvm-project
    - rocm-cmake
-    - rocminfo
+    - llvm-project
    - ROCR-Runtime
+    - clr
+    - rocminfo
 - name: rocmTestDependencies
  type: object
  default:
@@ -57,175 +39,96 @@ parameters:
  type: object
  default:
    buildJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1030 }
-      - { os: almalinux8, packageManager: dnf, target: gfx942 }
-      - { os: almalinux8, packageManager: dnf, target: gfx90a }
-      - { os: almalinux8, packageManager: dnf, target: gfx1201 }
-      - { os: almalinux8, packageManager: dnf, target: gfx1100 }
-      - { os: almalinux8, packageManager: dnf, target: gfx1030 }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942, shard: 1, shardCount: 3 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx942, shard: 2, shardCount: 3 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx942, shard: 3, shardCount: 3 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a, shard: 1, shardCount: 3 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a, shard: 2, shardCount: 3 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a, shard: 3, shardCount: 3 }
- name: downstreamComponentMatrix
-  type: object
-  default:
-    - rocThrust:
-      name: rocThrust
-      sparseCheckoutDir: projects/rocthrust
-      skipUnifiedBuild: 'false'
-      buildDependsOn:
-        - rocPRIM_build
-    - hipCUB:
-      name: hipCUB
-      sparseCheckoutDir: projects/hipcub
-      skipUnifiedBuild: 'false'
-      buildDependsOn:
-        - rocPRIM_build
-    # rocSOLVER depends on both rocBLAS and rocPRIM
-    # for a unified build, rocBLAS will be the one to call rocSOLVER
-    # - rocSOLVER:
-    #   name: rocSOLVER
-    #   sparseCheckoutDir: projects/rocsolver
-    #   skipUnifiedBuild: 'true'
-    #   buildDependsOn:
-    #     - rocPRIM_build
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
-    ${{ if parameters.buildDependsOn }}:
-      dependsOn:
-        - ${{ each build in parameters.buildDependsOn }}:
-          - ${{ build }}_${{ job.os }}_${{ job.target }}
+  - job: rocPRIM_build_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
    pool: ${{ variables.MEDIUM_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
    workspace:
      clean: all
    steps:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
-      parameters:
-        dependencyList:
-          - gtest
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        ${{ if parameters.triggerDownstreamJobs }}:
-          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
-        os: ${{ job.os }}
-        consolidateBuildAndInstall: true
        extraBuildFlags: >-
-          -DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor"
+          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
+          -DBUILD_BENCHMARK=ON
          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
          -DAMDGPU_TARGETS=${{ job.target }}
-          -DBUILD_BENCHMARK=ON
          -DBUILD_TEST=ON
          -GNinja
-        extraCxxFlags: -Wno-deprecated-declarations
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    - ${{ if eq(job.os, 'ubuntu2204') }}:
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        gpuTarget: ${{ job.target }}

- ${{ if eq(parameters.unifiedBuild, False) }}:
-  - ${{ each job in parameters.jobMatrix.testJobs }}:
-    - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}_shard_${{ job.shard }}
-      dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
-      condition:
-        and(succeeded(),
-          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
-          eq(${{ parameters.aggregatePipeline }}, False)
-        )
-      variables:
-      - group: common
-      - template: /.azuredevops/variables-global.yml
-      pool: ${{ job.target }}_test_pool
-      workspace:
-        clean: all
-      steps:
-      - checkout: none
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          packageManager: ${{ job.packageManager }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-        parameters:
-          preTargetFilter: ${{ parameters.componentName }}
-          gpuTarget: ${{ job.target }}
-          os: ${{ job.os }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-        parameters:
-          checkoutRef: ${{ parameters.checkoutRef }}
-          dependencyList: ${{ parameters.rocmTestDependencies }}
-          gpuTarget: ${{ job.target }}
-          os: ${{ job.os }}
-          ${{ if parameters.triggerDownstreamJobs }}:
-            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-        parameters:
-          componentName: ${{ parameters.componentName }}
-          testDir: '$(Agent.BuildDirectory)/rocm/bin/rocprim'
-          extraTestParameters: '-I ${{ job.shard }},,${{ job.shardCount }}'
-          os: ${{ job.os }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          environment: test
-          gpuTarget: ${{ job.target }}
-
- ${{ if parameters.triggerDownstreamJobs }}:
-  - ${{ each component in parameters.downstreamComponentMatrix }}:
-    - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
-      - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
-        parameters:
-          checkoutRepo: ${{ parameters.checkoutRepo }}
-          sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
-          buildDependsOn: ${{ component.buildDependsOn }}
-          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
-          triggerDownstreamJobs: true
-          unifiedBuild: ${{ parameters.unifiedBuild }}
+- ${{ each job in parameters.jobMatrix.testJobs }}:
+  - job: rocPRIM_test_${{ job.target }}
+    dependsOn: rocPRIM_build_${{ job.target }}
+    condition:
+      and(succeeded(),
+        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
+      )
+    variables:
+    - group: common
+    - template: /.azuredevops/variables-global.yml
+    pool: ${{ job.target }}_test_pool
+    workspace:
+      clean: all
+    steps:
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+      parameters:
+        gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        checkoutRef: ${{ parameters.checkoutRef }}
+        dependencyList: ${{ parameters.rocmTestDependencies }}
+        gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+      parameters:
+        componentName: rocPRIM
+        testDir: '$(Agent.BuildDirectory)/rocm/bin/rocprim'
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        environment: test
+        gpuTarget: ${{ job.target }}
--- a/.azuredevops/components/rocPyDecode.yml
+++ b/.azuredevops/components/rocPyDecode.yml
@@ -168,6 +168,7 @@ jobs:
      value: $(Agent.BuildDirectory)/rocm
    pool:
      name: ${{ job.target }}_test_pool
+      demands: firstRenderDeviceAccess
    workspace:
      clean: all
    steps:
--- a/.azuredevops/components/rocRAND.yml
+++ b/.azuredevops/components/rocRAND.yml
@@ -1,29 +1,10 @@
 parameters:
- name: componentName
-  type: string
-  default: rocRAND
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
-# monorepo related parameters
- name: sparseCheckoutDir
-  type: string
-  default: ''
- name: triggerDownstreamJobs
-  type: boolean
-  default: false
- name: downstreamAggregateNames
-  type: string
-  default: ''
- name: buildDependsOn
-  type: object
-  default: null
- name: unifiedBuild
-  type: boolean
-  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -34,16 +15,18 @@ parameters:
  default:
    - cmake
    - git
+    - googletest
+    - libgtest-dev
    - ninja-build
    - python3-pip
 - name: rocmDependencies
  type: object
  default:
-    - clr
-    - llvm-project
    - rocm-cmake
-    - rocminfo
+    - llvm-project
    - ROCR-Runtime
+    - clr
+    - rocminfo
 - name: rocmTestDependencies
  type: object
  default:
@@ -57,96 +40,56 @@ parameters:
  type: object
  default:
    buildJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1030 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
-      - { os: almalinux8, packageManager: dnf, target: gfx942 }
-      - { os: almalinux8, packageManager: dnf, target: gfx90a }
-      - { os: almalinux8, packageManager: dnf, target: gfx1201 }
-      - { os: almalinux8, packageManager: dnf, target: gfx1100 }
-      - { os: almalinux8, packageManager: dnf, target: gfx1030 }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
- name: downstreamComponentMatrix
-  type: object
-  default:
-    - hipRAND:
-      name: hipRAND
-      sparseCheckoutDir: projects/hiprand
-      skipUnifiedBuild: 'false'
-      buildDependsOn:
-        - rocRAND_build
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
-    ${{ if parameters.buildDependsOn }}:
-      dependsOn:
-        - ${{ each build in parameters.buildDependsOn }}:
-          - ${{ build }}_${{ job.os }}_${{ job.target }}
+  - job: rocRAND_build_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
    - name: HIP_ROCCLR_HOME
      value: $(Build.BinariesDirectory)/rocm
    pool:
-      ${{ if eq(job.os, 'ubuntu2404') }}:
-        vmImage: 'ubuntu-24.04'
-      ${{ else }}:
-        name: ${{ variables.MEDIUM_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
+      vmImage: ${{ variables.BASE_BUILD_POOL }}
    workspace:
      clean: all
    steps:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
-      parameters:
-        dependencyList:
-          - gtest
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        os: ${{ job.os }}
-        ${{ if parameters.triggerDownstreamJobs }}:
-          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
-        os: ${{ job.os }}
-        consolidateBuildAndInstall: true
        extraBuildFlags: >-
-          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
+          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
+          -DBUILD_TEST=ON
          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
          -DAMDGPU_TARGETS=${{ job.target }}
-          -DBUILD_TEST=ON
          -GNinja
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
    #   parameters:
@@ -155,63 +98,42 @@ jobs:
    #     extraEnvVars:
    #       - HIP_ROCCLR_HOME:::/home/user/workspace/rocm

- ${{ if eq(parameters.unifiedBuild, False) }}:
-  - ${{ each job in parameters.jobMatrix.testJobs }}:
-    - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
-      dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
-      condition:
-        and(succeeded(),
-          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
-          eq(${{ parameters.aggregatePipeline }}, False)
-        )
-      variables:
-      - group: common
-      - template: /.azuredevops/variables-global.yml
-      pool: ${{ job.target }}_test_pool
-      workspace:
-        clean: all
-      steps:
-      - checkout: none
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          packageManager: ${{ job.packageManager }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-        parameters:
-          preTargetFilter: ${{ parameters.componentName }}
-          gpuTarget: ${{ job.target }}
-          os: ${{ job.os }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-        parameters:
-          checkoutRef: ${{ parameters.checkoutRef }}
-          dependencyList: ${{ parameters.rocmTestDependencies }}
-          gpuTarget: ${{ job.target }}
-          os: ${{ job.os }}
-          ${{ if parameters.triggerDownstreamJobs }}:
-            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-        parameters:
-          componentName: ${{ parameters.componentName }}
-          testDir: '$(Agent.BuildDirectory)/rocm/bin/rocRAND'
-          os: ${{ job.os }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          environment: test
-          gpuTarget: ${{ job.target }}
-
- ${{ if parameters.triggerDownstreamJobs }}:
-  - ${{ each component in parameters.downstreamComponentMatrix }}:
-    - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
-      - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
-        parameters:
-          checkoutRepo: ${{ parameters.checkoutRepo }}
-          sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
-          buildDependsOn: ${{ component.buildDependsOn }}
-          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
-          triggerDownstreamJobs: true
-          unifiedBuild: ${{ parameters.unifiedBuild }}
+- ${{ each job in parameters.jobMatrix.testJobs }}:
+  - job: rocRAND_test_${{ job.target }}
+    dependsOn: rocRAND_build_${{ job.target }}
+    condition:
+      and(succeeded(),
+        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
+      )
+    variables:
+    - group: common
+    - template: /.azuredevops/variables-global.yml
+    pool: ${{ job.target }}_test_pool
+    workspace:
+      clean: all
+    steps:
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+      parameters:
+        gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        checkoutRef: ${{ parameters.checkoutRef }}
+        dependencyList: ${{ parameters.rocmTestDependencies }}
+        gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+      parameters:
+        componentName: rocRAND
+        testDir: '$(Agent.BuildDirectory)/rocm/bin/rocRAND'
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        environment: test
+        gpuTarget: ${{ job.target }}
--- a/.azuredevops/components/rocSOLVER.yml
+++ b/.azuredevops/components/rocSOLVER.yml
@@ -1,29 +1,10 @@
 parameters:
- name: componentName
-  type: string
-  default: rocSOLVER
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
-# monorepo related parameters
- name: sparseCheckoutDir
-  type: string
-  default: ''
- name: triggerDownstreamJobs
-  type: boolean
-  default: false
- name: downstreamAggregateNames
-  type: string
-  default: ''
- name: buildDependsOn
-  type: object
-  default: null
- name: unifiedBuild
-  type: boolean
-  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -45,12 +26,14 @@ parameters:
  type: object
  default:
    - clr
+    - hipSPARSE
    - llvm-project
    - rocBLAS
    - rocm-cmake
    - rocminfo
    - rocPRIM
    - ROCR-Runtime
+    - rocSPARSE
 - name: rocmTestDependencies
  type: object
  default:
@@ -72,47 +55,33 @@ parameters:
  type: object
  default:
    buildJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
-      # - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1030 }
-      # - { os: almalinux8, packageManager: dnf, target: gfx942 }
-      # - { os: almalinux8, packageManager: dnf, target: gfx90a }
-      # - { os: almalinux8, packageManager: dnf, target: gfx1201 }
-      # - { os: almalinux8, packageManager: dnf, target: gfx1100 }
-      # - { os: almalinux8, packageManager: dnf, target: gfx1030 }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
-    ${{ if parameters.buildDependsOn }}:
-      dependsOn:
-        - ${{ each build in parameters.buildDependsOn }}:
-          - ${{ build }}_${{ job.os }}_${{ job.target }}
+  - job: rocSOLVER_build_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
    pool: ${{ variables.MEDIUM_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
    workspace:
      clean: all
    steps:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
    - task: Bash@3
      displayName: 'Clone lapack'
      inputs:
@@ -123,15 +92,11 @@ jobs:
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
-        os: ${{ job.os }}
        gpuTarget: ${{ job.target }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        ${{ if parameters.triggerDownstreamJobs }}:
-          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        componentName: lapack
-        os: ${{ job.os }}
        extraBuildFlags: >-
          -DCMAKE_BUILD_TYPE=Release
          -DCMAKE_Fortran_FLAGS=-fno-optimize-sibling-calls
@@ -140,11 +105,9 @@ jobs:
          -DLAPACKE=OFF
          -GNinja
        cmakeBuildDir: '$(Build.SourcesDirectory)/lapack/build'
-        cmakeSourceDir: '$(Build.SourcesDirectory)/lapack'
        installDir: '$(Pipeline.Workspace)/deps-install'
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
-        os: ${{ job.os }}
        extraBuildFlags: >-
          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Pipeline.Workspace)/deps-install
          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
@@ -156,71 +119,56 @@ jobs:
          -GNinja
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
-        os: ${{ job.os }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
-        os: ${{ job.os }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    - ${{ if eq(job.os, 'ubuntu2204') }}:
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          gpuTarget: ${{ job.target }}
-          extraCopyDirectories:
-            - deps-install
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        gpuTarget: ${{ job.target }}
+        extraCopyDirectories:
+          - deps-install

- ${{ if eq(parameters.unifiedBuild, False) }}:
-  - ${{ each job in parameters.jobMatrix.testJobs }}:
-    - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
-      dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
-      condition:
-        and(succeeded(),
-          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
-          eq(${{ parameters.aggregatePipeline }}, False)
-        )
-      variables:
-      - group: common
-      - template: /.azuredevops/variables-global.yml
-      pool: ${{ job.target }}_test_pool
-      workspace:
-        clean: all
-      steps:
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-        parameters:
-          preTargetFilter: ${{ parameters.componentName }}
-          os: ${{ job.os }}
-          gpuTarget: ${{ job.target }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-        parameters:
-          os: ${{ job.os }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-        parameters:
-          checkoutRef: ${{ parameters.checkoutRef }}
-          dependencyList: ${{ parameters.rocmTestDependencies }}
-          os: ${{ job.os }}
-          gpuTarget: ${{ job.target }}
-          ${{ if parameters.triggerDownstreamJobs }}:
-            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-        parameters:
-          componentName: ${{ parameters.componentName }}
-          os: ${{ job.os }}
-          testDir: '$(Agent.BuildDirectory)/rocm/bin'
-          testExecutable: './rocsolver-test'
-          testParameters: '--gtest_filter="*checkin*" --gtest_output=xml:./test_output.xml --gtest_color=yes'
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          environment: test
-          gpuTarget: ${{ job.target }}
+- ${{ each job in parameters.jobMatrix.testJobs }}:
+  - job: rocSOLVER_test_${{ job.target }}
+    dependsOn: rocSOLVER_build_${{ job.target }}
+    condition:
+      and(succeeded(),
+        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
+      )
+    variables:
+    - group: common
+    - template: /.azuredevops/variables-global.yml
+    pool: ${{ job.target }}_test_pool
+    workspace:
+      clean: all
+    steps:
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+      parameters:
+        gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        checkoutRef: ${{ parameters.checkoutRef }}
+        dependencyList: ${{ parameters.rocmTestDependencies }}
+        gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+      parameters:
+        componentName: rocSOLVER
+        testDir: '$(Agent.BuildDirectory)/rocm/bin'
+        testExecutable: './rocsolver-test'
+        testParameters: '--gtest_filter="*checkin*" --gtest_output=xml:./test_output.xml --gtest_color=yes'
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        environment: test
+        gpuTarget: ${{ job.target }}
--- a/.azuredevops/components/rocThrust.yml
+++ b/.azuredevops/components/rocThrust.yml
@@ -1,29 +1,10 @@
 parameters:
- name: componentName
-  type: string
-  default: rocThrust
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
-# monorepo related parameters
- name: sparseCheckoutDir
-  type: string
-  default: ''
- name: triggerDownstreamJobs
-  type: boolean
-  default: false
- name: downstreamAggregateNames
-  type: string
-  default: ''
- name: buildDependsOn
-  type: object
-  default: null
- name: unifiedBuild
-  type: boolean
-  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -33,17 +14,18 @@ parameters:
  type: object
  default:
    - cmake
-    - git
    - ninja-build
    - libboost-program-options-dev
+    - googletest
    - libfftw3-dev
+    - git
    - python3-pip
 - name: rocmDependencies
  type: object
  default:
    - clr
+    - hipRAND
    - llvm-project
-    - rocm-cmake
    - rocminfo
    - rocPRIM
    - ROCR-Runtime
@@ -54,142 +36,104 @@ parameters:
    - llvm-project
    - rocminfo
    - rocPRIM
-    - rocprofiler-register
    - ROCR-Runtime
+    - hipRAND
+    - rocprofiler-register

 - name: jobMatrix
  type: object
  default:
    buildJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1030 }
-      - { os: almalinux8, packageManager: dnf, target: gfx942 }
-      - { os: almalinux8, packageManager: dnf, target: gfx90a }
-      - { os: almalinux8, packageManager: dnf, target: gfx1201 }
-      - { os: almalinux8, packageManager: dnf, target: gfx1100 }
-      - { os: almalinux8, packageManager: dnf, target: gfx1030 }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
-    ${{ if parameters.buildDependsOn }}:
-      dependsOn:
-        - ${{ each build in parameters.buildDependsOn }}:
-          - ${{ build }}_${{ job.os }}_${{ job.target }}
+  - job: rocThrust_build_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
    pool: ${{ variables.MEDIUM_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
    workspace:
      clean: all
    steps:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
-      parameters:
-        dependencyList:
-          - gtest
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        ${{ if parameters.triggerDownstreamJobs }}:
-          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
-        os: ${{ job.os }}
-        consolidateBuildAndInstall: true
        extraBuildFlags: >-
          -GNinja
          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
          -DROCM_PATH=$(Agent.BuildDirectory)/rocm
-          -DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor"
+          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
          -DAMDGPU_TARGETS=${{ job.target }}
          -DBUILD_TEST=ON
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
-        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
-        componentName: ${{ parameters.componentName }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    - ${{ if eq(job.os, 'ubuntu2204') }}:
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        gpuTarget: ${{ job.target }}

- ${{ if eq(parameters.unifiedBuild, False) }}:
-  - ${{ each job in parameters.jobMatrix.testJobs }}:
-    - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
-      dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
-      condition:
-        and(succeeded(),
-          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
-          eq(${{ parameters.aggregatePipeline }}, False)
-        )
-      variables:
-      - group: common
-      - template: /.azuredevops/variables-global.yml
-      pool: ${{ job.target }}_test_pool
-      workspace:
-        clean: all
-      steps:
-      - checkout: none
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          packageManager: ${{ job.packageManager }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-        parameters:
-          preTargetFilter: ${{ parameters.componentName }}
-          gpuTarget: ${{ job.target }}
-          os: ${{ job.os }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-        parameters:
-          checkoutRef: ${{ parameters.checkoutRef }}
-          dependencyList: ${{ parameters.rocmTestDependencies }}
-          gpuTarget: ${{ job.target }}
-          os: ${{ job.os }}
-          ${{ if parameters.triggerDownstreamJobs }}:
-            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-        parameters:
-          componentName: ${{ parameters.componentName }}
-          testDir: '$(Agent.BuildDirectory)/rocm/bin/rocthrust'
-          testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex "scan.hip"'
-          os: ${{ job.os }}
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          environment: test
-          gpuTarget: ${{ job.target }}
+- ${{ each job in parameters.jobMatrix.testJobs }}:
+  - job: rocThrust_test_${{ job.target }}
+    dependsOn: rocThrust_build_${{ job.target }}
+    condition:
+      and(succeeded(),
+        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+        eq(${{ parameters.aggregatePipeline }}, False)
+      )
+    variables:
+    - group: common
+    - template: /.azuredevops/variables-global.yml
+    pool: ${{ job.target }}_test_pool
+    workspace:
+      clean: all
+    steps:
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+      parameters:
+        gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        checkoutRef: ${{ parameters.checkoutRef }}
+        dependencyList: ${{ parameters.rocmTestDependencies }}
+        gpuTarget: ${{ job.target }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+      parameters:
+        componentName: rocThrust
+        testDir: '$(Agent.BuildDirectory)/rocm/bin/rocthrust'
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        environment: test
+        gpuTarget: ${{ job.target }}
--- a/.azuredevops/components/rocm-cmake.yml
+++ b/.azuredevops/components/rocm-cmake.yml
@@ -16,6 +16,8 @@ parameters:
    - doxygen
    - doxygen-doc
    - ninja-build
+    - python3-pip
+    - python3-sphinx
 - name: pipModules
  type: object
  default:
@@ -23,75 +25,49 @@ parameters:
    - cmake==3.20.5
    - ninja
    - rocm-docs-core
-    - sphinx
-
- name: jobMatrix
-  type: object
-  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: ubuntu2404, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }

 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: rocm_cmake_${{ job.os }}
-    pool:
-      ${{ if eq(job.os, 'ubuntu2404') }}:
-        vmImage: 'ubuntu-24.04'
-      ${{ else }}:
-        vmImage: 'ubuntu-22.04'
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        pipModules: ${{ parameters.pipModules }}
-        packageManager: ${{ job.packageManager }}
-    - task: Bash@3
-      displayName: Add CMake to PATH
-      inputs:
-        targetType: inline
-        script: echo "##vso[task.prependpath]$(python3 -m site --user-base)/bin"
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        os: ${{ job.os }}
-        useAmdclang: false
-    - task: Bash@3
-      displayName: CTest setup
-      inputs:
-        targetType: inline
-        script: |
-          python -m pip install -r $(Build.SourcesDirectory)/docs/requirements.txt
-          python -m pip install -r $(Build.SourcesDirectory)/test/docsphinx/docs/.sphinx/requirements.txt
-          git config --global user.email "you@example.com"
-          git config --global user.name "Your Name"
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-      parameters:
-        componentName: rocm-cmake
-        testParameters: '-E "pass-version-parent" --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-    #   parameters:
-    #     aptPackages: ${{ parameters.aptPackages }}
-    #     pipModules: ${{ parameters.pipModules }}
-    #     environment: combined
+- job: rocm_cmake
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  pool:
+    vmImage: ${{ variables.BASE_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+      pipModules: ${{ parameters.pipModules }}
+  - task: Bash@3
+    displayName: Add CMake to PATH
+    inputs:
+      targetType: inline
+      script: echo "##vso[task.prependpath]$(python3 -m site --user-base)/bin"
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+  - task: Bash@3
+    displayName: CTest setup  # installs Python requirements and sets a git identity
+    inputs:  # python3 is the interpreter whose user-base bin dir the "Add CMake to PATH" step prepends
+      targetType: inline
+      script: |
+        python3 -m pip install -r $(Build.SourcesDirectory)/docs/requirements.txt
+        python3 -m pip install -r $(Build.SourcesDirectory)/test/docsphinx/docs/.sphinx/requirements.txt
+        git config --global user.email "you@example.com"
+        git config --global user.name "Your Name"
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+    parameters:
+      componentName: rocm-cmake
+      testParameters: '-E "pass-version-parent" --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+  # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+  #   parameters:
+  #     aptPackages: ${{ parameters.aptPackages }}
+  #     pipModules: ${{ parameters.pipModules }}
+  #     environment: combined
--- a/.azuredevops/components/rocm-core.yml
+++ b/.azuredevops/components/rocm-core.yml
@@ -15,61 +15,39 @@ parameters:
  default:
    - cmake
    - ninja-build
-
- name: jobMatrix
-  type: object
-  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: ubuntu2404, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }
+    - python3-pip

 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: rocm_core_${{ job.os }}
-    pool:
-      ${{ if eq(job.os, 'ubuntu2404') }}:
-        vmImage: 'ubuntu-24.04'
-      ${{ else }}:
-        vmImage: 'ubuntu-22.04'
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        os: ${{ job.os }}
-        useAmdclang: false
-        extraBuildFlags: >-
-          -DCMAKE_CURRENT_BINARY_DIR=$PWD
-          -DCMAKE_CURRENT_SOURCE_DIR=$PWD/../
-          -DCMAKE_VERBOSE_MAKEFILE=1
-          -DCPACK_GENERATOR=DEB
-          -DCPACK_DEBIAN_PACKAGE_RELEASE="local.9999~99.99"
-          -DCPACK_RPM_PACKAGE_RELEASE="local.9999"
-          -DROCM_VERSION="$(NEXT_RELEASE_VERSION)"
-          -GNinja
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-    #   parameters:
-    #     aptPackages: ${{ parameters.aptPackages }}
+- job: rocm_core
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  pool:
+    vmImage: ${{ variables.BASE_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      extraBuildFlags: >-
+        -DCMAKE_CURRENT_BINARY_DIR=$PWD
+        -DCMAKE_CURRENT_SOURCE_DIR=$PWD/../
+        -DCMAKE_VERBOSE_MAKEFILE=1
+        -DCPACK_GENERATOR=DEB
+        -DCPACK_DEBIAN_PACKAGE_RELEASE="local.9999~99.99"
+        -DCPACK_RPM_PACKAGE_RELEASE="local.9999"
+        -DROCM_VERSION="$(NEXT_RELEASE_VERSION)"
+        -GNinja
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+  # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+  #   parameters:
+  #     aptPackages: ${{ parameters.aptPackages }}
--- a/.azuredevops/components/rocm-examples.yml
+++ b/.azuredevops/components/rocm-examples.yml
@@ -15,7 +15,6 @@ parameters:
  default:
    - cmake
    - libglfw3-dev
-    - libmsgpack-dev
    - libtbb-dev
    - ninja-build
    - python3-pip
@@ -184,7 +183,6 @@ jobs:
      parameters:
        componentName: rocm-examples
        testDir: $(Build.SourcesDirectory)/build
-        testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex "rocfft_callback"'
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
--- a/.azuredevops/components/rocm_smi_lib.yml
+++ b/.azuredevops/components/rocm_smi_lib.yml
@@ -17,66 +17,50 @@ parameters:
    - libdrm-dev
    - ninja-build
    - pkg-config
+    - python3-pip

 - name: jobMatrix
  type: object
  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: ubuntu2404, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: rocm_smi_lib_build_${{ job.os }}
-    pool:
-      ${{ if eq(job.os, 'ubuntu2404') }}:
-        vmImage: 'ubuntu-24.04'
-      ${{ else }}:
-        vmImage: 'ubuntu-22.04'
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        os: ${{ job.os }}
-        useAmdclang: false
-        extraBuildFlags: >-
-          -DBUILD_TESTS=ON
-          -DROCM_DEP_ROCMCORE=ON
-          -GNinja
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-    #   parameters:
-    #     aptPackages: ${{ parameters.aptPackages }}
+- job: rocm_smi_lib_build
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  pool:
+    vmImage: ${{ variables.BASE_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      extraBuildFlags: >-
+        -DBUILD_TESTS=ON
+        -DROCM_DEP_ROCMCORE=ON
+        -GNinja
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+  # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+  #   parameters:
+  #     aptPackages: ${{ parameters.aptPackages }}

 - ${{ each job in parameters.jobMatrix.testJobs }}:
-  - job: rocm_smi_lib_test_${{ job.os }}_${{ job.target }}
-    dependsOn: rocm_smi_lib_build_${{ job.os }}
+  - job: rocm_smi_lib_test_${{ job.target }}
+    dependsOn: rocm_smi_lib_build
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
@@ -93,11 +77,8 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-      parameters:
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
      parameters:
        runRocminfo: false
@@ -105,9 +86,8 @@ jobs:
      parameters:
        componentName: rocm_smi_lib
        testDir: '$(Agent.BuildDirectory)'
-        testExecutable: 'sudo ./rocm/share/rocm_smi/rsmitst_tests/rsmitst'
+        testExecutable: './rocm/share/rocm_smi/rsmitst_tests/rsmitst'
        testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
--- a/.azuredevops/components/rocminfo.yml
+++ b/.azuredevops/components/rocminfo.yml
@@ -17,6 +17,7 @@ parameters:
    - libdrm-amdgpu-dev
    - libdrm-dev
    - ninja-build
+    - python3-pip
 - name: rocmDependencies
  type: object
  default:
@@ -31,63 +32,49 @@ parameters:
 - name: jobMatrix
  type: object
  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: rocminfo_build_${{ job.os }}
-    pool:
-      vmImage: 'ubuntu-22.04'
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
-        registerROCmPackages: true
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-      parameters:
-        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmDependencies }}
-        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        skipLlvmSymlink: true
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        os: ${{ job.os }}
-        useAmdclang: false
-        extraBuildFlags: >-
-          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-          -DROCRTST_BLD_TYPE=release
-          -GNinja
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+- job: rocminfo
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  pool:
+    vmImage: ${{ variables.BASE_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+      registerROCmPackages: true
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+    parameters:
+      checkoutRef: ${{ parameters.checkoutRef }}
+      dependencyList: ${{ parameters.rocmDependencies }}
+      aggregatePipeline: ${{ parameters.aggregatePipeline }}
+      skipLlvmSymlink: true
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      extraBuildFlags: >-
+        -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
+        -DROCRTST_BLD_TYPE=release
+        -GNinja
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml

 - ${{ each job in parameters.jobMatrix.testJobs }}:
  - job: rocminfo_test_${{ job.target }}
-    dependsOn: rocminfo_build_${{ job.os }}
+    dependsOn: rocminfo
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
@@ -104,18 +91,14 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
        registerROCmPackages: true
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-      parameters:
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmTestDependencies }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
      parameters:
        runRocminfo: false
@@ -126,7 +109,6 @@ jobs:
        testExecutable: './rocm/bin/rocminfo'
        testParameters: ''
        testPublishResults: false
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
      parameters:
        componentName: rocm_agent_enumerator
@@ -134,7 +116,6 @@ jobs:
        testExecutable: './rocm/bin/rocm_agent_enumerator'
        testParameters: ''
        testPublishResults: false
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
--- a/.azuredevops/components/rocprofiler-compute.yml
+++ b/.azuredevops/components/rocprofiler-compute.yml
@@ -24,28 +24,24 @@ parameters:
  default:
    - astunparse==1.6.2
    - colorlover
-    - dash-bootstrap-components
-    - dash-svg
-    - "dash>=3.0.0"
-    - kaleido==0.2.1
+    - "dash>=1.12.0"
    - matplotlib
    - "numpy>=1.17.5"
    - "pandas>=1.4.3"
-    - plotext
-    - plotille
    - pymongo
    - pyyaml
-    - setuptools
    - tabulate
-    - textual
-    - textual_plotext
-    - textual-fspicker
    - tqdm
+    - dash-svg
+    - dash-bootstrap-components
+    - kaleido
+    - setuptools
+    - plotille
    - mock
    - pytest
    - pytest-cov
    - pytest-xdist
- name: rocmTestDependencies
+- name: rocmDependencies
  type: object
  default:
    - amdsmi
@@ -118,6 +114,14 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        checkoutRef: ${{ parameters.checkoutRef }}
+        dependencyList: ${{ parameters.rocmDependencies }}
+        dependencySource: ${{ job.dependencySource }}
+        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -161,6 +165,14 @@ jobs:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+    - task: Bash@3
+      displayName: Add en_US.UTF-8 locale
+      inputs:
+        targetType: inline
+        script: |
+          sudo locale-gen en_US.UTF-8
+          sudo update-locale
+          locale -a
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
@@ -172,17 +184,9 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmTestDependencies }}
+        dependencyList: ${{ parameters.rocmDependencies }}
        dependencySource: ${{ job.dependencySource }}
        gpuTarget: ${{ job.target }}
-    - task: Bash@3
-      displayName: Add en_US.UTF-8 locale
-      inputs:
-        targetType: inline
-        script: |
-          sudo locale-gen en_US.UTF-8
-          sudo update-locale
-          locale -a
    - task: Bash@3
      displayName: Add ROCm binaries to PATH
      inputs:
--- a/.azuredevops/components/rocprofiler-register.yml
+++ b/.azuredevops/components/rocprofiler-register.yml
@@ -15,62 +15,40 @@ parameters:
  default:
    - cmake
    - ninja-build
-
- name: jobMatrix
-  type: object
-  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: ubuntu2404, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }
+    - python3-pip

 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: rocprofiler_register_${{ job.os }}
-    pool:
-      ${{ if eq(job.os, 'ubuntu2404') }}:
-        vmImage: 'ubuntu-24.04'
-      ${{ else }}:
-        vmImage: 'ubuntu-22.04'
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
-      parameters:
-        checkoutRepo: ${{ parameters.checkoutRepo }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        componentName: rocprofiler-register
-        os: ${{ job.os }}
-        useAmdclang: false
-        extraBuildFlags: >-
-          -DCMAKE_PREFIX_PATH=$(Build.BinariesDirectory)
-          -DROCPROFILER_REGISTER_BUILD_TESTS=ON
-          -DROCPROFILER_REGISTER_BUILD_SAMPLES=ON
-          -GNinja
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-      parameters:
-        componentName: rocprofiler-register
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-    #   parameters:
-    #     aptPackages: ${{ parameters.aptPackages }}
-    #     environment: combined
+- job: rocprofiler_register
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  pool:
+    vmImage: ${{ variables.BASE_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      componentName: rocprofiler-register
+      extraBuildFlags: >-
+        -DCMAKE_PREFIX_PATH=$(Build.BinariesDirectory)
+        -DROCPROFILER_REGISTER_BUILD_TESTS=ON
+        -DROCPROFILER_REGISTER_BUILD_SAMPLES=ON
+        -GNinja
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+    parameters:
+      componentName: rocprofiler-register
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
+  # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+  #   parameters:
+  #     aptPackages: ${{ parameters.aptPackages }}
+  #     environment: combined
--- a/.azuredevops/components/rocprofiler-sdk.yml
+++ b/.azuredevops/components/rocprofiler-sdk.yml
@@ -14,12 +14,10 @@ parameters:
  type: object
  default:
    - build-essential
-    - cmake
    - libdrm-amdgpu-dev
    - libdrm-dev
    - libdw-dev
    - libelf-dev
-    - libsqlite3-dev
    - libva-dev
    - ninja-build
    - pkg-config
@@ -76,7 +74,8 @@ jobs:
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
-    pool: ${{ variables.MEDIUM_BUILD_POOL }}
+    pool:
+      vmImage: ${{ variables.BASE_BUILD_POOL }}
    workspace:
      clean: all
    steps:
--- a/.azuredevops/components/rocprofiler-systems.yml
+++ b/.azuredevops/components/rocprofiler-systems.yml
@@ -67,7 +67,6 @@ parameters:
    - rocprofiler-register
    - rocprofiler-sdk
    - ROCR-Runtime
-    - roctracer

 - name: jobMatrix
  type: object
@@ -167,6 +166,7 @@ jobs:
      value: $(Agent.BuildDirectory)/rocm
    pool:
      name: ${{ job.target }}_test_pool
+      demands: firstRenderDeviceAccess
    workspace:
      clean: all
    steps:
--- a/.azuredevops/components/rocprofiler.yml
+++ b/.azuredevops/components/rocprofiler.yml
@@ -1,7 +1,4 @@
 parameters:
- name: componentName
-  type: string
-  default: rocprofiler
 - name: checkoutRepo
  type: string
  default: 'self'
@@ -18,6 +15,7 @@ parameters:
  type: object
  default:
    - cmake
+    - libgtest-dev
    - libdrm-dev
    - libdw-dev
    - libsystemd-dev
@@ -28,13 +26,13 @@ parameters:
 - name: pipModules
  type: object
  default:
-    - barectf
-    - Cppheaderparser
-    - lxml
-    - matplotlib
-    - pandas
    - pyyaml==5.3.1
+    - Cppheaderparser
    - websockets
+    - matplotlib
+    - lxml
+    - barectf
+    - pandas
 - name: rocmDependencies
  type: object
  default:
@@ -43,33 +41,29 @@ parameters:
    - ROCdbgapi
    - rocm-cmake
    - rocm-core
-    - rocminfo
    - rocm_smi_lib
-    - rocprofiler-register
+    - rocminfo
    - ROCR-Runtime
+    - rocprofiler-register
    - roctracer

 - name: jobMatrix
  type: object
  default:
    buildJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1030 }
-      - { os: almalinux8, packageManager: dnf, target: gfx942 }
-      - { os: almalinux8, packageManager: dnf, target: gfx90a }
-      - { os: almalinux8, packageManager: dnf, target: gfx1201 }
-      - { os: almalinux8, packageManager: dnf, target: gfx1100 }
-      - { os: almalinux8, packageManager: dnf, target: gfx1030 }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
+  - job: rocprofiler_build_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
@@ -78,10 +72,6 @@ jobs:
    - name: ROCM_PATH
      value: $(Agent.BuildDirectory)/rocm
    pool: ${{ variables.MEDIUM_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
    workspace:
      clean: all
    steps:
@@ -89,59 +79,46 @@ jobs:
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
-        packageManager: ${{ job.packageManager }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
-      parameters:
-        dependencyList:
-          - gtest
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-      parameters:
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
-        os: ${{ job.os }}
-        useAmdclang: false
        extraBuildFlags: >-
-          -DCMAKE_MODULE_PATH=$(Build.SourcesDirectory)/cmake_modules;$(Agent.BuildDirectory)/rocm/lib/cmake;$(Agent.BuildDirectory)/rocm/lib/cmake/hip;$(Agent.BuildDirectory)/rocm/lib64/cmake;$(Agent.BuildDirectory)/rocm/lib64/cmake/hip
-          -DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor"
-          -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+          -DCMAKE_MODULE_PATH=$(Build.SourcesDirectory)/cmake_modules;$(Agent.BuildDirectory)/rocm/lib/cmake;$(Agent.BuildDirectory)/rocm/lib/cmake/hip
+          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
          -DENABLE_LDCONFIG=OFF
          -DUSE_PROF_API=1
          -DGPU_TARGETS=${{ job.target }}
+          -DAMDGPU_TARGETS=${{ job.target }}
        multithreadFlag: -- -j32
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
-    - ${{ if eq(job.os, 'ubuntu2204') }}:
-      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        parameters:
-          aptPackages: ${{ parameters.aptPackages }}
-          pipModules: ${{ parameters.pipModules }}
-          gpuTarget: ${{ job.target }}
-          extraEnvVars:
-            - HIP_ROCCLR_HOME:::/home/user/workspace/rocm
-            - ROCM_PATH:::/home/user/workspace/rocm
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+      parameters:
+        aptPackages: ${{ parameters.aptPackages }}
+        pipModules: ${{ parameters.pipModules }}
+        gpuTarget: ${{ job.target }}
+        extraEnvVars:
+          - HIP_ROCCLR_HOME:::/home/user/workspace/rocm
+          - ROCM_PATH:::/home/user/workspace/rocm

 - ${{ each job in parameters.jobMatrix.testJobs }}:
-  - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
-    dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
+  - job: rocprofiler_test_${{ job.target }}
+    dependsOn: rocprofiler_build_${{ job.target }}
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
@@ -162,21 +139,16 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
      parameters:
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-      parameters:
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
      parameters:
@@ -185,14 +157,12 @@ jobs:
        testExecutable:  ./run.sh
        testParameters: ''
        testPublishResults: false
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
      parameters:
        componentName: rocprofilerV2
        testDir: $(Agent.BuildDirectory)/rocm
        testExecutable:  share/rocprofiler/tests/runUnitTests
        testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
--- a/.azuredevops/components/roctracer.yml
+++ b/.azuredevops/components/roctracer.yml
@@ -1,7 +1,4 @@
 parameters:
- name: componentName
-  type: string
-  default: roctracer
 - name: checkoutRepo
  type: string
  default: 'self'
@@ -21,7 +18,7 @@ parameters:
    - graphviz
    - libdrm-amdgpu-dev
    - ninja-build
-    - zlib1g-dev
+    - python3-pip
 - name: pipModules
  type: object
  default:
@@ -48,32 +45,26 @@ parameters:
  type: object
  default:
    buildJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx1030 }
-      - { os: almalinux8, packageManager: dnf, target: gfx942 }
-      - { os: almalinux8, packageManager: dnf, target: gfx90a }
-      - { os: almalinux8, packageManager: dnf, target: gfx1201 }
-      - { os: almalinux8, packageManager: dnf, target: gfx1100 }
-      - { os: almalinux8, packageManager: dnf, target: gfx1030 }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a
    testJobs:
-      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
-      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
+      - gfx942:
+        target: gfx942
+      - gfx90a:
+        target: gfx90a

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
+  - job: roctracer_build_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
+    - name: HIP_ROCCLR_HOME
+      value: $(Build.BinariesDirectory)/rocm
    pool:
      vmImage: ${{ variables.BASE_BUILD_POOL }}
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
    workspace:
      clean: all
    steps:
@@ -81,7 +72,6 @@ jobs:
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
-        packageManager: ${{ job.packageManager }}
        registerROCmPackages: true
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
@@ -93,27 +83,21 @@ jobs:
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
-        os: ${{ job.os }}
-    # the linker flags will not affect ubuntu2204 builds as the paths do not exist
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
-        os: ${{ job.os }}
-        useAmdclang: false
        extraBuildFlags: >-
          -DCMAKE_BUILD_TYPE=release
          -DROCM_PATH=$(Agent.BuildDirectory)/rocm
-          -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip;$(Agent.BuildDirectory)/rocm/lib64/cmake/hip
+          -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
          -DGPU_TARGETS=${{ job.target }}
-          -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+          -DAMDGPU_TARGETS=${{ job.target }}
          -GNinja
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
-        os: ${{ job.os }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
-        os: ${{ job.os }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -124,8 +108,8 @@ jobs:
    #     registerROCmPackages: true

 - ${{ each job in parameters.jobMatrix.testJobs }}:
-  - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
-    dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
+  - job: roctracer_test_${{ job.target }}
+    dependsOn: roctracer_build_${{ job.target }}
    condition:
      and(succeeded(),
        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
@@ -143,20 +127,17 @@ jobs:
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
-        packageManager: ${{ job.packageManager }}
        registerROCmPackages: true
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
      parameters:
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmTestDependencies }}
        gpuTarget: ${{ job.target }}
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
      parameters:
@@ -165,7 +146,6 @@ jobs:
        testParameters: ''
        testDir: $(Agent.BuildDirectory)
        testPublishResults: false
-        os: ${{ job.os }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
--- a/.azuredevops/dependencies/grpc.yml
+++ b/.azuredevops/dependencies/grpc.yml
@@ -38,7 +38,6 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
      cmakeBuildDir: $(Agent.BuildDirectory)/grpc/build
-      cmakeSourceDir: $(Agent.BuildDirectory)/grpc
      extraBuildFlags: >-
        -DgRPC_INSTALL=ON
        -DgRPC_BUILD_TESTS=OFF
--- a/.azuredevops/dependencies/gtest.yml
+++ b/.azuredevops/dependencies/gtest.yml
@@ -11,54 +11,35 @@ parameters:
 - name: aptPackages
  type: object
  default:
-    - cmake
    - git
+    - cmake
    - ninja-build

- name: jobMatrix
-  type: object
-  default:
-    buildJobs:
-      - { os: ubuntu2204, packageManager: apt }
-      - { os: almalinux8, packageManager: dnf }
-
 jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: gtest_${{ job.os }}
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    pool:
-      vmImage: 'ubuntu-22.04'
-    ${{ if eq(job.os, 'almalinux8') }}:
-      container:
-        image: rocmexternalcicd.azurecr.io/manylinux228:latest
-        endpoint: ContainerService3
-    workspace:
-      clean: all
-    steps:
-    - checkout: none
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - task: Bash@3
-      displayName: Clone GTest ${{ parameters.gtestVersion }}
-      inputs:
-        targetType: inline
-        script: git clone https://github.com/google/googletest -b ${{ parameters.gtestVersion }} --depth=1 --shallow-submodules --recurse-submodules
-        workingDirectory: $(Agent.BuildDirectory)
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
-      parameters:
-        os: ${{ job.os }}
-        cmakeBuildDir: $(Agent.BuildDirectory)/googletest/build
-        cmakeSourceDir: $(Agent.BuildDirectory)/googletest
-        useAmdclang: false
-        extraBuildFlags: >-
-          -DGTEST_FORCE_SHARED_CRT=ON
-          -DCMAKE_DEBUG_POSTFIX=d
-          -GNinja
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
-      parameters:
-        os: ${{ job.os }}
+- job: gtest
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  pool:
+    vmImage: ${{ variables.BASE_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  - task: Bash@3
+    displayName: 'git clone gtest'
+    inputs:
+      targetType: inline
+      script: git clone -b ${{ parameters.gtestVersion }} https://github.com/google/googletest --depth=1 --shallow-submodules --recurse-submodules
+      workingDirectory: $(Agent.BuildDirectory)
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      cmakeBuildDir: $(Agent.BuildDirectory)/googletest/build
+      extraBuildFlags: >-
+        -DGTEST_FORCE_SHARED_CRT=ON
+        -DCMAKE_DEBUG_POSTFIX=d
+        -GNinja
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
--- a/.azuredevops/nightly/pytorch.yml
+++ b/.azuredevops/nightly/pytorch.yml
@@ -4,71 +4,71 @@ parameters:
 - name: aptPackages
  type: object
  default:
-    - autoconf
+    - build-essential
+    - git
+    - ninja-build
+    - openjdk-8-jdk
+    - ca-certificates
    - bc
    - bridge-utils
-    - build-essential
-    - ca-certificates
-    - ccache
    - devscripts
    - dkms
    - doxygen
-    - fakeroot
-    - ffmpeg
-    - gfortran
-    - git
-    - gnutls-bin
-    - libamd2
-    - libavformat-dev
-    - libblas3
-    - libcamd2
-    - libccolamd2
-    - libcholmod3
-    - libcolamd2
    - libdpkg-dev
    - libdpkg-perl
-    - libdrm-amdgpu1
-    - libdrm-dev
    - libelf-dev
-    - libfreetype-dev
-    - libgfortran5
-    - libgomp1
-    - libjpeg-dev
-    - libjpeg-turbo-official
-    - liblapack-dev
-    - liblapack3
-    - libmetis5
-    - libncurses-dev
-    - libnuma-dev
-    - libopenblas-dev
-    - libpth-dev
-    - libquadmath0
-    - libssh-dev
-    - libstdc++-12-dev
-    - libsuitesparseconfig5
-    - libswscale-dev
-    - libtinfo-dev
-    - libunwind-dev
-    - libwebp-dev
-    - llvm-dev
-    - ncurses-base
-    - ninja-build
-    - numactl
-    - openjdk-8-jdk
-    - python-is-python3
    - python3-dev
    - python3-pip
    - python3-venv
+    - wget
+    - ncurses-base
+    - libncurses-dev
+    - numactl
+    - libnuma-dev
+    - libssh-dev
+    - libunwind-dev
+    - llvm-dev
+    - libpth-dev
    - qemu-kvm
    - re2c
    - subversion
-    - wget
+    - fakeroot
+    - autoconf
+    - libgomp1
+    - libtinfo-dev
+    - libcholmod3
+    - libsuitesparseconfig5
+    - libstdc++-12-dev
+    - python-is-python3
+    - gfortran
+    - libgfortran5
+    - liblapack3
+    - libblas3
+    - libquadmath0
+    - libmetis5
+    - libamd2
+    - libcamd2
+    - libcolamd2
+    - libccolamd2
+    - libdrm-amdgpu1
+    - ccache
    - zip
+    - libjpeg-turbo-official
+    - libjpeg-dev
+    - libwebp-dev
+    - libfreetype-dev
+    - gnutls-bin
+    - ffmpeg
+    - libopenblas-dev
+    - liblapack-dev
+    - libswscale-dev
+    - libavformat-dev
 - name: pipModules
  type: object
  default:
+    - cmake
    - astunparse
-    - "expecttest>=0.3.0"
+    - "expecttest>=0.2.1"
    - hypothesis
    - numpy
    - psutil
@@ -76,8 +76,8 @@ parameters:
    - requests
    - setuptools==75.8.0
    - types-dataclasses
-    - "typing-extensions>=4.10.0"
-    - "sympy>=1.13.3"
+    - "typing-extensions>=4.8.0"
+    - "sympy>=1.13.0"
    - filelock
    - networkx
    - jinja2
@@ -97,39 +97,36 @@ parameters:
 - name: rocmDependencies
  type: object
  default:
+    - rocminfo
+    - MIOpen
    - clr
    - hipBLAS
-    - hipBLASLt
    - hipFFT
    - hipRAND
    - hipSOLVER
    - hipSPARSE
-    - hipSPARSELt
+    - ROCR-Runtime
    - llvm-project
-    - MIOpen
    - rccl
    - rocBLAS
    - rocFFT
-    - rocm-core
-    - rocminfo
    - rocm_smi_lib
-    - rocPRIM
-    - rocprofiler-register
    - rocRAND
-    - ROCR-Runtime
    - rocSOLVER
    - rocSPARSE
    - roctracer
+    - hipBLASLt
+    - rocprofiler-register
+    - rocm-core
+    - rocPRIM
    # below are additional dependencies not called out by build script, but throw errors during cmake
-    - composable_kernel
-    - hipBLAS-common
    - hipCUB
    - rocThrust
+    - hipBLAS-common
+    - composable_kernel
 - name: rocmTestDependencies
  type: object
  default:
-    # rocroller.so needed and is not included in the wheel
-    - hipBLASLt
    - rocminfo
 # Reference on what tests to run for torchvision found in private repo:
 # https://github.com/ROCm/rocAutomation/blob/jenkins-pipelines/pytorch/pytorch_ci/test_pytorch_test1.sh#L54
@@ -243,6 +240,12 @@ jobs:
        git clone https://github.com/pytorch/builder.git --depth=1 --recurse-submodules
        sudo ln -s $(Build.SourcesDirectory)/builder /builder
      workingDirectory: $(Build.SourcesDirectory)
+  - task: Bash@3
+    displayName: Temporarily Patch CK Submodule
+    inputs:
+      targetType: inline
+      script: git pull origin develop
+      workingDirectory: $(Build.SourcesDirectory)/pytorch/third_party/composable_kernel
  - task: Bash@3
    displayName: Install patchelf
    inputs:
@@ -264,11 +267,6 @@ jobs:
      script: |
        sudo bash pytorch/.ci/docker/common/install_rocm_magma.sh $(MAGMA_ROCM)
      workingDirectory: $(Build.SourcesDirectory)
-  - task: Bash@3
-    displayName: Install targeted typing_extensions for build
-    inputs:
-      targetType: inline
-      script: pip install --target=$(Build.SourcesDirectory)/pytorch/torch/.. typing_extensions
  - task: Bash@3
    displayName: Run ROCm Build Script
    inputs:
@@ -283,6 +281,7 @@ jobs:
        PYTORCH_ROOT=$(PYTORCH_ROOT)
        CMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
        DESIRED_DEVTOOLSET=$(DESIRED_DEVTOOLSET)
+        TORCH_PACKAGE_NAME=torch.$(ROCM_BRANCH).$(JOB_GPU_TARGET)
        PYTORCH_BUILD_VERSION=$(cat $(Build.SourcesDirectory)/pytorch/version.txt | cut -da -f1)
        PYTORCH_BUILD_NUMBER=$(date -u +%Y%m%d)
        SKIP_ALL_TESTS=1
@@ -323,6 +322,8 @@ jobs:
      inputs:
        targetType: inline
        script: >-
+          TORCH_PACKAGE_NAME=torch.$(ROCM_BRANCH).$(JOB_GPU_TARGET)
+          TORCHVISION_PACKAGE_NAME=torchvision.$(ROCM_BRANCH).$(JOB_GPU_TARGET)
          PYTORCH_VERSION=$(cat $(Build.SourcesDirectory)/pytorch/version.txt | cut -da -f1)post$(date -u +%Y%m%d)
          BUILD_VERSION=$(cat $(Build.SourcesDirectory)/vision/version.txt | cut -da -f1)post$(date -u +%Y%m%d)
          python3 setup.py bdist_wheel
@@ -399,9 +400,11 @@ jobs:
  - task: DownloadPipelineArtifact@2
    displayName: 'Download Pipeline Wheel Files'
    inputs:
-      itemPattern: '**/*.whl'
+      itemPattern: '**/*$(JOB_GPU_TARGET)*.whl'
      targetPath: $(Agent.BuildDirectory)
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+    parameters:
+      dependencySource: staging
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
    parameters:
      dependencyList: ${{ parameters.rocmTestDependencies }}
--- a/.azuredevops/nightly/rocm-nightly.yml
+++ b/.azuredevops/nightly/rocm-nightly.yml
@@ -3,21 +3,12 @@ parameters:
 - name: jobList
  type: object
  default:
-    - { os: ubuntu2204, target: gfx942, source: staging }
-    - { os: ubuntu2204, target: gfx90a, source: staging }
-    - { os: ubuntu2204, target: gfx1201, source: staging }
-    - { os: ubuntu2204, target: gfx1100, source: staging }
-    - { os: ubuntu2204, target: gfx1030, source: staging }
-    - { os: ubuntu2404, target: gfx942, source: staging }
-    - { os: ubuntu2404, target: gfx90a, source: staging }
-    - { os: ubuntu2404, target: gfx1201, source: staging }
-    - { os: ubuntu2404, target: gfx1100, source: staging }
-    - { os: ubuntu2404, target: gfx1030, source: staging }
-    - { os: almalinux8, target: gfx942, source: staging }
-    - { os: almalinux8, target: gfx90a, source: staging }
-    - { os: almalinux8, target: gfx1201, source: staging }
-    - { os: almalinux8, target: gfx1100, source: staging }
-    - { os: almalinux8, target: gfx1030, source: staging }
+    - gfx942-staging:
+      target: gfx942
+      source: staging
+    - gfx90a-staging:
+      target: gfx90a
+      source: staging
 - name: rocmDependencies
  type: object
  default:
@@ -25,9 +16,9 @@ parameters:
    - amdsmi
    - aomp-extras
    - aomp
-    - clr
    - composable_kernel
    - half
+    - HIP
    - hip-tests
    - hipBLAS
    - hipBLAS-common
@@ -92,7 +83,7 @@ schedules:

 jobs:
 - ${{ each job in parameters.jobList }}:
-  - job: rocm_nightly_${{ job.os }}_${{ job.target }}_${{ job.source }}
+  - job: rocm_nightly_${{ job.target }}_${{ job.source }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
@@ -117,9 +108,9 @@ jobs:
      parameters:
        dependencySource: ${{ job.source }}
        dependencyList: ${{ parameters.rocmDependencies }}
-        os: ${{ job.os }}
        gpuTarget: ${{ job.target }}
        skipLibraryLinking: true
+        skipLlvmSymlink: true
    - script: df -h
      displayName: System disk space after ROCm
    - script: du -sh $(Agent.BuildDirectory)/rocm
--- a/.azuredevops/tag-builds/clr.yml
+++ b/.azuredevops/tag-builds/clr.yml
@@ -28,22 +28,12 @@ resources:
    endpoint: ROCm
    name: ROCm/hipother
    ref: ${{ parameters.checkoutRef }}
-  pipelines:
-  - pipeline: hip_pipeline
-    source: \experimental\HIP
-    trigger: true
-  - pipeline: hipother_pipeline
-    source: \experimental\hipother
-    trigger: true

 trigger: none
 pr: none

 jobs:
-  - ${{ if eq(variables['Build.Reason'], 'ResourceTrigger') }}:
-    - template: ${{ variables.CI_COMPONENT_PATH }}/copyHIP.yml@pipelines_repo
-  - ${{ if ne(variables['Build.Reason'], 'ResourceTrigger') }}:
-    - template: ${{ variables.CI_COMPONENT_PATH }}/HIP.yml@pipelines_repo
-      parameters:
-        checkoutRepo: release_repo
-        checkoutRef: ${{ parameters.checkoutRef }}
+  - template: ${{ variables.CI_COMPONENT_PATH }}/HIP.yml
+    parameters:
+      checkoutRepo: release_repo
+      checkoutRef: ${{ parameters.checkoutRef }}
--- a/.azuredevops/templates/steps/artifact-download.yml
+++ b/.azuredevops/templates/steps/artifact-download.yml
@@ -19,24 +19,36 @@ parameters:
  default: false

 steps:
+- task: Bash@3
+  displayName: Set allowPartiallySucceededBuilds
+  inputs:
+    targetType: inline
+    script: |
+      if [[ ",$ALLOWED_PARTIAL_SUCCEED_BUILDS," == *",${{ parameters.componentName }},"* ]]; then
+        echo "##vso[task.setvariable variable=allowPartiallySucceededBuilds;]true"
+      else
+        echo "##vso[task.setvariable variable=allowPartiallySucceededBuilds;]false"
+      fi
 - task: DownloadPipelineArtifact@2
  displayName: Download ${{ parameters.componentName }}
  inputs:
-    itemPattern: '**/*${{ parameters.componentName }}*${{ parameters.fileFilter }}*'
-    targetPath: '$(Pipeline.Workspace)/d'
-    allowPartiallySucceededBuilds: true
-    ${{ if parameters.aggregatePipeline }}:
-      buildType: 'current'
-    ${{ else }}:
+    ${{ if eq(parameters.aggregatePipeline, false) }}:
      buildType: 'specific'
      project: ROCm-CI
-      specificBuildWithTriggering: true
      definition: ${{ parameters.pipelineId }}
+      specificBuildWithTriggering: true
+      itemPattern: '**/*${{ parameters.fileFilter }}*'
+      # aomp is a special case, since the trigger file is under ROCm/ROCm instead of the component repo
+      ${{ if notIn(parameters.componentName, 'aomp') }}:
+        buildVersionToDownload: latestFromBranch # default is 'latest'
      branchName: refs/heads/${{ parameters.branchName }}
-      ${{ if eq(parameters.componentName, 'aomp') }}:
-        buildVersionToDownload: latest # aomp trigger lives in ROCm/ROCm, so cannot use ROCm/aomp branch names
-      ${{ else }}:
-        buildVersionToDownload: latestFromBranch
+      allowPartiallySucceededBuilds: $(allowPartiallySucceededBuilds)
+      targetPath: '$(Pipeline.Workspace)/d'
+    ${{ else }}:
+      buildType: 'current'
+      itemPattern: '**/${{ parameters.componentName }}*${{ parameters.fileFilter }}*'
+      allowPartiallySucceededBuilds: $(allowPartiallySucceededBuilds)
+      targetPath: '$(Pipeline.Workspace)/d'
 - task: ExtractFiles@1
  displayName: Extract ${{ parameters.componentName }}
  inputs:
--- a/.azuredevops/templates/steps/artifact-upload.yml
+++ b/.azuredevops/templates/steps/artifact-upload.yml
@@ -3,21 +3,15 @@
 # publish can be toggled off for jobs that produce multiple tarballs
 # for those cases, only publish the last call which puts all the tarballs in one container folder
 parameters:
- name: componentName
-  type: string
-  default: $(Build.DefinitionName)
- name: gpuTarget
-  type: string
-  default: ''
 - name: artifactName
  type: string
-  default: drop
+  default: 'drop'
 - name: publish
  type: boolean
  default: true
- name: os
+- name: gpuTarget
  type: string
-  default: 'ubuntu2204'
+  default: ''

 steps:
 - task: ArchiveFiles@2
@@ -26,7 +20,7 @@ steps:
    includeRootFolder: false
    archiveType: 'tar'
    tarCompression: 'gz'
-    archiveFile: '$(Build.ArtifactStagingDirectory)/${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}_$(System.JobAttempt).tar.gz'
+    archiveFile: '$(Build.ArtifactStagingDirectory)/$(Build.DefinitionName)_$(Build.BuildId)_$(Build.BuildNumber)_ubuntu2204_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.tar.gz'
 - task: DeleteFiles@1
  displayName: 'Cleanup Staging Area'
  inputs:
@@ -38,7 +32,7 @@ steps:
  inputs:
    workingDirectory: $(Pipeline.Workspace)
    targetType: inline
-    script: echo "${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}_$(System.JobAttempt).tar.gz" >> pipelineArtifacts.txt
+    script: echo "$(Build.DefinitionName)_$(Build.BuildId)_$(Build.BuildNumber)_ubuntu2204_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.tar.gz" >> pipelineArtifacts.txt
 # then publish it
 - ${{ if parameters.publish }}:
  - task: PublishPipelineArtifact@1
@@ -46,5 +40,4 @@ steps:
    displayName: '${{ parameters.artifactName }} Publish'
    retryCountOnTaskFailure: 3
    inputs:
-      artifactName: ${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}_$(System.JobAttempt)
      targetPath: '$(Build.ArtifactStagingDirectory)'
--- a/.azuredevops/templates/steps/build-autotools.yml
+++ b/.azuredevops/templates/steps/build-autotools.yml
@@ -1,7 +1,4 @@
 parameters:
- name: os
-  type: string
-  default: 'ubuntu2204'
 - name: componentName
  type: string
  default: ''
@@ -23,23 +20,17 @@ steps:
  displayName: '${{ parameters.componentName }} configure flags'
  inputs:
    targetType: inline
+    script: ./configure --prefix=${{ parameters.installDir }} ${{ parameters.configureFlags }}
    workingDirectory: ${{ parameters.buildDir }}
-    script: |
-      ${{ iif(eq(parameters.os, 'almalinux8'), 'source /opt/rh/gcc-toolset-14/enable', '') }}
-      ./configure --prefix=${{ parameters.installDir }} ${{ parameters.configureFlags }}
 - task: Bash@3
  displayName: '${{ parameters.componentName }} make'
  inputs:
    targetType: inline
+    script: ${{ parameters.makeCallPrefix }} make -j$(nproc)
    workingDirectory: ${{ parameters.buildDir }}
-    script: |
-      ${{ iif(eq(parameters.os, 'almalinux8'), 'source /opt/rh/gcc-toolset-14/enable', '') }}
-      ${{ parameters.makeCallPrefix }} make -j$(nproc)
 - task: Bash@3
  displayName: '${{ parameters.componentName }} make install'
  inputs:
    targetType: inline
+    script: make install
    workingDirectory: ${{ parameters.buildDir }}
-    script: |
-      ${{ iif(eq(parameters.os, 'almalinux8'), 'source /opt/rh/gcc-toolset-14/enable', '') }}
-      make install
--- a/.azuredevops/templates/steps/build-cmake.yml
+++ b/.azuredevops/templates/steps/build-cmake.yml
@@ -1,25 +1,19 @@
 parameters:
- name: os
-  type: string
-  default: 'ubuntu2204'
 - name: componentName
  type: string
  default: ''
 - name: extraBuildFlags
  type: string
  default: ''
- name: extraCxxFlags
-  type: string
-  default: ''
 - name: multithreadFlag
  type: string
  default: ''
 - name: cmakeBuildDir
  type: string
-  default: $(Agent.BuildDirectory)/s/build
+  default: 'build'
 - name: cmakeSourceDir
  type: string
-  default: $(Agent.BuildDirectory)/s
+  default: '..'
 - name: customBuildTarget
  type: string
  default: ''
@@ -38,81 +32,41 @@ parameters:
 - name: installEnabled
  type: boolean
  default: true
-# for jobs that rebuild during install step and use ninja
-# set to true to save time, only applies for almalinux8
- name: consolidateBuildAndInstall
-  type: boolean
-  default: false
 - name: printDiskSpace
  type: boolean
  default: true
-# todo: make this control cxx and c compiler flags
- name: useAmdclang
-  type: boolean
-  default: true
-
-# for cmake calls, set env variables for AlmaLinux 8
-# to simulate running source /opt/rh/gcc-toolset-14/enable for the session

 steps:
 # create workingDirectory if it does not exist and change into it
 # call cmake from within that directory using $cmakeArgs as its parameters
 - task: CMake@1
  displayName: '${{parameters.componentName }} CMake Flags'
-  ${{ if eq(parameters.os, 'almalinux8')}}:
-    env:
-      PATH: "/opt/rh/gcc-toolset-14/root/usr/bin:$(PATH)"
-      MANPATH: "/opt/rh/gcc-toolset-14/root/usr/share/man:$(MANPATH)"
-      INFOPATH: "/opt/rh/gcc-toolset-14/root/usr/share/info:$(INFOPATH)"
-      PCP_DIR: "/opt/rh/gcc-toolset-14/root"
-      LD_LIBRARY_PATH: "/opt/rh/gcc-toolset-14/root/usr/lib64:/opt/rh/gcc-toolset-14/root/usr/lib:$(LD_LIBRARY_PATH)"
-      PKG_CONFIG_PATH: "/opt/rh/gcc-toolset-14/root/usr/lib64/pkgconfig:$(PKG_CONFIG_PATH)"
  inputs:
    workingDirectory: ${{ parameters.cmakeBuildDir }}
-    cmakeArgs: >-
-      ${{ iif(parameters.customInstallPath, join('', format('-DCMAKE_INSTALL_PREFIX={0}', parameters.installDir)), '') }}
-      ${{ iif(eq(parameters.os, 'almalinux8'), '-DCMAKE_SHARED_LINKER_FLAGS="-L$(Agent.BuildDirectory)/rocm/lib64 -L/opt/rh/gcc-toolset-14/root/usr/lib/gcc/x86_64-redhat-linux/14/"', '') }}
-      ${{ iif(eq(parameters.os, 'almalinux8'), '-DCMAKE_EXE_LINKER_FLAGS="-L$(Agent.BuildDirectory)/rocm/lib64 -L/opt/rh/gcc-toolset-14/root/usr/lib/gcc/x86_64-redhat-linux/14/"', '') }}
-      -DCMAKE_CXX_FLAGS="${{ parameters.extraCxxFlags }} ${{ iif(and(eq(parameters.os, 'almalinux8'), parameters.useAmdclang), '--gcc-toolchain=/opt/rh/gcc-toolset-14/root', '') }}"
-      ${{ parameters.extraBuildFlags }}
-      ${{ parameters.cmakeSourceDir }}
+    ${{ if eq(parameters.customInstallPath, true) }}:
+      cmakeArgs: -DCMAKE_INSTALL_PREFIX=${{ parameters.installDir }} ${{ parameters.extraBuildFlags }} ${{ parameters.cmakeSourceDir }}
+    ${{ else }}:
+      cmakeArgs: ${{ parameters.extraBuildFlags }} ..
 - ${{ if parameters.printDiskSpace }}:
  - script: df -h
    displayName: Disk space before build
 # equivalent to running make $cmakeTargetDir from $cmakeBuildDir
 # i.e., cd $cmakeBuildDir; make $cmakeTargetDir
 - task: CMake@1
-  ${{ if and( eq(parameters.os, 'almalinux8'), eq(parameters.consolidateBuildAndInstall , true)) }}:
-    displayName: '${{ parameters.componentName }} CMake Build and Install'
-  ${{ else }}:
-    displayName: '${{ parameters.componentName }} CMake Build'
-  ${{ if eq(parameters.os, 'almalinux8')}}:
-    env:
-      PATH: "/opt/rh/gcc-toolset-14/root/usr/bin:$(PATH)"
-      MANPATH: "/opt/rh/gcc-toolset-14/root/usr/share/man:$(MANPATH)"
-      INFOPATH: "/opt/rh/gcc-toolset-14/root/usr/share/info:$(INFOPATH)"
-      PCP_DIR: "/opt/rh/gcc-toolset-14/root"
-      LD_LIBRARY_PATH: "/opt/rh/gcc-toolset-14/root/usr/lib64:/opt/rh/gcc-toolset-14/root/usr/lib:$(LD_LIBRARY_PATH)"
-      PKG_CONFIG_PATH: "/opt/rh/gcc-toolset-14/root/usr/lib64/pkgconfig:$(PKG_CONFIG_PATH)"
+  displayName: '${{parameters.componentName }} Build'
  inputs:
    workingDirectory: ${{ parameters.cmakeBuildDir }}
-    ${{ if eq(parameters.os, 'almalinux8') }}:
-      cmakeArgs: >-
-        --build ${{ parameters.cmakeTargetDir }}
-        ${{ iif(and(eq(parameters.consolidateBuildAndInstall, true), ne(parameters.cmakeTarget, '')), format('--target {0}', parameters.cmakeTarget), '') }}
-        ${{ iif(and(ne(parameters.customBuildTarget, ''), ne(parameters.consolidateBuildAndInstall, true)), format('--target {0}', parameters.customBuildTarget), '') }}
-        ${{ parameters.multithreadFlag }}
-    ${{ if ne(parameters.os, 'almalinux8') }}:
-      cmakeArgs: >-
-        --build ${{ parameters.cmakeTargetDir }}
-        ${{ iif(ne(parameters.customBuildTarget, ''), format('--target {0}', parameters.customBuildTarget), '') }}
-        ${{ parameters.multithreadFlag }}
+    ${{ if eq(parameters.customBuildTarget, '') }}:
+      cmakeArgs: '--build ${{ parameters.cmakeTargetDir }} ${{ parameters.multithreadFlag }}'
+    ${{ else }}:
+      cmakeArgs: '--build ${{ parameters.cmakeTargetDir }} --target ${{ parameters.customBuildTarget }} ${{ parameters.multithreadFlag }}'
+    retryCountOnTaskFailure: 10
 - ${{ if parameters.printDiskSpace }}:
  - script: df -h
    displayName: Disk space after build
 # equivalent to running make $cmakeTarget from $cmakeBuildDir
 # e.g., make install
- ${{ if and(eq(parameters.installEnabled, true), or(ne(parameters.os, 'almalinux8'), eq(parameters.consolidateBuildAndInstall, false))) }}:
+- ${{ if eq(parameters.installEnabled, true) }}:
  - task: CMake@1
    displayName: '${{parameters.componentName }} ${{ parameters.cmakeTarget }}'
    inputs:
--- a/.azuredevops/templates/steps/checkout.yml
+++ b/.azuredevops/templates/steps/checkout.yml
@@ -4,9 +4,6 @@ parameters:
 - name: checkoutRepo
  type: string
  default: 'self'
- name: sparseCheckoutDir
-  type: string
-  default: ''
 # submodule download behaviour
 # change to 'recursive' for repos with submodules
 - name: submoduleBehaviour
@@ -18,13 +15,3 @@ steps:
    clean: true
    submodules: ${{ parameters.submoduleBehaviour }}
    retryCountOnTaskFailure: 3
-    fetchFilter: blob:none
-    ${{ if ne(parameters.sparseCheckoutDir, '') }}:
-      sparseCheckoutDirectories: ${{ parameters.sparseCheckoutDir }}
-      path: sparse
-  - ${{ if ne(parameters.sparseCheckoutDir, '') }}:
-    - task: Bash@3
-      displayName: Symlink sparse checkout
-      inputs:
-        targetType: inline
-        script: ln -s $(Agent.BuildDirectory)/sparse/${{ parameters.sparseCheckoutDir }} $(Agent.BuildDirectory)/s
--- a/.azuredevops/templates/steps/dependencies-apt.yml
+++ b/.azuredevops/templates/steps/dependencies-apt.yml
@@ -1,42 +0,0 @@
-parameters:
- name: aptPackages
-  type: object
-  default: []
- name: registerROCmPackages
-  type: boolean
-  default: false
-
-steps:
- ${{ if eq(parameters.registerROCmPackages, true) }}:
-  - task: Bash@3
-    displayName: 'Register AMDGPU & ROCm repos (apt)'
-    inputs:
-      targetType: inline
-      script: |
-        sudo mkdir --parents --mode=0755 /etc/apt/keyrings
-        wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
-        echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/$(REPO_RADEON_VERSION)/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
-        echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION) jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
-        echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
-        sudo apt update
- task: Bash@3
-  displayName: 'sudo apt-get update'
-  inputs:
-    targetType: inline
-    script: |
-      echo "deb http://archive.ubuntu.com/ubuntu/ jammy main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
-      echo "deb http://archive.ubuntu.com/ubuntu/ jammy-updates main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
-      echo "deb http://archive.ubuntu.com/ubuntu/ jammy-backports main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
-      echo "deb http://archive.ubuntu.com/ubuntu/ jammy-security main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
-      sudo DEBIAN_FRONTEND=noninteractive apt-get --yes update
- task: Bash@3
-  displayName: 'sudo apt-get fix'
-  inputs:
-    targetType: inline
-    script: sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-broken install
- ${{ if gt(length(parameters.aptPackages), 0) }}:
-  - task: Bash@3
-    displayName: 'sudo apt-get install ...'
-    inputs:
-      targetType: inline
-      script: sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-missing install ${{ join(' ', parameters.aptPackages) }}
--- a/.azuredevops/templates/steps/dependencies-aqlprofile.yml
+++ b/.azuredevops/templates/steps/dependencies-aqlprofile.yml
@@ -1,44 +1,25 @@
-parameters:
- name: os
-  type: string
-  default: ubuntu2204
-
 steps:
 - task: Bash@3
  displayName: Get aqlprofile package name
  inputs:
    targetType: inline
-    ${{ if eq(parameters.os, 'ubuntu2204') }}:
-      script: |
-        export packageName=$(curl -s https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/ | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb")
-        echo "##vso[task.setvariable variable=packageName;isreadonly=true]$packageName"
-    ${{ if eq(parameters.os, 'almalinux8') }}:
-      script: |
-        export packageName=$(curl -s https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/ | grep -oP "hsa-amd-aqlprofile-[^\"]+\.rpm" | head -n1)
-        echo "##vso[task.setvariable variable=packageName;isreadonly=true]$packageName"
+    script: |
+      export packageName=$(curl -s https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/ | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb")
+      echo "##vso[task.setvariable variable=packageName;isreadonly=true]$packageName"
 - task: Bash@3
  displayName: 'Download aqlprofile'
  inputs:
    targetType: inline
+    script: wget -nv https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/$(packageName)
    workingDirectory: '$(Pipeline.Workspace)'
-    ${{ if eq(parameters.os, 'ubuntu2204') }}:
-      script: wget -nv https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/$(packageName)
-    ${{ if eq(parameters.os, 'almalinux8') }}:
-      script: wget -nv https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/$(packageName)
 - task: Bash@3
  displayName: 'Extract aqlprofile'
  inputs:
    targetType: inline
+    script: |
+      mkdir hsa-amd-aqlprofile
+      dpkg-deb -R $(packageName) hsa-amd-aqlprofile
    workingDirectory: '$(Pipeline.Workspace)'
-    ${{ if eq(parameters.os, 'ubuntu2204') }}:
-      script: |
-        mkdir hsa-amd-aqlprofile
-        dpkg-deb -R $(packageName) hsa-amd-aqlprofile
-    ${{ if eq(parameters.os, 'almalinux8') }}:
-      script: |
-        mkdir hsa-amd-aqlprofile
-        sudo dnf -y install rpm-build cpio
-        rpm2cpio $(packageName) | (cd hsa-amd-aqlprofile && cpio -idmv)
 - task: Bash@3
  displayName: 'Copy aqlprofile files'
  inputs:
--- a/.azuredevops/templates/steps/dependencies-boost.yml
+++ b/.azuredevops/templates/steps/dependencies-boost.yml
@@ -0,0 +1,35 @@
+steps:
+- task: DownloadPipelineArtifact@2
+  displayName: Download Boost
+  inputs:
+    buildType: specific
+    project: ROCm-CI
+    definition: $(BOOST_DEPENDENCY_PIPELINE_ID)
+    targetPath: $(Pipeline.Workspace)/d
+- task: ExtractFiles@1
+  displayName: Extract Boost
+  inputs:
+    archiveFilePatterns: '$(Pipeline.Workspace)/d/**/*.tar.gz'
+    destinationFolder: $(Agent.BuildDirectory)/boost
+    cleanDestinationFolder: true
+    overwriteExistingFiles: true
+- task: DeleteFiles@1
+  displayName: Cleanup Compressed Boost
+  inputs:
+    SourceFolder: $(Pipeline.Workspace)/d
+    Contents: '**/*.tar.gz'
+    RemoveDotFiles: true
+- task: Bash@3
+  displayName: 'List Boost files'
+  inputs:
+    targetType: inline
+    script: ls -1R $(Agent.BuildDirectory)/boost
+- task: Bash@3
+  displayName: 'Link Boost shared libraries'
+  inputs:
+    targetType: inline
+    script: |
+      echo $(Agent.BuildDirectory)/boost/lib | sudo tee /etc/ld.so.conf.d/boost.conf
+      sudo cat /etc/ld.so.conf.d/boost.conf
+      sudo ldconfig -v
+      ldconfig -p
--- a/.azuredevops/templates/steps/dependencies-cmake-latest.yml
+++ b/.azuredevops/templates/steps/dependencies-cmake-latest.yml
@@ -1,23 +1,10 @@
+# replace the cmake installed via apt with CMake 3.31 installed via snap (channel 3.31/stable)
 steps:
 - task: Bash@3
-  displayName: Install CMake 3.31
+  displayName: update cmake
  inputs:
    targetType: inline
    script: |
-      CMAKE_VERSION=3.31.0
-      CMAKE_ROOT="$(Pipeline.Workspace)/cmake"
-
-      echo "Downloading CMake $CMAKE_VERSION..."
-      curl -fsSL -o cmake.tar.gz https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz
-
-      echo "Extracting to $CMAKE_ROOT..."
-      sudo mkdir -p $CMAKE_ROOT
-      sudo tar --strip-components=1 -xz -C $CMAKE_ROOT -f cmake.tar.gz
-
-      echo "##vso[task.prependpath]$CMAKE_ROOT/bin"
- task: Bash@3
-  displayName: cmake --version
-  inputs:
-    targetType: inline
-    script: |
-      cmake --version
+      sudo apt purge cmake -y
+      sudo snap install cmake --classic --channel=3.31/stable
+      hash -r
--- a/.azuredevops/templates/steps/dependencies-dnf.yml
+++ b/.azuredevops/templates/steps/dependencies-dnf.yml
@@ -1,157 +0,0 @@
-parameters:
- name: aptPackages
-  type: object
-  default: []
- name: registerROCmPackages
-  type: boolean
-  default: false
-# As par of installing gcc toolset and python,
-# the environment will install this base set of dnf packages.
- name: basePackages
-  type: object
-  default:
-    - epel-release
-    - gcc-toolset-14
-    - gcc-toolset-14-libatomic-devel
-    - git
-    - jq
-    - numactl
-    - python3.11
-    - python3.11-pip
-    - vim-common
-    - wget
-# Instead of defining multiple arrays of packages per component,
-# we define a map of apt package names to dnf package names.
- name: aptToDnfMap
-  type: object
-  default:
-    bison: bison
-    ccache: ccache
-    cmake: cmake
-    cuda-toolkit-12-9: cuda-compiler-12-9 cuda-toolkit-12-9
-    libcudnn9-dev-cuda-12: libcudnn9-cuda-12
-    dejagnu: dejagnu
-    doxygen: doxygen
-    # note: doxygen-doc is not available in dnf
-    # libavcodec-dev, libavformat-dev, libavutil-dev come with ffmpeg-devel
-    ffmpeg: ffmpeg ffmpeg-devel
-    flex: flex
-    # note: g++ is installed by default with gcc-toolset-14
-    # note: gawk is already installed
-    # note: gcc-toolset-14-gfortran is installed by default with gcc-toolset-14
-    # note: git is in the base packages list
-    graphviz: graphviz
-    libbabeltrace-dev: libbabeltrace-devel
-    libbison-dev: bison-devel
-    libboost-program-options-dev: boost-devel
-    # note: libdrm-amdgpu1 is not available in dnf
-    libdrm-dev: libdrm-devel
-    libdrm-amdgpu-dev: libdrm-amdgpu-devel
-    libdw-dev: elfutils-devel
-    libelf-dev: elfutils-libelf-devel
-    libexpat-dev: expat-devel
-    libffi-dev: libffi-devel
-    libfftw3-dev: fftw-devel
-    libgmp-dev: gmp-devel
-    liblzma-dev: xz-devel
-    libmpfr-dev: mpfr-devel
-    libmsgpack-dev: msgpack-devel
-    libncurses5-dev: ncurses-devel
-    libnuma-dev: numactl-devel
-    libopenmpi-dev: openmpi-devel
-    libpci-dev: libpciaccess-devel
-    libssl-dev: openssl-devel
-    # note: libstdc++-devel is in the base packages list
-    libsystemd-dev: systemd-devel
-    libtool: libtool
-    # note: libudev-dev is part of systemd-devel
-    libva-amdgpu-dev: libva-amdgpu-devel
-    mesa-amdgpu-va-drivers: mesa-amdgpu-va-drivers
-    mesa-common-dev: mesa-libGL-devel
-    ncurses-dev: ncurses-devel
-    # note: llvm needs ninja-build version newer than what dnf provides
-    ocl-icd-libopencl1: ocl-icd
-    ocl-icd-opencl-dev: ocl-icd-devel
-    opencl-headers: opencl-headers
-    parallel: parallel
-    pkg-config: pkgconf-pkg-config
-    # note: python3 is the default python in AlmaLinux 8
-    python3-dev: python3.11-devel
-    # note: python3.11-pip is already installed when updating to python 3.11
-    # note: python3.11-setuptools is already installed when updating to python 3.11
-    texinfo: texinfo
-    zlib1g-dev: zlib-devel
-
-steps:
- ${{ if eq(parameters.registerROCmPackages, true) }}:
-  - task: Bash@3
-    displayName: 'Register AMDGPU & ROCm repos (dnf)'
-    inputs:
-      targetType: inline
-      script: |
-        sudo rpm --import https://repo.radeon.com/rocm/rocm.gpg.key
-        echo '[amdgpu]' | sudo tee /etc/yum.repos.d/amdgpu.repo > /dev/null
-        echo "name=amdgpu" | sudo tee --append /etc/yum.repos.d/amdgpu.repo
-        echo "baseurl=https://repo.radeon.com/amdgpu/$(REPO_RADEON_VERSION)/rhel/8.10/main/x86_64/" | sudo tee --append /etc/yum.repos.d/amdgpu.repo
-        echo "enabled=1" | sudo tee --append /etc/yum.repos.d/amdgpu.repo
-        echo "gpgcheck=1" | sudo tee --append /etc/yum.repos.d/amdgpu.repo
-        echo "gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" | sudo tee --append /etc/yum.repos.d/amdgpu.repo
-        echo '[rocm]' | sudo tee /etc/yum.repos.d/rocm.repo > /dev/null
-        echo "name=ROCm$(REPO_RADEON_VERSION)" | sudo tee --append /etc/yum.repos.d/rocm.repo
-        echo "baseurl=https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/" | sudo tee --append /etc/yum.repos.d/rocm.repo
-        echo "enabled=1" | sudo tee --append /etc/yum.repos.d/rocm.repo
-        echo "gpgcheck=1" | sudo tee --append /etc/yum.repos.d/rocm.repo
-        echo "gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" | sudo tee --append /etc/yum.repos.d/rocm.repo
-        sudo dnf clean all
-        sudo dnf makecache
- task: Bash@3
-  displayName: 'Install base dnf packages'
-  inputs:
-    targetType: inline
-    script: |
-      sudo dnf config-manager --set-enabled powertools
-      # rpm fusion free repo for some dependencies
-      sudo dnf -y install https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm
-      sudo dnf -y install ${{ join(' ', parameters.basePackages) }}
- task: Bash@3
-  displayName: 'Check gcc environment'
-  inputs:
-    targetType: inline
-    script: |
-      echo "=== Versions and sanity checks ==="
-      gcc --version
-      g++ --version
-      gcc -print-file-name=libstdc++.so
-      g++ -print-file-name=libstdc++.so
- task: Bash@3
-  displayName: 'Set python 3.11 as default'
-  inputs:
-    targetType: inline
-    script: |
-      sudo dnf -y module disable python36
-      sudo rm -f /usr/local/bin/python3.12 /usr/local/bin/python3.13 /usr/local/bin/python3.14
-      sudo alternatives --set python /usr/bin/python3.11
-      sudo alternatives --set python3 /usr/bin/python3.11
-      python3 --version
-      python3 -m pip install --upgrade pip setuptools wheel
- ${{ each pkg in parameters.aptPackages }}:
-  # note: llvm needs ninja-build version newer than what dnf provides
-  - ${{ if eq(pkg, 'ninja-build') }}:
-    - task: Bash@3
-      displayName: 'Install ninja 1.11.1'
-      inputs:
-        targetType: inline
-        script: |
-          curl -LO https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-linux.zip
-          sudo dnf -y install unzip
-          unzip ninja-linux.zip
-          sudo mv ninja /usr/local/bin/ninja
-          sudo chmod +x /usr/local/bin/ninja
-          echo "##vso[task.prependpath]/usr/local/bin"
-  - ${{ if ne(parameters.aptToDnfMap[pkg], '') }}:
-    - task: Bash@3
-      displayName: 'dnf install ${{ parameters.aptToDnfMap[pkg] }}'
-      inputs:
-        targetType: inline
-        script: |
-          sudo dnf -y install ${{ parameters.aptToDnfMap[pkg] }}
--- a/.azuredevops/templates/steps/dependencies-other.yml
+++ b/.azuredevops/templates/steps/dependencies-other.yml
@@ -9,24 +9,56 @@ parameters:
 - name: registerROCmPackages
  type: boolean
  default: false
- name: packageManager
-  type: string
-  default: apt

 steps:
- ${{ if eq(parameters.packageManager, 'apt') }}:
-  - template: dependencies-apt.yml
-    parameters:
-      aptPackages: ${{ parameters.aptPackages }}
-      registerROCmPackages: ${{ parameters.registerROCmPackages }}
- ${{ if eq(parameters.packageManager, 'dnf') }}:
-  - template: dependencies-dnf.yml
-    parameters:
-      aptPackages: ${{ parameters.aptPackages }}
-      registerROCmPackages: ${{ parameters.registerROCmPackages }}
+- ${{ if eq(parameters.registerROCmPackages, true) }}:
+  - task: Bash@3
+    displayName: 'Register AMDGPU & ROCm repos'
+    inputs:
+      targetType: inline
+      script: |
+        sudo mkdir --parents --mode=0755 /etc/apt/keyrings
+        wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
+        echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/$(REPO_RADEON_VERSION)/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
+        echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION) jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
+        echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
+        sudo apt update
+# firefox takes time to upgrade and is not needed for CI workloads, hold version
+- task: Bash@3
+  continueOnError: true
+  displayName: 'sudo apt-mark hold firefox'
+  inputs:
+    targetType: inline
+    script: sudo apt-mark hold firefox
+- task: Bash@3
+  displayName: 'sudo apt-get update'
+  inputs:
+    targetType: inline
+    script: |
+      echo "deb http://archive.ubuntu.com/ubuntu/ jammy main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
+      echo "deb http://archive.ubuntu.com/ubuntu/ jammy-updates main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
+      echo "deb http://archive.ubuntu.com/ubuntu/ jammy-backports main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
+      echo "deb http://archive.ubuntu.com/ubuntu/ jammy-security main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
+      sudo DEBIAN_FRONTEND=noninteractive apt-get --yes update
+- task: Bash@3
+  displayName: 'sudo apt-get upgrade'
+  inputs:
+    targetType: inline
+    script: sudo DEBIAN_FRONTEND=noninteractive apt-get --yes upgrade
+- task: Bash@3
+  displayName: 'sudo apt-get fix'
+  inputs:
+    targetType: inline
+    script: sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-broken install
+- ${{ if gt(length(parameters.aptPackages), 0) }}:
+  - task: Bash@3
+    displayName: 'sudo apt-get install ...'
+    inputs:
+      targetType: inline
+      script: sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-missing install ${{ join(' ', parameters.aptPackages) }}
 - ${{ if gt(length(parameters.pipModules), 0) }}:
  - task: Bash@3
    displayName: 'pip install  ...'
    inputs:
      targetType: inline
-      script: python3 -m pip install -v --force-reinstall ${{ join(' ', parameters.pipModules) }}
+      script: python3 -m pip install -v --force-reinstall ${{ join(' ', parameters.pipModules) }}  # run pip via python3 so modules install for the interpreter that `python3` resolves to
--- a/.azuredevops/templates/steps/dependencies-rocm.yml
+++ b/.azuredevops/templates/steps/dependencies-rocm.yml
@@ -13,9 +13,6 @@ parameters:
 - name: dependencyList
  type: object
  default: []
- name: os
-  type: string
-  default: 'ubuntu2204'
 - name: gpuTarget
  type: string
  default: ''
@@ -39,10 +36,6 @@ parameters:
 - name: aggregatePipeline
  type: boolean
  default: false
-# monorepo related parameters
- name: downstreamAggregateNames
-  type: string
-  default: ''

 - name: componentVarList
  type: object
@@ -110,7 +103,7 @@ parameters:
    hipCUB:
      pipelineId: $(HIPCUB_PIPELINE_ID)
      stagingBranch: develop
-      mainlineBranch: develop
+      mainlineBranch: mainline
      hasGpuTarget: true
    hipFFT:
      pipelineId: $(HIPFFT_PIPELINE_ID)
@@ -130,7 +123,7 @@ parameters:
    hipRAND:
      pipelineId: $(HIPRAND_PIPELINE_ID)
      stagingBranch: develop
-      mainlineBranch: develop
+      mainlineBranch: mainline
      hasGpuTarget: true
    hipSOLVER:
      pipelineId: $(HIPSOLVER_PIPELINE_ID)
@@ -265,7 +258,7 @@ parameters:
    rocPRIM:
      pipelineId: $(ROCPRIM_PIPELINE_ID)
      stagingBranch: develop
-      mainlineBranch: develop
+      mainlineBranch: mainline
      hasGpuTarget: true
    rocprofiler:
      pipelineId: $(ROCPROFILER_PIPELINE_ID)
@@ -305,7 +298,7 @@ parameters:
    rocRAND:
      pipelineId: $(ROCRAND_PIPELINE_ID)
      stagingBranch: develop
-      mainlineBranch: develop
+      mainlineBranch: mainline
      hasGpuTarget: true
    rocr_debug_agent:
      pipelineId: $(ROCR_DEBUG_AGENT_PIPELINE_ID)
@@ -330,7 +323,7 @@ parameters:
    rocThrust:
      pipelineId: $(ROCTHRUST_PIPELINE_ID)
      stagingBranch: develop
-      mainlineBranch: develop
+      mainlineBranch: mainline
      hasGpuTarget: true
    roctracer:
      pipelineId: $(ROCTRACER_PIPELINE_ID)
@@ -368,7 +361,7 @@ steps:
        pipelineId: ${{ parameters.componentVarList[split(dependency, ':')[0]].pipelineId }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
        ${{ if parameters.componentVarList[split(dependency, ':')[0]].hasGpuTarget }}:
-          fileFilter: "${{ split(dependency, ':')[1] }}*_${{ parameters.os }}_${{ parameters.gpuTarget }}"
+          fileFilter: "${{ split(dependency, ':')[1] }}*${{ parameters.gpuTarget }}"
        # dependencySource = staging
        ${{ if eq(parameters.dependencySource, 'staging')}}:
          branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].stagingBranch }}
@@ -391,14 +384,6 @@ steps:
        ${{ else }}:
          branchName: ${{ parameters.componentVarList[split(dependency, ':')[0]].stagingBranch }}
 # no colon (:) found in this item in the list
-  - ${{ elseif containsValue(split(parameters.downstreamAggregateNames, '+'), dependency) }}:
-    - template: local-artifact-download.yml
-      parameters:
-        ${{ if parameters.componentVarList[dependency].hasGpuTarget }}:
-          gpuTarget: ${{ parameters.gpuTarget }}
-        preTargetFilter: ${{ dependency }}
-        os: ${{ parameters.os }}
-        buildType: current
  - ${{ else }}:
    - template: artifact-download.yml
      parameters:
@@ -406,9 +391,7 @@ steps:
        pipelineId: ${{ parameters.componentVarList[dependency].pipelineId }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
        ${{ if parameters.componentVarList[dependency].hasGpuTarget }}:
-          fileFilter: ${{ parameters.os }}_${{ parameters.gpuTarget }}
-        ${{ else }}:
-          fileFilter: ${{ parameters.os }}
+          fileFilter: ${{ parameters.gpuTarget }}
        # dependencySource = staging
        ${{ if eq(parameters.dependencySource, 'staging')}}:
          branchName: ${{ parameters.componentVarList[dependency].stagingBranch }}
@@ -436,16 +419,14 @@ steps:
    displayName: Symlink from rocm/llvm to rocm/lib/llvm
    inputs:
      targetType: inline
-      script: |
-        sudo mkdir -p $(Agent.BuildDirectory)/rocm/lib
-        sudo ln -sr $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm
+      script: sudo mkdir -p $(Agent.BuildDirectory)/rocm/lib && sudo ln -s $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm  # create rocm/lib first; ln fails when the link's parent directory is missing
  - task: Bash@3
    displayName: Symlink executables from rocm/llvm/bin to rocm/bin
    inputs:
      targetType: inline
      script: |
        for file in amdclang amdclang++ amdclang-cl amdclang-cpp amdflang amdlld aompcc mygpu mycpu offload-arch; do
-          sudo ln -sr $(Agent.BuildDirectory)/rocm/llvm/bin/$file $(Agent.BuildDirectory)/rocm/bin/$file
+          sudo ln -s $(Agent.BuildDirectory)/rocm/llvm/bin/$file $(Agent.BuildDirectory)/rocm/bin/$file
        done
 # dlopen calls within a ctest or pytest sequence runs into issues when shared library symlink convention is not followed
 # the convention is as follows:
@@ -482,7 +463,7 @@ steps:
  displayName: 'List downloaded ROCm files'
  inputs:
    targetType: inline
-    script: ls -la1R $(Agent.BuildDirectory)/rocm
+    script: ls -1R $(Agent.BuildDirectory)/rocm
 - ${{ if eq(parameters.skipLibraryLinking, false) }}:
  - task: Bash@3
    displayName: 'Link ROCm shared libraries'
@@ -490,10 +471,8 @@ steps:
      targetType: inline
 # OS ignores if the ROCm lib folder shows up more than once
      script: |
-        echo $(Agent.BuildDirectory)/rocm/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
+        echo $(Agent.BuildDirectory)/rocm/lib | sudo tee /etc/ld.so.conf.d/rocm-ci.conf
        echo $(Agent.BuildDirectory)/rocm/llvm/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
-        echo $(Agent.BuildDirectory)/rocm/lib64 | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
-        echo $(Agent.BuildDirectory)/rocm/llvm/lib64 | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
        sudo cat /etc/ld.so.conf.d/rocm-ci.conf
        sudo ldconfig -v
        ldconfig -p
--- a/.azuredevops/templates/steps/dependencies-vendor.yml
+++ b/.azuredevops/templates/steps/dependencies-vendor.yml
@@ -1,53 +0,0 @@
-parameters:
- name: os
-  type: string
-  default: 'ubuntu2204'
- name: dependencyList
-  type: object
- name: pipelineIdList
-  type: object
-  default:
-    boost: 250
-    grpc: 72
-    gtest: 73
-    half560: 68
-    lapack: 69
-
-steps:
- ${{ each dependency in parameters.dependencyList }}:
-  - task: DownloadPipelineArtifact@2
-    displayName: Download ${{ dependency }}
-    inputs:
-      project: ROCm-CI
-      buildType: specific
-      targetPath: $(Pipeline.Workspace)/d
-      definition: ${{ parameters.pipelineIdList[dependency] }}
-      itemPattern: '**/*${{ parameters.os }}*'
-  - task: ExtractFiles@1
-    displayName: Extract ${{ dependency }}
-    inputs:
-      archiveFilePatterns: '$(Pipeline.Workspace)/d/**/*.tar.gz'
-      destinationFolder: $(Agent.BuildDirectory)/vendor
-      cleanDestinationFolder: true
-      overwriteExistingFiles: true
-  - task: DeleteFiles@1
-    displayName: Clean up ${{ dependency }}
-    inputs:
-      SourceFolder: $(Pipeline.Workspace)/d
-      Contents: '**/*.tar.gz'
-      RemoveDotFiles: true
- task: Bash@3
-  displayName: List vendored files
-  inputs:
-    targetType: inline
-    script: ls -la1R $(Agent.BuildDirectory)/vendor
- task: Bash@3
-  displayName: Link vendored shared libraries
-  inputs:
-    targetType: inline
-    script: |
-      echo $(Agent.BuildDirectory)/vendor/lib | sudo tee -a /etc/ld.so.conf.d/vendor.conf
-      echo $(Agent.BuildDirectory)/vendor/lib64 | sudo tee -a /etc/ld.so.conf.d/vendor.conf
-      sudo cat /etc/ld.so.conf.d/vendor.conf
-      sudo ldconfig -v
-      ldconfig -p
--- a/.azuredevops/templates/steps/docker-container.yml
+++ b/.azuredevops/templates/steps/docker-container.yml
@@ -106,7 +106,6 @@ parameters:
  type: object
  default:
    - gfx90a
-    - gfx942

 steps:
 # these steps should only be run if there was a failure or warning
--- a/.azuredevops/templates/steps/local-artifact-download.yml
+++ b/.azuredevops/templates/steps/local-artifact-download.yml
@@ -2,9 +2,6 @@
 # It can be overridden to download any artifact from any pipeline, given the appropriate build/pipeline IDs

 parameters:
-  - name: os
-    type: string
-    default: 'ubuntu2204'
  - name: gpuTarget
    type: string
    default: ''
@@ -32,27 +29,25 @@ parameters:

 steps:
  - task: DownloadPipelineArtifact@2
-    displayName: Download ${{ parameters.preTargetFilter}}*${{ parameters.os }}_${{ parameters.gpuTarget}}*${{ parameters.postTargetFilter}}
+    displayName: 'Download Pipeline Build'
    inputs:
      ${{ if eq(parameters.buildType, 'specific') }}:
        buildType: specific
        buildVersionToDownload: specific
        project: ROCm-CI
-        ${{ if ne(parameters.definitionId, 0) }}:
-          definition: ${{ parameters.definitionId }}
-        ${{ if ne(parameters.buildId, 0) }}:
-          buildId: ${{ parameters.buildId }}
-      itemPattern: '**/*${{ parameters.preTargetFilter }}*${{ parameters.os }}_${{ parameters.gpuTarget }}*${{ parameters.postTargetFilter }}*'
+        definition: ${{ parameters.definitionId }}
+        buildId: ${{ parameters.buildId }}
+      itemPattern: '**/*${{ parameters.preTargetFilter }}*${{ parameters.gpuTarget }}*${{ parameters.postTargetFilter }}*'
      targetPath: $(Pipeline.Workspace)/d
  - task: ExtractFiles@1
-    displayName: Extract ${{ parameters.preTargetFilter}}*${{ parameters.os }}_${{ parameters.gpuTarget}}*${{ parameters.postTargetFilter}}
+    displayName: 'Extract Pipeline Build'
    inputs:
      archiveFilePatterns: '$(Pipeline.Workspace)/d/**/*.tar.gz'
      destinationFolder: '$(Agent.BuildDirectory)/rocm'
      cleanDestinationFolder: false
      overwriteExistingFiles: true
  - task: DeleteFiles@1
-    displayName: Clean up ${{ parameters.preTargetFilter}}*${{ parameters.os }}_${{ parameters.gpuTarget}}*${{ parameters.postTargetFilter}}
+    displayName: 'Clean up Compressed Pipeline Build'
    inputs:
      SourceFolder: '$(Pipeline.Workspace)/d'
      Contents: '/**/*.tar.xz'
--- a/.azuredevops/templates/steps/manifest.yml
+++ b/.azuredevops/templates/steps/manifest.yml
@@ -1,19 +1,10 @@
 parameters:
- name: componentName
+- name: artifactName
  type: string
-  default: $(Build.DefinitionName)
- name: sparseCheckoutDir
-  type: string
-  default: ''
+  default: 'drop'
 - name: gpuTarget
  type: string
  default: ''
- name: artifactName
-  type: string
-  default: drop
- name: os
-  type: string
-  default: 'ubuntu2204'

 steps:
 - task: Bash@3
@@ -34,9 +25,8 @@ steps:

      IS_TAG_BUILD=$(jq 'has("release_repo")' resources.repositories)
      IS_AOMP_BUILD=$(jq 'has("aomp_repo")' resources.repositories)
-      IS_MATHLIBS_BUILD=$(jq 'has("libraries_repo")' resources.repositories)

-      if [ "$IS_TAG_BUILD" = "true" ] || [ "$IS_AOMP_BUILD" = "true" ] || [ "$IS_MATHLIBS_BUILD" = "true" ]; then
+      if [ "$IS_TAG_BUILD" = "true" ] || [ "$IS_AOMP_BUILD" = "true" ]; then
        exclude_keys=("pipelines_repo" "self") # Triggered by a file under ROCm/ROCm
      else
        exclude_keys=("pipelines_repo") # Triggered by a file under a component repo
@@ -55,7 +45,6 @@ steps:
              buildId: "$(Build.BuildId)",
              repoId: $entry.value.id,
              repoName: $entry.value.name,
-              repoSparse: "${{ parameters.sparseCheckoutDir }}",
              repoRef: $entry.value.ref,
              repoUrl: $entry.value.url,
              repoVersion: $entry.value.version
@@ -66,7 +55,7 @@ steps:
        )
      ' resources.repositories)

-      manifest_json=$(Build.ArtifactStagingDirectory)/manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.json
+      manifest_json=$(Build.ArtifactStagingDirectory)/manifest_$(Build.DefinitionName)_$(Build.BuildId)_$(Build.BuildNumber)_ubuntu2204_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.json

      dependencies=()
      for manifest_file in $(Pipeline.Workspace)/d/**/manifest_*.json; do
@@ -92,7 +81,6 @@ steps:
          "<tr><td>" + .buildNumber + "</td>" +
          "<td><a href=\"https://dev.azure.com/ROCm-CI/ROCm-CI/_build/results?buildId=" + .buildId + "\">" + .buildId + "</a></td>" +
          "<td><a href=\"" + .repoUrl + "\">" + .repoName + "</a></td>" +
-          "<td><a href=\"" + .repoUrl + "/tree/" + .repoRef + "/" + .repoSparse + "\">" + .repoSparse + "</a></td>" +
          "<td><a href=\"" + .repoUrl + "/tree/" + .repoRef + "\">" + .repoRef + "</a></td>" +
          "<td><a href=\"" + .repoUrl + "/commit/" + .repoVersion + "\">" + .repoVersion + "</a></td></tr>"
        ')
@@ -105,7 +93,6 @@ steps:
          "<tr><td>" + .buildNumber + "</td>" +
          "<td><a href=\"https://dev.azure.com/ROCm-CI/ROCm-CI/_build/results?buildId=" + .buildId + "\">" + .buildId + "</a></td>" +
          "<td><a href=\"" + .repoUrl + "\">" + .repoName + "</a></td>" +
-          "<td><a href=\"" + .repoUrl + "/tree/" + .repoRef + "/" + .repoSparse + "\">" + .repoSparse + "</a></td>" +
          "<td><a href=\"" + .repoUrl + "/tree/" + .repoRef + "\">" + .repoRef + "</a></td>" +
          "<td><a href=\"" + .repoUrl + "/commit/" + .repoVersion + "\">" + .repoVersion + "</a></td></tr>"
        ')
@@ -120,7 +107,7 @@ steps:
  inputs:
    targetType: inline
    script: |
-      manifest_html=$(Build.ArtifactStagingDirectory)/manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html
+      manifest_html=$(Build.ArtifactStagingDirectory)/manifest_$(Build.DefinitionName)_$(Build.BuildId)_$(Build.BuildNumber)_ubuntu2204_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html
      cat <<EOF > $manifest_html
      <html>
      <h1>Manifest</h1>
@@ -130,7 +117,6 @@ steps:
        <th>Build Number</th>
        <th>Build ID</th>
        <th>Repo Name</th>
-        <th>Repo Sparse</th>
        <th>Repo Ref</th>
        <th>Repo Version</th>
      </tr>
@@ -142,7 +128,6 @@ steps:
        <th>Build Number</th>
        <th>Build ID</th>
        <th>Repo Name</th>
-        <th>Repo Sparse</th>
        <th>Repo Ref</th>
        <th>Repo Version</th>
      </tr>
@@ -163,7 +148,7 @@ steps:
  continueOnError: true
  inputs:
    tabName: Manifest
-    reportDir: $(Build.ArtifactStagingDirectory)/manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html
+    reportDir: $(Build.ArtifactStagingDirectory)/manifest_$(Build.DefinitionName)_$(Build.BuildId)_$(Build.BuildNumber)_ubuntu2204_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html
 - task: Bash@3
  displayName: Save manifest artifact file name
  condition: always()
@@ -172,5 +157,5 @@ steps:
    workingDirectory: $(Pipeline.Workspace)
    targetType: inline
    script: |
-      echo "manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html" >> pipelineArtifacts.txt
-      echo "manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.json" >> pipelineArtifacts.txt
+      echo "manifest_$(Build.DefinitionName)_$(Build.BuildId)_$(Build.BuildNumber)_ubuntu2204_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html" >> pipelineArtifacts.txt
+      echo "manifest_$(Build.DefinitionName)_$(Build.BuildId)_$(Build.BuildNumber)_ubuntu2204_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.json" >> pipelineArtifacts.txt
--- a/.azuredevops/templates/steps/miopen-get-ck-build.yml
+++ b/.azuredevops/templates/steps/miopen-get-ck-build.yml
@@ -25,7 +25,8 @@ steps:
      echo "Fetching CK build ID for commit $CK_COMMIT"
      CK_CHECKS_URL="$GH_API/composable_kernel/commits/${CK_COMMIT}/check-runs"
+      # Only accept a check run whose conclusion is "success"; anything else leaves CK_BUILD_ID empty for the fallback noted below
      CK_BUILD_ID=$(curl -s $CK_CHECKS_URL | \
        jq '.check_runs[] | select(.name == "composable_kernel" and .app.slug == "azure-pipelines" and .conclusion == "success") | .details_url' | \
        tr -d '"' | grep -oP 'buildId=\K\d+')

      # If none found, use latest successful CK build instead
--- a/.azuredevops/templates/steps/preamble.yml
+++ b/.azuredevops/templates/steps/preamble.yml
@@ -3,27 +3,10 @@
 # also display installed components and packages
 steps:
 - task: Bash@3
-  displayName: OS Version
+  displayName: List apt packages
  inputs:
    targetType: inline
-    script: cat /etc/os-release
- task: Bash@3
-  displayName: List installed packages (apt, dnf, or yum)
-  inputs:
-    targetType: inline
-    script: |
-      if command -v apt >/dev/null 2>&1; then
-        echo "Listing installed packages with apt:"
-        apt list --installed
-      elif command -v dnf >/dev/null 2>&1; then
-        echo "Listing installed packages with dnf:"
-        dnf list installed
-      elif command -v yum >/dev/null 2>&1; then
-        echo "Listing installed packages with yum:"
-        yum list installed
-      else
-        echo "No supported package manager found (apt, dnf, yum)."
-      fi
+    script: apt list --installed
 - task: Bash@3
  displayName: Print Python version
  inputs:
@@ -33,7 +16,7 @@ steps:
  displayName: List Python packages
  inputs:
    targetType: inline
-    script: python3 -m pip list -v
+    script: python3 -m pip list -v  # list packages for the python3 interpreter rather than whichever pip is first on PATH
 # The "Azure Pipelines" agents install CMake in multiple ways, including a standalone install into /usr/local/bin:
 # https://github.com/actions/runner-images/blob/6d939a3ab352a54a021dd67b071577287b6f14a5/images/ubuntu/scripts/build/install-cmake.sh#L27
 # This standalone CMake does not have a fixed version, and is not the same version as the one installed by the package manager
--- a/.azuredevops/templates/steps/test.yml
+++ b/.azuredevops/templates/steps/test.yml
@@ -2,27 +2,21 @@ parameters:
 - name: componentName
  type: string
  default: ''
- name: os
-  type: string
-  default: ubuntu2204
 - name: testDir
  type: string
-  default: build
+  default: 'build'
 - name: testExecutable
  type: string
-  default: ctest
+  default: 'ctest'
 - name: testParameters
  type: string
-  default: --output-on-failure --force-new-ctest-process --output-junit test_output.xml
- name: extraTestParameters
-  type: string
-  default: ''
+  default: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml'
 - name: testOutputFile
  type: string
  default: test_output.xml
 - name: testOutputFormat
  type: string
-  default: JUnit
+  default: 'JUnit'
  values:
    - JUnit
    - NUnit
@@ -32,28 +26,26 @@ parameters:
 - name: testPublishResults
  type: boolean
  default: true
- name: allowComponentTestFailure
+- name: allowPartiallySucceededBuilds
  type: object
  default:
    - amdsmi
-    - HIPIFY
-    - rocm_smi_lib
-    - roctracer
-    # the following do not use this template but allow test failures, included for completeness
    - aomp
-    - ROCgdb
+    - HIPIFY
+    - MIVisionX
+    - rocm_smi_lib
+    - rocprofiler-sdk
+    - roctracer

 steps:
 # run test, continue on failure to publish results
 # and to publish build artifacts
 - task: Bash@3
  displayName: '${{ parameters.componentName }} Test'
-  continueOnError: ${{ containsValue(parameters.allowComponentTestFailure, parameters.componentName) }}
+  continueOnError: ${{ containsValue(parameters.allowPartiallySucceededBuilds, parameters.componentName) }}
  inputs:
    targetType: inline
-    script: |
-      ${{ iif(eq(parameters.os, 'almalinux8'), 'source /opt/rh/gcc-toolset-14/enable', '') }}
-      ${{ parameters.testExecutable }} ${{ parameters.testParameters }} ${{ parameters.extraTestParameters }}
+    script: ${{ parameters.testExecutable }} ${{ parameters.testParameters }}
    workingDirectory: ${{ parameters.testDir }}
 - ${{ if parameters.testPublishResults }}:
  - task: PublishTestResults@2
--- a/.azuredevops/variables-global.yml
+++ b/.azuredevops/variables-global.yml
@@ -3,8 +3,6 @@
 variables:
 - name: RESOURCES_REPOSITORIES
  value: $[ convertToJson(resources.repositories) ]
- name: CCACHE_DIR
-  value: $(Pipeline.Workspace)/ccache
 - name: CI_ROOT_PATH
  value: /.azuredevops
 - name: CI_COMPONENT_PATH
@@ -32,136 +30,320 @@ variables:
 - name: GFX90A_TEST_POOL
  value: gfx90a_test_pool
 - name: LATEST_RELEASE_VERSION
-  value: 6.4.1
+  value: 6.4.0
 - name: REPO_RADEON_VERSION
-  value: 6.4.1
+  value: 6.4
 - name: NEXT_RELEASE_VERSION
-  value: 7.0.0
+  value: 6.5.0
 - name: LATEST_RELEASE_TAG
-  value: rocm-6.4.1
+  value: rocm-6.4.0
 - name: DOCKER_SKIP_GFX
  value: gfx90a
+- name: AMDMIGRAPHX_GFX942_TEST_PIPELINE_ID
+  value: 197
 - name: AMDMIGRAPHX_PIPELINE_ID
  value: 113
+- name: AMDMIGRAPHX_TAGGED_PIPELINE_ID
+  value: 60
 - name: AMDSMI_PIPELINE_ID
  value: 99
+- name: AMDSMI_TAGGED_PIPELINE_ID
+  value: 33
 - name: AOMP_EXTRAS_PIPELINE_ID
  value: 111
+- name: AOMP_EXTRAS_TAGGED_PIPELINE_ID
+  value: 75
 - name: AOMP_PIPELINE_ID
  value: 115
+- name: AOMP_TAGGED_PIPELINE_ID
+  value: 76
+- name: CCACHE_DIR
+  value: $(Pipeline.Workspace)/ccache
 - name: CLR_PIPELINE_ID
  value: 145
+- name: CLR_TAGGED_PIPELINE_ID
+  value: 71
+- name: COMPOSABLE_KERNEL_GFX942_TEST_PIPELINE_ID
+  value: 179
 - name: COMPOSABLE_KERNEL_PIPELINE_ID
  value: 86
+- name: COMPOSABLE_KERNEL_TAGGED_PIPELINE_ID
+  value: 38
 - name: FLANG_LEGACY_PIPELINE_ID
  value: 77
+- name: FLANG_LEGACY_TAGGED_PIPELINE_ID
+  value: 77
 - name: HALF_PIPELINE_ID
  value: 101
+- name: HALF_TAGGED_PIPELINE_ID
+  value: 11
 - name: HALF560_PIPELINE_ID
  value: 68
 - name: HALF560_BUILD_ID
  value: 621
 - name: HIP_PIPELINE_ID
  value: 93
+- name: HIP_TAGGED_PIPELINE_ID
+  value: 31
 - name: HIP_TESTS_PIPELINE_ID
  value: 233
+- name: HIP_TESTS_TAGGED_PIPELINE_ID
+  value: 220
 - name: HIPBLAS_COMMON_PIPELINE_ID
  value: 223
+- name: HIPBLAS_COMMON_TAGGED_PIPELINE_ID
+  value: 224
+- name: HIPBLAS_GFX942_TEST_PIPELINE_ID
+  value: 202
 - name: HIPBLAS_PIPELINE_ID
  value: 87
+- name: HIPBLAS_TAGGED_PIPELINE_ID
+  value: 44
+- name: HIPBLASLT_GFX942_TEST_PIPELINE_ID
+  value: 187
 - name: HIPBLASLT_PIPELINE_ID
  value: 112
+- name: HIPBLASLT_TAGGED_PIPELINE_ID
+  value: 45
+- name: HIPCUB_GFX942_TEST_PIPELINE_ID
+  value: 186
 - name: HIPCUB_PIPELINE_ID
-  value: 277
+  value: 97
+- name: HIPCUB_TAGGED_PIPELINE_ID
+  value: 46
+- name: HIPFFT_GFX942_TEST_PIPELINE_ID
+  value: 198
 - name: HIPFFT_PIPELINE_ID
  value: 121
+- name: HIPFFT_TAGGED_PIPELINE_ID
+  value: 12
 - name: HIPFORT_PIPELINE_ID
  value: 102
+- name: HIPFORT_TAGGED_PIPELINE_ID
+  value: 34
 - name: HIPIFY_PIPELINE_ID
  value: 92
+- name: HIPIFY_TAGGED_PIPELINE_ID
+  value: 13
+- name: HIPRAND_GFX942_TEST_PIPELINE_ID
+  value: 188
 - name: HIPRAND_PIPELINE_ID
-  value: 275
+  value: 90
+- name: HIPRAND_TAGGED_PIPELINE_ID
+  value: 42
+- name: HIPSOLVER_GFX942_TEST_PIPELINE_ID
+  value: 201
 - name: HIPSOLVER_PIPELINE_ID
  value: 84
+- name: HIPSOLVER_TAGGED_PIPELINE_ID
+  value: 52
+- name: HIPSPARSE_GFX942_TEST_PIPELINE_ID
+  value: 195
 - name: HIPSPARSE_PIPELINE_ID
  value: 83
+- name: HIPSPARSE_TAGGED_PIPELINE_ID
+  value: 14
+- name: HIPSPARSELT_GFX942_TEST_PIPELINE_ID
+  value: 200
 - name: HIPSPARSELT_PIPELINE_ID
  value: 104
+- name: HIPSPARSELT_TAGGED_PIPELINE_ID
+  value: 53
+- name: HIPTENSOR_GFX942_TEST_PIPELINE_ID
+  value: 192
 - name: HIPTENSOR_PIPELINE_ID
  value: 105
+- name: HIPTENSOR_TAGGED_PIPELINE_ID
+  value: 56
 - name: LLVM_PROJECT_PIPELINE_ID
  value: 2
+- name: LLVM_PROJECT_TAGGED_PIPELINE_ID
+  value: 8
 - name: MIOPEN_PIPELINE_ID
  value: 108
+- name: MIOPEN_TAGGED_PIPELINE_ID
+  value: 58
 - name: MIVISIONX_PIPELINE_ID
  value: 80
+- name: MIVISIONX_TAGGED_PIPELINE_ID
+  value: 18
+- name: OMNIPERF_PIPELINE_ID
+  value: 241
+- name: OMNIPERF_TAGGED_PIPELINE_ID
+  value: 242
+- name: OMNITRACE_PIPELINE_ID
+  value: 253
+- name: OMNITRACE_TAGGED_PIPELINE_ID
+  value: 252
+- name: RCCL_GFX942_TEST_PIPELINE_ID
+  value: 184
 - name: RCCL_PIPELINE_ID
  value: 107
+- name: RCCL_TAGGED_PIPELINE_ID
+  value: 15
 - name: RDC_PIPELINE_ID
  value: 100
+- name: RDC_TAGGED_PIPELINE_ID
+  value: 59
 - name: ROCAL_PIPELINE_ID
  value: 151
+- name: ROCALUTION_GFX942_TEST_PIPELINE_ID
+  value: 196
 - name: ROCALUTION_PIPELINE_ID
  value: 89
+- name: ROCALUTION_TAGGED_PIPELINE_ID
+  value: 16
+- name: ROCBLAS_GFX942_TEST_PIPELINE_ID
+  value: 185
 - name: ROCBLAS_PIPELINE_ID
  value: 85
+- name: ROCBLAS_TAGGED_PIPELINE_ID
+  value: 32
 - name: ROCDBGAPI_PIPELINE_ID
  value: 135
+- name: ROCDBGAPI_TAGGED_PIPELINE_ID
+  value: 17
 - name: ROCDECODE_PIPELINE_ID
  value: 79
+- name: ROCDECODE_TAGGED_PIPELINE_ID
+  value: 21
+- name: ROCFFT_GFX942_TEST_PIPELINE_ID
+  value: 189
 - name: ROCFFT_PIPELINE_ID
  value: 120
+- name: ROCFFT_TAGGED_PIPELINE_ID
+  value: 19
 - name: ROCGDB_PIPELINE_ID
  value: 134
+- name: ROCGDB_TAGGED_PIPELINE_ID
+  value: 50
 - name: ROCJPEG_PIPELINE_ID
  value: 262
+- name: ROCJPEG_TAGGED_PIPELINE_ID
+  value: 263
 - name: ROCM_BANDWIDTH_TEST_PIPELINE_ID
  value: 88
+- name: ROCM_BANDWIDTH_TEST_TAGGED_PIPELINE_ID
+  value: 23
 - name: ROCM_CMAKE_PIPELINE_ID
  value: 6
+- name: ROCM_CMAKE_TAGGED_PIPELINE_ID
+  value: 7
 - name: ROCM_CORE_PIPELINE_ID
  value: 103
+- name: ROCM_CORE_TAGGED_PIPELINE_ID
+  value: 22
+- name: ROCM_EXAMPLES_GFX942_TEST_PIPELINE_ID
+  value: 204
 - name: ROCM_EXAMPLES_PIPELINE_ID
  value: 216
+- name: ROCM_EXAMPLES_TAGGED_PIPELINE_ID
+  value: 245
 - name: ROCM_SMI_LIB_PIPELINE_ID
  value: 96
+- name: ROCM_SMI_LIB_TAGGED_PIPELINE_ID
+  value: 47
 - name: ROCMINFO_PIPELINE_ID
  value: 91
+- name: ROCMINFO_TAGGED_PIPELINE_ID
+  value: 27
 - name: ROCMLIR_PIPELINE_ID
  value: 229
+- name: ROCMLIR_TAGGED_PIPELINE_ID
+  value: 62
 - name: ROCMVALIDATIONSUITE_PIPELINE_ID
  value: 106
+- name: ROCMVALIDATIONSUITE_TAGGED_PIPELINE_ID
+  value: 43
+- name: ROCPRIM_GFX942_TEST_PIPELINE_ID
+  value: 180
 - name: ROCPRIM_PIPELINE_ID
-  value: 273
+  value: 82
+- name: ROCPRIM_TAGGED_PIPELINE_ID
+  value: 20
+- name: ROCPROFILER_GFX942_TEST_PIPELINE_ID
+  value: 190
 - name: ROCPROFILER_COMPUTE_PIPELINE_ID
  value: 257
+- name: ROCPROFILER_COMPUTE_TAGGED_PIPELINE_ID
+  value: 258
 - name: ROCPROFILER_REGISTER_PIPELINE_ID
  value: 1
+- name: ROCPROFILER_REGISTER_TAGGED_PIPELINE_ID
+  value: 25
 - name: ROCPROFILER_SDK_PIPELINE_ID
  value: 246
+- name: ROCPROFILER_SDK_TAGGED_PIPELINE_ID
+  value: 234
 - name: ROCPROFILER_SYSTEMS_PIPELINE_ID
  value: 255
+- name: ROCPROFILER_SYSTEMS_TAGGED_PIPELINE_ID
+  value: 254
 - name: ROCPROFILER_PIPELINE_ID
  value: 143
+- name: ROCPROFILER_TAGGED_PIPELINE_ID
+  value: 28
 - name: ROCPYDECODE_PIPELINE_ID
  value: 239
+- name: ROCPYDECODE_TAGGED_PIPELINE_ID
+  value: 232
 - name: ROCR_DEBUG_AGENT_PIPELINE_ID
  value: 136
+- name: ROCR_DEBUG_AGENT_TAGGED_PIPELINE_ID
+  value: 29
 - name: ROCR_RUNTIME_PIPELINE_ID
  value: 10
+- name: ROCR_RUNTIME_TAGGED_PIPELINE_ID
+  value: 24
+- name: ROCRAND_GFX942_TEST_PIPELINE_ID
+  value: 183
 - name: ROCRAND_PIPELINE_ID
-  value: 274
+  value: 95
+- name: ROCRAND_TAGGED_PIPELINE_ID
+  value: 41
+- name: ROCSOLVER_GFX942_TEST_PIPELINE_ID
+  value: 199
 - name: ROCSOLVER_PIPELINE_ID
  value: 81
+- name: ROCSOLVER_TAGGED_PIPELINE_ID
+  value: 55
+- name: ROCSPARSE_GFX942_TEST_PIPELINE_ID
+  value: 191
 - name: ROCSPARSE_PIPELINE_ID
  value: 98
+- name: ROCSPARSE_TAGGED_PIPELINE_ID
+  value: 67
+- name: ROCT_THUNK_INTERFACE_PIPELINE_ID
+  value: 3
+- name: ROCT_THUNK_INTERFACE_TAGGED_PIPELINE_ID
+  value: 9
+- name: ROCTHRUST_GFX942_TEST_PIPELINE_ID
+  value: 194
 - name: ROCTHRUST_PIPELINE_ID
-  value: 276
+  value: 94
+- name: ROCTHRUST_TAGGED_PIPELINE_ID
+  value: 26
+- name: ROCTRACER_GFX942_TEST_PIPELINE_ID
+  value: 181
 - name: ROCTRACER_PIPELINE_ID
  value: 141
+- name: ROCTRACER_TAGGED_PIPELINE_ID
+  value: 30
+- name: ROCWMMA_GFX942_TEST_PIPELINE_ID
+  value: 193
 - name: ROCWMMA_PIPELINE_ID
  value: 109
+- name: ROCWMMA_TAGGED_PIPELINE_ID
+  value: 57
+- name: RPP_GFX942_TEST_PIPELINE_ID
+  value: 182
 - name: RPP_PIPELINE_ID
  value: 78
+- name: RPP_TAGGED_PIPELINE_ID
+  value: 39
 - name: TRANSFERBENCH_PIPELINE_ID
  value: 265
+- name: TRANSFERBENCH_TAGGED_PIPELINE_ID
+  value: 266
+- name: BOOST_DEPENDENCY_PIPELINE_ID
+  value: 250
--- a/.wordlist.txt
+++ b/.wordlist.txt
@@ -1,18 +1,3 @@
-Datacenter
-GST
-IET
-LTO
-MX
-Microscaling
-NANOO
-ROCprof
-affinitization
-amdclang
-benefitting
-demangled
-inlined
-microscaling
-roofline
 AAC
 ABI
 ACE
@@ -49,7 +34,6 @@ Autocast
 BARs
 BLAS
 BMC
-BabelStream
 Blit
 Blockwise
 Bluefield
@@ -154,7 +138,6 @@ GDR
 GDS
 GEMM
 GEMMs
-GFLOPS
 GFortran
 GFXIP
 Gemma
@@ -243,7 +226,6 @@ LM
 LSAN
 LSan
 LTS
-LanguageCrossEntropy
 LoRA
 MEM
 MERCHANTABILITY
@@ -261,7 +243,6 @@ MMIOH
 MMU
 MNIST
 MPI
-MPT
 MSVC
 MVAPICH
 MVFFR
@@ -278,7 +259,6 @@ Meta's
 Miniconda
 MirroredStrategy
 Mixtral
-MosaicML
 Multicore
 Multithreaded
 MyEnvironment
@@ -287,7 +267,6 @@ NBIO
 NBIOs
 NCCL
 NCF
-NFS
 NIC
 NICs
 NLI
@@ -350,7 +329,6 @@ PipelineParallel
 PnP
 PowerEdge
 PowerShell
-Pretrained
 Pretraining
 Profiler's
 PyPi
@@ -516,7 +494,6 @@ ZenDNN
 accuracies
 activations
 addr
-ade
 ai
 alloc
 allocatable
@@ -532,7 +509,6 @@ avx
 awk
 backend
 backends
-bb
 benchmarked
 benchmarking
 bfloat
@@ -556,7 +532,6 @@ cd
 centos
 centric
 changelog
-checkpointing
 chiplet
 cmake
 cmd
@@ -597,7 +572,6 @@ de
 deallocation
 debuggability
 debian
-deepseek
 denoise
 denoised
 denoises
@@ -621,7 +595,6 @@ embeddings
 enablement
 encodings
 endfor
-endif
 endpgm
 enqueue
 env
@@ -664,7 +637,6 @@ hipSPARSELt
 hipTensor
 hipamd
 hipblas
-hipcc
 hipcub
 hipfft
 hipfort
@@ -723,7 +695,6 @@ migratable
 miopen
 miopengemm
 mivisionx
-mixtral
 mjx
 mkdir
 mlirmiopen
@@ -865,7 +836,6 @@ subfolder
 subfolders
 submodule
 submodules
-subnet
 supercomputing
 symlink
 symlinks
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,142 +4,9 @@ This page is a historical overview of changes made to ROCm components. This
 consolidated changelog documents key modifications and improvements across
 different versions of the ROCm software stack and its components.

-## ROCm 6.4.1
-
-See the [ROCm 6.4.1 release notes](https://rocm.docs.amd.com/en/docs-6.4.1/about/release-notes.html)
-for a complete overview of this release.
-
-### **AMD SMI** (25.4.2)
-
-#### Added
-
-* Dumping CPER entries from RAS tool `amdsmi_get_gpu_cper_entries()` to Python and C APIs.
-  - Dumping CPER entries consist of `amdsmi_cper_hdr_t`.
-  - Dumping CPER entries is also enabled in the CLI interface through `sudo amd-smi ras --cper`.
-* `amdsmi_get_gpu_busy_percent` to the C API.
-
-#### Changed
-
-* Modified VRAM display for `amd-smi monitor -v`. 
-
-#### Optimized
-
-* Improved load times for CLI commands when the GPU has multiple parititons.
-
-#### Resolved issues
-
-* Fixed partition enumeration in `amd-smi list -e`, `amdsmi_get_gpu_enumeration_info()`, `amdsmi_enumeration_info_t`, `drm_card`, and `drm_render` fields.
-
-#### Known issues
-
-* When using the `--follow` flag with `amd-smi ras --cper`, CPER entries are not streamed continuously as intended. This will be fixed in an upcoming ROCm release.
-
-```{note}
-See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples, and in-depth descriptions.
-```
-
-### **HIP** (6.4.1)
-
-#### Added
-
-* New log mask enumeration `LOG_COMGR` enables logging precise code object information.
-
-#### Changed
-
-* HIP runtime uses device bitcode before SPIRV.
-* The implementation of preventing `hipLaunchKernel` latency degradation with number of idle streams is reverted or disabled by default.
-
-#### Optimized
-
-* Improved kernel logging includes de-mangling shader names.
-* Refined implementation in HIP APIs `hipEventRecords` and `hipStreamWaitEvent` for performance improvement.
-
-#### Resolved issues
-
-* Stale state during the graph capture. The return error was fixed, HIP runtime now always uses the latest dependent nodes during `hipEventRecord` capture.
-* Segmentation fault during kernel execution. HIP runtime now allows maximum stack size as per ISA on the GPU device.
-
-### **hipBLASLt** (0.12.1)
-
-#### Resolved issues
-
-* Fixed an accuracy issue for some solutions using an `FP32` or `TF32` data type with a TT transpose.
-
-### **RCCL** (2.22.3)
-
-#### Changed
-
-* MSCCL++ is now disabled by default. To enable it, set `RCCL_MSCCLPP_ENABLE=1`.
-
-#### Resolved issues
-
-* Fixed an issue where early termination, in rare circumstances, could cause the application to stop responding by adding synchronization before destroying a proxy thread.
-* Fixed the accuracy issue for the MSCCLPP `allreduce7` kernel in graph mode.
-
-#### Known issues
-
-* When splitting a communicator using `ncclCommSplit` in some GPU configurations, MSCCL initialization can cause a segmentation fault. The recommended workaround is to disable MSCCL with `export RCCL_MSCCL_ENABLE=0`.
-  This issue will be fixed in a future ROCm release.
-
-* Within the RCCL-UnitTests test suite, failures occur in tests ending with the
-  `.ManagedMem` and `.ManagedMemGraph` suffixes. These failures only affect the
-  test results and do not affect the RCCL component itself. This issue will be
-  resolved in a future ROCm release.
-
-### **rocALUTION** (3.2.3)
-
-#### Added
-
-* The `-a` option has been added to the `rmake.py` build script. This option allows you to select specific architectures when building on Microsoft Windows.
-
-#### Resolved issues
-
-* Fixed an issue where the `HIP_PATH` environment variable was being ignored when compiling on Microsoft Windows.
-
-### **ROCm Data Center Tool** (0.3.0)
-
-#### Added
-
- Support for GPU partitions.
- `RDC_FI_GPU_BUSY_PERCENT` metric.
-
-#### Changed
-
- Updated `rdc_field` to align with `rdc_bootstrap` for current metrics.
-
-#### Resolved issues
-
- Fixed [ROCProfiler](https://rocm.docs.amd.com/projects/rocprofiler/en/docs-6.4.0/index.html) eval metrics and memory leaks.
-
-### **ROCm SMI** (7.5.0)
-
-#### Resolved issues
-
- Fixed partition enumeration. It now refers to the correct DRM Render and Card paths.
-
-```{note}
-See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples, and in-depth descriptions.
-```
-
-### **ROCm Systems Profiler** (1.0.1)
-
-#### Added 
-
-* How-to document for [network performance profiling](https://rocm.docs.amd.com/projects/rocprofiler-systems/en/latest/how-to/nic-profiling.html) for standard Network Interface Cards (NICs).
-
-#### Resolved issues
-
-* Fixed a build issue with Dyninst on GCC 13.
-
-### **ROCr Runtime** (1.15.0)
-
-#### Resolved issues
-
-* Fixed a rare occurrence issue on AMD Instinct MI25, MI50, and MI100 GPUs, where the `SDMA` copies might start before the dependent Kernel finishes and could cause memory corruption.
-
 ## ROCm 6.4.0

-See the [ROCm 6.4.0 release notes](https://rocm.docs.amd.com/en/docs-6.4.0/about/release-notes.html)
+See the [ROCm 6.4.0 release notes](https://rocm-stg.amd.com/en/latest/about/release-notes.html)
 for a complete overview of this release.

 ### **AMD SMI** (25.3.0)
@@ -876,10 +743,6 @@ See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/rele
 #### Added 

 - Support for VA-API and rocDecode tracing.
- Aggregation of MPI data collected across distributed nodes and ranks. The data is concatenated into a single proto file.
-
-#### Changed
- Backend refactored to use [ROCprofiler-SDK](https://github.com/ROCm/rocprofiler-sdk) rather than [ROCProfiler](https://github.com/ROCm/rocprofiler) and [ROCTracer](https://github.com/ROCm/ROCTracer).

 #### Resolved issues

@@ -890,21 +753,9 @@ See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/rele
 - Fixed interruption in config file generation.

 - Fixed segmentation fault while running rocprof-sys-instrument.
- Fixed an issue where running `rocprof-sys-causal` or using the `-I all` option with `rocprof-sys-sample` caused the system to become non-responsive.
-
- Fixed an issue where sampling multi-GPU Python workloads caused the system to stop responding.
-
-### **ROCm Validation Suite** (1.1.0)
-
-#### Added
-
-* Configuration files for MI210.
-* Support for OCP fp8 data type.
-* GPU index-based CLI execution.

 #### Changed
-
-* JSON logging with updated schema.
+- Backend refactored to use [ROCprofiler-SDK](https://github.com/ROCm/rocprofiler-sdk) rather than [ROCProfiler](https://github.com/ROCm/rocprofiler) and [ROCTracer](https://github.com/ROCm/ROCTracer).

 ### **rocPRIM** (3.4.0)

--- a/README.md
+++ b/README.md
@@ -19,17 +19,142 @@ ROCm supports programming models, such as OpenMP and OpenCL, and includes all ne
 source software compilers, debuggers, and libraries. ROCm is fully integrated into machine learning
 (ML) frameworks, such as PyTorch and TensorFlow.

-> [!IMPORTANT]
-> A new open source build platform for ROCm is under development at
-> https://github.com/ROCm/TheRock, featuring a unified CMake build with bundled
-> dependencies, Windows support, and more.
->
-> The instructions below describe the prior process for building from source
-> which will be replaced once TheRock is mature enough.
+## Getting the ROCm Source Code

-## Getting and Building ROCm from Source
+AMD ROCm is built from open source software. It is, therefore, possible to modify the various components of ROCm by downloading the source code and rebuilding the components. The source code for ROCm components can be cloned from each of the GitHub repositories using git.  For easy access to download the correct versions of each of these tools, the ROCm repository contains a repo manifest file called [default.xml](./default.xml). You can use this manifest file to download the source code for ROCm software.

-Please use [TheRock](https://github.com/ROCm/TheRock) build system to build ROCm from source.
+### Installing the repo tool
+
+The repo tool from Google allows you to manage multiple git repositories simultaneously. Run the following commands to install the repo tool:
+
+```bash
+mkdir -p ~/bin/
+curl https://storage.googleapis.com/git-repo-downloads/repo > ~/bin/repo
+chmod a+x ~/bin/repo
+```
+
+**Note:** The ```~/bin/``` folder is used as an example. You can specify a different folder to install the repo tool into if you desire.
+
+### Installing git-lfs
+
+Some ROCm projects use the Git Large File Storage (LFS) format that may require you to install git-lfs. Refer to [Git Large File Storage](https://github.com/git-lfs/git-lfs/blob/main/INSTALLING.md) for more information. For example, to install git-lfs for Ubuntu, use the following command:
+
+```bash
+sudo apt-get install git-lfs
+```
+
+### Downloading the ROCm source code
+
+The following example shows how to use the repo tool to download the ROCm source code. If you choose a directory other than ~/bin/ to install the repo tool, you must use that chosen directory in the code as shown below:
+
+```bash
+mkdir -p ~/ROCm/
+cd ~/ROCm/
+export ROCM_VERSION=6.4.0
+~/bin/repo init -u http://github.com/ROCm/ROCm.git -b roc-6.4.x -m tools/rocm-build/rocm-${ROCM_VERSION}.xml
+~/bin/repo sync
+```
+
+**Note:** Using this sample code will cause the repo tool to download the open source code associated with the specified ROCm release. Ensure that you have ssh-keys configured on your machine for your GitHub ID prior to the download as explained at [Connecting to GitHub with SSH](https://docs.github.com/en/authentication/connecting-to-github-with-ssh).
+
+## Building the ROCm source code
+
+Each ROCm component repository contains directions for building that component, such as the rocSPARSE documentation [Installation and Building for Linux](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/install/Linux_Install_Guide.html). Refer to the specific component documentation for instructions on building the repository.
+
+Each release of the ROCm software supports specific hardware and software configurations. Refer to [System requirements (Linux)](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) for the current supported hardware and OS.
+
+## Build ROCm from source
+
+The build uses as many processors as it can find to build in parallel. Some compilations can consume as much as 10 GB of RAM, so make sure you have plenty of swap space!
+
+By default, the ROCm build compiles for all supported GPU architectures and takes approximately 500 CPU hours.
+The build time drops significantly if you limit the GPU architectures to build for by setting the GPU_ARCHS environment variable, as shown below.
+
+```bash
+# --------------------------------------
+# Step 1: Clone source code
+# --------------------------------------
+
+mkdir -p ~/WORKSPACE/      # Or any folder name other than WORKSPACE
+cd ~/WORKSPACE/
+export ROCM_VERSION=6.4.0
+~/bin/repo init -u http://github.com/ROCm/ROCm.git -b roc-6.4.x -m tools/rocm-build/rocm-${ROCM_VERSION}.xml
+~/bin/repo sync
+
+# --------------------------------------
+# Step 2: Prepare build environment
+# --------------------------------------
+
+# Option 1: Start a docker container
+# Pulling required base docker images:
+# Ubuntu22.04 built from ROCm/tools/rocm-build/docker/ubuntu22/Dockerfile
+docker pull rocm/rocm-build-ubuntu-22.04:6.4
+# Ubuntu24.04 built from ROCm/tools/rocm-build/docker/ubuntu24/Dockerfile
+docker pull rocm/rocm-build-ubuntu-24.04:6.4
+
+# Start docker container and mount the source code folder:
+docker run -ti \
+    -e ROCM_VERSION=${ROCM_VERSION} \
+    -e CCACHE_DIR=$HOME/.ccache \
+    -e CCACHE_ENABLED=true \
+    -e DOCK_WORK_FOLD=/src \
+    -w /src \
+    -v $PWD:/src \
+    -v /etc/passwd:/etc/passwd \
+    -v /etc/shadow:/etc/shadow \
+    -v ${HOME}/.ccache:${HOME}/.ccache \
+    -u $(id -u):$(id -g) \
+    <replace_with_required_ubuntu_base_docker_image> bash
+
+# Option 2: Install required packages into the host machine
+# For ubuntu22.04 system
+cd ROCm/tools/rocm-build/docker/ubuntu22
+cp * /tmp && cd /tmp
+bash install-prerequisites.sh
+# For ubuntu24.04 system
+cd ROCm/tools/rocm-build/docker/ubuntu24
+cp * /tmp && cd /tmp
+bash install-prerequisites.sh
+
+# --------------------------------------
+# Step 3: Run build command line
+# --------------------------------------
+
+# Select GPU targets before building:
+# When GPU_ARCHS is not set, default GPU targets supported by ROCm6.1 will be used.
+# To build against a subset of GFX architectures you can use the below env variable.
+# Support MI300 (gfx940, gfx941, gfx942).
+export GPU_ARCHS="gfx942"               # Example
+export GPU_ARCHS="gfx940;gfx941;gfx942" # Example
+
+# Pick and run build commands in the docker container:
+# Build rocm-dev packages
+make -f ROCm/tools/rocm-build/ROCm.mk -j ${NPROC:-$(nproc)} rocm-dev
+# Build all ROCm packages
+make -f ROCm/tools/rocm-build/ROCm.mk -j ${NPROC:-$(nproc)} all
+# list all ROCm components to find required components
+make -f ROCm/tools/rocm-build/ROCm.mk list_components
+# Build a single ROCm package
+make -f ROCm/tools/rocm-build/ROCm.mk T_rocblas
+
+# Find built packages in ubuntu22.04:
+out/ubuntu-22.04/22.04/deb/
+# Find built packages in ubuntu24.04:
+out/ubuntu-24.04/24.04/deb/
+
+# Find built logs in ubuntu22.04:
+out/ubuntu-22.04/22.04/logs/
+# Find built logs in ubuntu24.04:
+out/ubuntu-24.04/24.04/logs/
+# All logs pertaining to failed components end with the .errors extension.
+out/ubuntu-22.04/22.04/logs/rocblas.errors      # Example
+# All logs pertaining to components still being built end with the .inprogress extension.
+out/ubuntu-22.04/22.04/logs/rocblas.inprogress  # Example
+# All logs pertaining to passed components, use the component names.
+out/ubuntu-22.04/22.04/logs/rocblas             # Example
+```
+
+Note: [Overview for ROCm.mk](tools/rocm-build/README.md)

 ## ROCm documentation

--- a/RELEASE.md
+++ b/RELEASE.md
--- a/default.xml
+++ b/default.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <manifest>
    <remote name="rocm-org" fetch="https://github.com/ROCm/" />
-    <default revision="refs/tags/rocm-6.4.1"
+    <default revision="refs/tags/rocm-6.4.0"
     remote="rocm-org"
     sync-c="true"
     sync-j="4" />
--- a/docs/about/license.md
+++ b/docs/about/license.md
@@ -81,7 +81,6 @@ additional licenses. Please review individual repositories for more information.
 | [rocRAND](https://github.com/ROCm/rocRAND/) | [MIT](https://github.com/ROCm/rocRAND/blob/develop/LICENSE.txt) |
 | [ROCr Debug Agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/amd-staging/LICENSE.txt) |
 | [ROCR-Runtime](https://github.com/ROCm/ROCR-Runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCR-Runtime/blob/amd-staging/LICENSE.txt) |
-| [rocSHMEM](https://github.com/ROCm/rocSHMEM/) | [MIT](https://github.com/ROCm/rocSHMEM/blob/develop/LICENSE.md) |
 | [rocSOLVER](https://github.com/ROCm/rocSOLVER/) | [BSD-2-Clause](https://github.com/ROCm/rocSOLVER/blob/develop/LICENSE.md) |
 | [rocSPARSE](https://github.com/ROCm/rocSPARSE/) | [MIT](https://github.com/ROCm/rocSPARSE/blob/develop/LICENSE.md) |
 | [rocThrust](https://github.com/ROCm/rocThrust/) | [Apache 2.0](https://github.com/ROCm/rocThrust/blob/develop/LICENSE) |
--- a/docs/compatibility/compatibility-matrix-historical-6.0.csv
+++ b/docs/compatibility/compatibility-matrix-historical-6.0.csv
@@ -1,126 +1,121 @@
-ROCm Version,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
-      :ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
-      ,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
-      ,,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
-      ,"RHEL 9.6, 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
-      ,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
-      ,SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
-      ,,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
-      ,"Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
-      ,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
-      ,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,,,,,,,,,,,,
-      ,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
-      :doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
-      ,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
-      ,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
-      ,RDNA4,,,,,,,,,,,,,,,
-      ,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
-      ,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
-      ,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
-      :doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1201 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
-      ,gfx1200 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
-,gfx1101 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
-      ,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
-      ,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
-      ,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
-      ,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
-      ,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
-,,,,,,,,,,,,,,,,
-      FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
-      :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
-      :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
-      :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
-      `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
-,,,,,,,,,,,,,,,,
-      ,,,,,,,,,,,,,,,,
-      THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
-      `UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
-      `UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
-      ,,,,,,,,,,,,,,,,
-      THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
-      Thrust,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
-      CUB,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
-,,,,,,,,,,,,,,,,
-      KMD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
-      :doc:`KMD versions <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
-      ,,,,,,,,,,,,,,,,
-      ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
-      :doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
-      :doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
-      :doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
-      :doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
-      :doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
-      :doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
-      :doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
-      ,,,,,,,,,,,,,,,,
-      COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
-      :doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
-      :doc:`rocSHMEM <rocshmem:index>`,2.0.0,2.0.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
-      ,,,,,,,,,,,,,,,,
-      MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
-      `half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
-      :doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
-      :doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
-      :doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
-      :doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
-      :doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
-      :doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
-      :doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
-      :doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
-      :doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
-      :doc:`rocBLAS <rocblas:index>`,4.4.0,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
-      :doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
-      :doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
-      :doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
-      :doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
-      :doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
-      :doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
-      ,,,,,,,,,,,,,,,,
-      PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
-      :doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
-      :doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
-      :doc:`rocPRIM <rocprim:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
-      :doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
-      ,,,,,,,,,,,,,,,,
-      SUPPORT LIBS,,,,,,,,,,,,,,,,
-      `hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
-      `rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
-      `ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
-      ,,,,,,,,,,,,,,,,
-      SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
-      :doc:`AMD SMI <amdsmi:index>`,25.4.2,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
-      :doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
-      :doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
-      :doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
-      :doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
-      ,,,,,,,,,,,,,,,,
-      PERFORMANCE TOOLS,,,,,,,,,,,,,,,,
-      :doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
-      :doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.1,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`ROCProfiler <rocprofiler:index>`,2.0.60401,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
-      :doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`ROCTracer <roctracer:index>`,4.1.60401,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
-      ,,,,,,,,,,,,,,,,
-      DEVELOPMENT TOOLS,,,,,,,,,,,,,,,,
-      :doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
-      :doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
-      :doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
-      :doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
-      `rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
-      :doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
-      ,,,,,,,,,,,,,,,,
-      COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
-      `clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
-      :doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
-      `Flang <https://github.com/ROCm/flang>`_,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
-      :doc:`llvm-project <llvm-project:index>`,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
-      `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
-,,,,,,,,,,,,,,,,
-      RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
-      :doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
-      :doc:`HIP <hip:index>`,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
-      `OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
-      :doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
+ROCm Version,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
+      :ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
+      ,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
+      ,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
+      ,"RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
+      ,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
+      ,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
+      ,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
+      ,"Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
+,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
+,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,,,,,,,,,,,,
+      ,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      :doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
+      ,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
+      ,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
+      ,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
+      ,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
+      ,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      :doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
+      ,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
+      ,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
+      ,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
+      ,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
+,,,,,,,,,,,,,,,
+      FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
+      :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
+      :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
+      `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
+,,,,,,,,,,,,,,,
+      ,,,,,,,,,,,,,,,
+      THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      `UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
+      `UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
+      ,,,,,,,,,,,,,,,
+      THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      Thrust,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
+      CUB,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
+,,,,,,,,,,,,,,,
+      KMD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      KMD versions,"6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
+      ,,,,,,,,,,,,,,,
+      ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      :doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
+      :doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
+      :doc:`MIOpen <miopen:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
+      :doc:`MIVisionX <mivisionx:index>`,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
+      :doc:`rocAL <rocal:index>`,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
+      :doc:`rocDecode <rocdecode:index>`,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
+      :doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`RPP <rpp:index>`,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
+      ,,,,,,,,,,,,,,,
+      COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      :doc:`RCCL <rccl:index>`,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
+      ,,,,,,,,,,,,,,,
+      MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      `half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
+      :doc:`hipBLAS <hipblas:index>`,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
+      :doc:`hipBLASLt <hipblaslt:index>`,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
+      :doc:`hipFFT <hipfft:index>`,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
+      :doc:`hipfort <hipfort:index>`,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
+      :doc:`hipRAND <hiprand:index>`,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
+      :doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
+      :doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
+      :doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
+      :doc:`rocALUTION <rocalution:index>`,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
+      :doc:`rocBLAS <rocblas:index>`,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
+      :doc:`rocFFT <rocfft:index>`,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
+      :doc:`rocRAND <rocrand:index>`,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
+      :doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
+      :doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
+      :doc:`rocWMMA <rocwmma:index>`,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
+      :doc:`Tensile <tensile:src/index>`,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
+      ,,,,,,,,,,,,,,,
+      PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      :doc:`hipCUB <hipcub:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
+      :doc:`hipTensor <hiptensor:index>`,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
+      :doc:`rocPRIM <rocprim:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
+      :doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
+      ,,,,,,,,,,,,,,,
+      SUPPORT LIBS,,,,,,,,,,,,,,,
+      `hipother <https://github.com/ROCm/hipother>`_,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
+      `rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
+      `ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
+      ,,,,,,,,,,,,,,,
+      SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      :doc:`AMD SMI <amdsmi:index>`,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
+      :doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
+      :doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
+      :doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
+      :doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
+      ,,,,,,,,,,,,,,,
+      PERFORMANCE TOOLS,,,,,,,,,,,,,,,
+      :doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
+      :doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`ROCProfiler <rocprofiler:index>`,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
+      :doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`ROCTracer <roctracer:index>`,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
+      ,,,,,,,,,,,,,,,
+      DEVELOPMENT TOOLS,,,,,,,,,,,,,,,
+      :doc:`HIPIFY <hipify:index>`,19.0.0.25104,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
+      :doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
+      :doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
+      :doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
+      `rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
+      :doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
+      ,,,,,,,,,,,,,,,
+      COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      `clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
+      :doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
+      `Flang <https://github.com/ROCm/flang>`_,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
+      :doc:`llvm-project <llvm-project:index>`,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
+      `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
+,,,,,,,,,,,,,,,
+      RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
+      :doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
+      :doc:`HIP <hip:index>`,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
+      `OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
+      :doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
--- a/docs/compatibility/compatibility-matrix.rst
+++ b/docs/compatibility/compatibility-matrix.rst
@@ -23,131 +23,126 @@ compatibility and system requirements.
 .. container:: format-big-table

  .. csv-table::
-      :header: "ROCm Version", "6.4.1", "6.4.0", "6.3.0"
+      :header: "ROCm Version", "6.4.0", "6.3.3", "6.2.0"
      :stub-columns: 1

-      :ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2
-      ,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5
-      ,"RHEL 9.6, 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4"
-      ,RHEL 8.10,RHEL 8.10,RHEL 8.10
-      ,SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5"
-      ,"Oracle Linux 9, 8 [#mi300x]_","Oracle Linux 9, 8 [#mi300x]_",Oracle Linux 8.10 [#mi300x]_
+      :ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04
+      ,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4"
+      ,"RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3"
+      ,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9"
+      ,"SLES 15 SP6","SLES 15 SP6, SP5","SLES 15 SP6, SP5"
+      ,"Oracle Linux 9, 8 [#mi300x]_",Oracle Linux 8.10 [#mi300x]_,Oracle Linux 8.9 [#mi300x]_
      ,Debian 12 [#single-node]_,Debian 12 [#single-node]_,
      ,Azure Linux 3.0 [#mi300x]_,Azure Linux 3.0 [#mi300x]_,
      ,.. _architecture-support-compatibility-matrix:,,
      :doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3
      ,CDNA2,CDNA2,CDNA2
      ,CDNA,CDNA,CDNA
-      ,RDNA4,,
      ,RDNA3,RDNA3,RDNA3
      ,RDNA2,RDNA2,RDNA2
      ,.. _gpu-support-compatibility-matrix:,,
-      :doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1201 [#RDNA-OS]_,,
-      ,gfx1200 [#RDNA-OS]_,,
-      ,gfx1101 [#RDNA-OS]_,,
-      ,gfx1100,gfx1100,gfx1100
+      :doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100
      ,gfx1030,gfx1030,gfx1030
-      ,gfx942,gfx942,gfx942
+      ,gfx942,gfx942,gfx942 [#mi300_620]_
      ,gfx90a,gfx90a,gfx90a
      ,gfx908,gfx908,gfx908
      ,,,
      FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix:,,
-      :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 2.1, 2.0, 1.13"
-      :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1"
-      :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.31
-      `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.17.3
+      :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.3, 2.2, 2.1, 2.0, 1.13"
+      :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1"
+      :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.31,0.4.26
+      `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.20,1.17.3,1.17.3
      ,,,
      THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix:,,
      `UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0
      `UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0
      ,,,
      THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix:,,
-      Thrust,2.5.0,2.5.0,2.3.2
-      CUB,2.5.0,2.5.0,2.3.2
+      Thrust,2.5.0,2.3.2,2.2.0
+      CUB,2.5.0,2.3.2,2.2.0
      ,,,
      KMD & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,,
-      :doc:`KMD versions <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
+      KMD versions,"6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x"
      ,,,
      ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,,
      :doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0
-      :doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.11.0
-      :doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.3.0
-      :doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.1.0
-      :doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.1.0
-      :doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.8.0
-      :doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.6.0
-      :doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.2.0
-      :doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.1
+      :doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.11.0,2.10.0
+      :doc:`MIOpen <miopen:index>`,3.4.0,3.3.0,3.2.0
+      :doc:`MIVisionX <mivisionx:index>`,3.2.0,3.1.0,3.0.0
+      :doc:`rocAL <rocal:index>`,2.2.0,2.1.0,1.0.0
+      :doc:`rocDecode <rocdecode:index>`,0.10.0,0.8.0,0.6.0
+      :doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.6.0,N/A
+      :doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.2.0,0.1.0
+      :doc:`RPP <rpp:index>`,1.9.10,1.9.1,1.8.0
      ,,,
      COMMUNICATION,.. _commlibs-support-compatibility-matrix:,,
-      :doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.21.5
-      :doc:`rocSHMEM <rocshmem:index>`,2.0.0,2.0.0,N/A
+      :doc:`RCCL <rccl:index>`,2.22.3,2.21.5,2.20.5
      ,,,
      MATH LIBS,.. _mathlibs-support-compatibility-matrix:,,
      `half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0
-      :doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.3.0
-      :doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.0,0.10.0
-      :doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.17
-      :doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.5.0
-      :doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.11.0
-      :doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.3.0
-      :doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.1.2
-      :doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.2
-      :doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.2,3.2.1
-      :doc:`rocBLAS <rocblas:index>`,4.4.0,4.4.0,4.3.0
-      :doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.31
-      :doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.2.0
-      :doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.28.0,3.27.0
-      :doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.3.0
-      :doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.6.0
-      :doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.42.0
+      :doc:`hipBLAS <hipblas:index>`,2.4.0,2.3.0,2.2.0
+      :doc:`hipBLASLt <hipblaslt:index>`,0.12.0,0.10.0,0.8.0
+      :doc:`hipFFT <hipfft:index>`,1.0.18,1.0.17,1.0.14
+      :doc:`hipfort <hipfort:index>`,0.6.0,0.5.1,0.4.0
+      :doc:`hipRAND <hiprand:index>`,2.12.0,2.11.1,2.11.0
+      :doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.3.0,2.2.0
+      :doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.1.2,3.1.1
+      :doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.2,0.2.1
+      :doc:`rocALUTION <rocalution:index>`,3.2.2,3.2.1,3.2.0
+      :doc:`rocBLAS <rocblas:index>`,4.4.0,4.3.0,4.2.0
+      :doc:`rocFFT <rocfft:index>`,1.0.32,1.0.31,1.0.28
+      :doc:`rocRAND <rocrand:index>`,3.3.0,3.2.0,3.1.0
+      :doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.27.0,3.26.0
+      :doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.3.0,3.2.0
+      :doc:`rocWMMA <rocwmma:index>`,1.7.0,1.6.0,1.5.0
+      :doc:`Tensile <tensile:src/index>`,4.43.0,4.42.0,4.41.0
      ,,,
      PRIMITIVES,.. _primitivelibs-support-compatibility-matrix:,,
-      :doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.3.0
-      :doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.4.0
-      :doc:`rocPRIM <rocprim:index>`,3.4.0,3.4.0,3.3.0
-      :doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0
+      :doc:`hipCUB <hipcub:index>`,3.4.0,3.3.0,3.2.0
+      :doc:`hipTensor <hiptensor:index>`,1.5.0,1.4.0,1.3.0
+      :doc:`rocPRIM <rocprim:index>`,3.4.0,3.3.0,3.2.0
+      :doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.0.1
      ,,,
      SUPPORT LIBS,,,
-      `hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43482,6.3.42131
-      `rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.1,6.4.0,6.3.0
-      `ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_
+      `hipother <https://github.com/ROCm/hipother>`_,6.4.43482,6.3.42134,6.2.41133
+      `rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.0,6.3.3,6.2.0
+      `ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,20240607.1.4246
      ,,,
      SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix:,,
-      :doc:`AMD SMI <amdsmi:index>`,25.4.2,25.3.0,24.7.1
+      :doc:`AMD SMI <amdsmi:index>`,25.3.0,24.7.1,24.6.2
      :doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0
      :doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0
-      :doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.5.0,7.4.0
-      :doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0
+      :doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.4.0,7.3.0
+      :doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.0.60200
      ,,,
      PERFORMANCE TOOLS,,,
      :doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0
-      :doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.1.0,3.0.0
-      :doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.1,1.0.0,0.1.0
-      :doc:`ROCProfiler <rocprofiler:index>`,2.0.60401,2.0.60400,2.0.60300
-      :doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.5.0
-      :doc:`ROCTracer <roctracer:index>`,4.1.60401,4.1.60400,4.1.60300
+      :doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.0.0,2.0.1
+      :doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.0,0.1.2,1.11.2
+      :doc:`ROCProfiler <rocprofiler:index>`,2.0.60400,2.0.60303,2.0.60200
+      :doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.5.0,0.4.0
+      :doc:`ROCTracer <roctracer:index>`,4.1.60400,4.1.60303,4.1.60200
      ,,,
      DEVELOPMENT TOOLS,,,
-      :doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,18.0.0.24455
-      :doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0
-      :doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.0
-      :doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0
+      :doc:`HIPIFY <hipify:index>`,19.0.0.25133,18.0.0.25012,18.0.0.24232
+      :doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.13.0
+      :doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.0,0.76.0
+      :doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,14.2.0
      `rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0
-      :doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.3
+      :doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.3,2.0.3
      ,,,
      COMPILERS,.. _compilers-support-compatibility-matrix:,,
      `clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A
      :doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1
-      `Flang <https://github.com/ROCm/flang>`_,19.0.0.25184,19.0.0.25133,18.0.0.24455
-      :doc:`llvm-project <llvm-project:index>`,19.0.0.25184,19.0.0.25133,18.0.0.24491
-      `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25184,19.0.0.25133,18.0.0.24491
+      `Flang <https://github.com/ROCm/flang>`_,19.0.0.25133,18.0.0.25012,18.0.0.24232
+      :doc:`llvm-project <llvm-project:index>`,19.0.0.25133,18.0.0.25012,18.0.0.24232
+      `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25133,18.0.0.25012,18.0.0.24232
      ,,,
      RUNTIMES,.. _runtime-support-compatibility-matrix:,,
-      :doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43483,6.4.43482,6.3.42131
-      :doc:`HIP <hip:index>`,6.4.43483,6.4.43482,6.3.42131
+      :doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43482,6.3.42134,6.2.41133
+      :doc:`HIP <hip:index>`,6.4.43482,6.3.42134,6.2.41133
      `OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0
-      :doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.14.0
+      :doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.14.0,1.13.0


 .. rubric:: Footnotes
@@ -157,7 +152,6 @@ compatibility and system requirements.
 .. [#mi300_620] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
 .. [#kfd_support] Starting from ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart (assuming hardware support is available in both). For earlier ROCm releases, the compatibility is provided for +/- 2 releases. These are the compatibility combinations that are currently supported.
 .. [#ROCT-rocr] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
-.. [#RDNA-OS] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.5, and RHEL 9.4.

 .. _OS-kernel-versions:

@@ -175,8 +169,7 @@ Use this lookup table to confirm which operating system and kernel versions are
   ,,
   `Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 22.04.5, "5.15 GA, 6.8 HWE", 2.35
   ,,
-   `Red Hat Enterprise Linux (RHEL 9) <https://access.redhat.com/articles/3078#RHEL9>`_, 9.6, 5.14+, 2.34
-   , 9.5, 5.14+, 2.34
+   `Red Hat Enterprise Linux (RHEL 9) <https://access.redhat.com/articles/3078#RHEL9>`_, 9.5, 5.14+, 2.34
   ,9.4, 5.14+, 2.34
   ,9.3, 5.14+, 2.34
   ,,
@@ -237,4 +230,3 @@ Expand for full historical view of:
   .. [#mi300_600-past-60] **For ROCm 6.0.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
   .. [#kfd_support-past-60] Starting from ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart (assuming hardware support is available in both). For earlier ROCm releases, the compatibility is provided for +/- 2 releases. These are the compatibility combinations that are currently supported.
   .. [#ROCT-rocr-past-60] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
-   .. [#RDNA-OS-past-60] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.5, and RHEL 9.4.
--- a/docs/compatibility/ml-compatibility/jax-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/jax-compatibility.rst
@@ -14,18 +14,17 @@ JAX provides a NumPy-like API, which combines automatic differentiation and the
 Accelerated Linear Algebra (XLA) compiler to achieve high-performance machine
 learning at scale.

-JAX uses composable transformations of Python and NumPy through just-in-time
-(JIT) compilation, automatic vectorization, and parallelization. To learn about
-JAX, including profiling and optimizations, see the official `JAX documentation
+JAX uses composable transformations of Python and NumPy through just-in-time (JIT) compilation,
+automatic vectorization, and parallelization. To learn about JAX, including profiling and
+optimizations, see the official `JAX documentation
 <https://jax.readthedocs.io/en/latest/notebooks/quickstart.html>`_.

-ROCm support for JAX is upstreamed, and users can build the official source code
-with ROCm support:
+ROCm support for JAX is upstreamed, and users can build the official source code with ROCm
+support:

 - ROCm JAX release:

-  - Offers AMD-validated and community :ref:`Docker images <jax-docker-compat>`
-    with ROCm and JAX preinstalled.
+  - Offers AMD-validated and community :ref:`Docker images <jax-docker-compat>` with ROCm and JAX pre-installed.

  - ROCm JAX repository: `ROCm/jax <https://github.com/ROCm/jax>`_

@@ -37,8 +36,8 @@ with ROCm support:
  - Official JAX repository: `jax-ml/jax <https://github.com/jax-ml/jax>`_

  - See the `AMD GPU (Linux) installation section
-    <https://jax.readthedocs.io/en/latest/installation.html#amd-gpu-linux>`_ in
-    the JAX documentation.
+    <https://jax.readthedocs.io/en/latest/installation.html#amd-gpu-linux>`_ in the JAX
+    documentation.

 .. note::

@@ -47,44 +46,6 @@ with ROCm support:
   `Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
   follow upstream JAX releases and use the latest available ROCm version.

-Use cases and recommendations
-================================================================================
-
-* The `nanoGPT in JAX <https://rocm.blogs.amd.com/artificial-intelligence/nanoGPT-JAX/README.html>`_
-  blog explores the implementation and training of a Generative Pre-trained
-  Transformer (GPT) model in JAX, inspired by Andrej Karpathy’s JAX-based
-  nanoGPT. Comparing how essential GPT components—such as self-attention 
-  mechanisms and optimizers—are realized in JAX and JAX, also highlights
-  JAX’s unique features.
-
-* The `Optimize GPT Training: Enabling Mixed Precision Training in JAX using
-  ROCm on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-mixed-precision/README.html>`_
-  blog post provides a comprehensive guide on enhancing the training efficiency
-  of GPT models by implementing mixed precision techniques in JAX, specifically
-  tailored for AMD GPUs utilizing the ROCm platform.
-
-* The `Supercharging JAX with Triton Kernels on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-triton/README.html>`_
-  blog demonstrates how to develop a custom fused dropout-activation kernel for
-  matrices using Triton, integrate it with JAX, and benchmark its performance
-  using ROCm.
-
-* The `Distributed fine-tuning with JAX on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/distributed-sft-jax/README.html>`_
-  outlines the process of fine-tuning a Bidirectional Encoder Representations
-  from Transformers (BERT)-based large language model (LLM) using JAX for a text
-  classification task. The blog post discuss techniques for parallelizing the
-  fine-tuning across multiple AMD GPUs and assess the model's performance on a
-  holdout dataset. During the fine-tuning, a BERT-base-cased transformer model
-  and the General Language Understanding Evaluation (GLUE) benchmark dataset was
-  used on a multi-GPU setup.
-
-* The `MI300X workload optimization guide <https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/workload.html>`_
-  provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
-  accelerator using ROCm. The page is aimed at helping users achieve optimal
-  performance for deep learning and other high-performance computing tasks on
-  the MI300X GPU.
-
-For more use cases and recommendations, see `ROCm JAX blog posts <https://rocm.blogs.amd.com/blog/tag/jax.html>`_.
-
 .. _jax-docker-compat:

 Docker image compatibility
@@ -96,8 +57,8 @@ Docker image compatibility

 AMD validates and publishes ready-made `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax>`_
 with ROCm backends on Docker Hub. The following Docker image tags and
-associated inventories represent the latest JAX version from the official Docker Hub and are validated for
-`ROCm 6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`_. Click the |docker-icon|
+associated inventories are validated for
+`ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_. Click the |docker-icon|
 icon to view the image on Docker Hub.

 .. list-table:: JAX Docker image components
@@ -110,19 +71,19 @@ icon to view the image on Docker Hub.

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/jax/rocm6.4.1-jax0.4.35-py3.12/images/sha256-7a0745a2a2758bdf86397750bac00e9086cbf67d170cfdbb08af73f7c7d18a6a"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
+           <a href="https://hub.docker.com/layers/rocm/jax/rocm6.4-jax0.4.35-py3.12/images/sha256-4069398229078f3311128b6d276c6af377c7e97d3363d020b0bf7154fae619ca"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>

      - `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
      - Ubuntu 24.04
-      - `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
+      - `3.12.7 <https://www.python.org/downloads/release/python-3127/>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/jax/rocm6.4.1-jax0.4.35-py3.10/images/sha256-5f9e8d6e6e69fdc9a1a3f2ba3b1234c3f46c53b7468538c07fd18b00899da54f"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
+           <a href="https://hub.docker.com/layers/rocm/jax/rocm6.4-jax0.4.35-py3.10/images/sha256-a137f901f91ce6c13b424c40a6cf535248d4d20fd36d5daf5eee0570190a4a11"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>

      - `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
      - Ubuntu 22.04
-      - `3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
+      - `3.10.14 <https://www.python.org/downloads/release/python-31014/>`_

 AMD publishes `Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
 with ROCm backends on Docker Hub. The following Docker image tags and
@@ -160,12 +121,13 @@ associated inventories are tested for `ROCm 6.3.2 <https://repo.radeon.com/rocm/
      - Ubuntu 22.04
      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_

-Key ROCm libraries for JAX
+Critical ROCm libraries for JAX
 ================================================================================

-JAX functionality on ROCm is determined by its underlying library
-dependencies. These ROCm components affect the capabilities, performance, and
-feature set available to developers.
+The functionality of JAX with ROCm is determined by its underlying library
+dependencies. These critical ROCm components affect the capabilities,
+performance, and feature set available to developers. The versions described
+are available in ROCm :version:`rocm_version`.

 .. list-table::
    :header-rows: 1
@@ -253,10 +215,10 @@ feature set available to developers.
        distributed training, which involves parallel reductions or
        operations like ``jax.numpy.cumsum`` can use rocThrust.

-Supported features
+Supported and unsupported features
 ===============================================================================

-The following table maps the public JAX API modules to their supported
+The following table maps GPU-accelerated JAX modules to their supported
 ROCm and JAX versions.

 .. list-table::
@@ -264,8 +226,8 @@ ROCm and JAX versions.

    * - Module
      - Description
-      - As of JAX
-      - As of ROCm
+      - Since JAX
+      - Since ROCm
    * - ``jax.numpy``
      - Implements the NumPy API, using the primitives in ``jax.lax``.
      - 0.1.56
@@ -293,11 +255,21 @@ ROCm and JAX versions.
        devices.
      - 0.3.20
      - 5.1.0
+    * - ``jax.dlpack``
+      - For exchanging tensor data between JAX and other libraries that support the
+        DLPack standard.
+      - 0.1.57
+      - 5.0.0
    * - ``jax.distributed``
      - Enables the scaling of computations across multiple devices on a single
        machine or across multiple machines.
      - 0.1.74
      - 5.0.0
+    * - ``jax.dtypes``
+      - Provides utilities for working with and managing data types in JAX
+        arrays and computations.
+      - 0.1.66
+      - 5.0.0
    * - ``jax.image``
      - Contains image manipulation functions like resize, scale and translation.
      - 0.1.57
@@ -311,10 +283,27 @@ ROCm and JAX versions.
        array.
      - 0.1.57
      - 5.0.0
+    * - ``jax.profiler``
+      - Contains JAX’s tracing and time profiling features.
+      - 0.1.57
+      - 5.0.0
    * - ``jax.stages``
      - Contains interfaces to stages of the compiled execution process.
      - 0.3.4
      - 5.0.0
+    * - ``jax.tree``
+      - Provides utilities for working with tree-like container data structures.
+      - 0.4.26
+      - 5.6.0
+    * - ``jax.tree_util``
+      - Provides utilities for working with nested data structures, or
+        ``pytrees``.
+      - 0.1.65
+      - 5.0.0
+    * - ``jax.typing``
+      - Provides JAX-specific static type annotations.
+      - 0.3.18
+      - 5.1.0
    * - ``jax.extend``
      - Provides modules for access to JAX internal machinery module. The
        ``jax.extend`` module defines a library view of some of JAX’s internal
@@ -350,8 +339,8 @@ A SciPy-like API for scientific computing.
    :header-rows: 1

    * - Module
-      - As of JAX
-      - As of ROCm
+      - Since JAX
+      - Since ROCm
    * - ``jax.scipy.cluster``
      - 0.3.11
      - 5.1.0
@@ -396,8 +385,8 @@ jax.scipy.stats module
   :header-rows: 1

   * - Module
-     - As of JAX
-     - As of ROCm
+     - Since JAX
+     - Since ROCm
   * - ``jax.scipy.stats.bernouli``
     - 0.1.56
     - 5.0.0
@@ -480,8 +469,8 @@ Modules for JAX extensions.
    :header-rows: 1

    * - Module
-      - As of JAX
-      - As of ROCm
+      - Since JAX
+      - Since ROCm
    * - ``jax.extend.ffi``
      - 0.4.30
      - 6.0.0
@@ -495,25 +484,190 @@ Modules for JAX extensions.
      - 0.4.15
      - 5.5.0

-Unsupported JAX features
-===============================================================================
+jax.experimental module
+-------------------------------------------------------------------------------

-The following GPU-accelerated JAX features are not supported by ROCm for
-the listed supported JAX versions.
+Experimental modules and APIs.
+
+.. list-table::
+    :header-rows: 1
+
+    * - Module
+      - Since JAX
+      - Since ROCm
+    * - ``jax.experimental.checkify``
+      - 0.1.75
+      - 5.0.0
+    * - ``jax.experimental.compilation_cache.compilation_cache``
+      - 0.1.68
+      - 5.0.0
+    * - ``jax.experimental.custom_partitioning``
+      - 0.4.0
+      - 5.3.0
+    * - ``jax.experimental.jet``
+      - 0.1.56
+      - 5.0.0
+    * - ``jax.experimental.key_reuse``
+      - 0.4.26
+      - 5.6.0
+    * - ``jax.experimental.mesh_utils``
+      - 0.1.76
+      - 5.0.0
+    * - ``jax.experimental.multihost_utils``
+      - 0.3.2
+      - 5.0.0
+    * - ``jax.experimental.pallas``
+      - 0.4.15
+      - 5.5.0
+    * - ``jax.experimental.pjit``
+      - 0.1.61
+      - 5.0.0
+    * - ``jax.experimental.serialize_executable``
+      - 0.4.0
+      - 5.3.0
+    * - ``jax.experimental.shard_map``
+      - 0.4.3
+      - 5.3.0
+    * - ``jax.experimental.sparse``
+      - 0.1.75
+      - 5.0.0
+
+.. list-table::
+    :header-rows: 1
+
+    * - API
+      - Since JAX
+      - Since ROCm
+    * - ``jax.experimental.enable_x64``
+      - 0.1.60
+      - 5.0.0
+    * - ``jax.experimental.disable_x64``
+      - 0.1.60
+      - 5.0.0
+
+jax.experimental.pallas module
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Module for Pallas, a JAX extension for custom kernels.
+
+.. list-table::
+    :header-rows: 1
+
+    * - Module
+      - Since JAX
+      - Since ROCm
+    * - ``jax.experimental.pallas.mosaic_gpu``
+      - 0.4.31
+      - 6.1.3
+    * - ``jax.experimental.pallas.tpu``
+      - 0.4.15
+      - 5.5.0
+    * - ``jax.experimental.pallas.triton``
+      - 0.4.32
+      - 6.1.3
+
+jax.experimental.sparse module
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Experimental support for sparse matrix operations.
+
+.. list-table::
+    :header-rows: 1
+
+    * - Module
+      - Since JAX
+      - Since ROCm
+    * - ``jax.experimental.sparse.linalg``
+      - 0.3.15
+      - 5.2.0
+    * - ``jax.experimental.sparse.sparsify``
+      - 0.3.25
+      - ❌
+
+.. list-table::
+    :header-rows: 1
+
+    * - ``sparse`` data structure API
+      - Since JAX
+      - Since ROCm
+    * - ``jax.experimental.sparse.BCOO``
+      - 0.1.72
+      - 5.0.0
+    * - ``jax.experimental.sparse.BCSR``
+      - 0.3.20
+      - 5.1.0
+    * - ``jax.experimental.sparse.CSR``
+      - 0.1.75
+      - 5.0.0
+    * - ``jax.experimental.sparse.NM``
+      - 0.4.27
+      - 5.6.0
+    * - ``jax.experimental.sparse.COO``
+      - 0.1.75
+      - 5.0.0
+
+Unsupported JAX features
+------------------------
+
+The following are GPU-accelerated JAX features not currently supported by
+ROCm.

 .. list-table::
    :header-rows: 1

    * - Feature
      - Description
-
+      - Since JAX
    * - Mixed Precision with TF32
      - Mixed precision with TF32 is used for matrix multiplications,
        convolutions, and other linear algebra operations, particularly in
        deep learning workloads like CNNs and transformers.
-
+      - 0.2.25
+    * - RNN support
+      - Currently only LSTM with double bias is supported with float32 input
+        and weight.
+      - 0.3.25
    * - XLA int4 support
      - 4-bit integer (int4) precision in the XLA compiler.
+      - 0.4.0
+    * - ``jax.experimental.sparse.sparsify``
+      - Converts a dense matrix to a sparse matrix representation.
+      - Experimental

-    * - MOSAIC (GPU)
-      - Mosaic is a library of kernel-building abstractions for JAX's Pallas system
+Use cases and recommendations
+================================================================================
+
+* The `nanoGPT in JAX <https://rocm.blogs.amd.com/artificial-intelligence/nanoGPT-JAX/README.html>`_
+  blog explores the implementation and training of a Generative Pre-trained
+  Transformer (GPT) model in JAX, inspired by Andrej Karpathy’s PyTorch-based
+  nanoGPT. By comparing how essential GPT components—such as self-attention
+  mechanisms and optimizers—are realized in PyTorch and JAX, the blog also
+  highlights JAX’s unique features.
+
+* The `Optimize GPT Training: Enabling Mixed Precision Training in JAX using
+  ROCm on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-mixed-precision/README.html>`_
+  blog post provides a comprehensive guide on enhancing the training efficiency
+  of GPT models by implementing mixed precision techniques in JAX, specifically
+  tailored for AMD GPUs utilizing the ROCm platform.
+
+* The `Supercharging JAX with Triton Kernels on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-triton/README.html>`_
+  blog demonstrates how to develop a custom fused dropout-activation kernel for
+  matrices using Triton, integrate it with JAX, and benchmark its performance
+  using ROCm.
+
+* The `Distributed fine-tuning with JAX on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/distributed-sft-jax/README.html>`_
+  blog outlines the process of fine-tuning a Bidirectional Encoder
+  Representations from Transformers (BERT)-based large language model (LLM)
+  using JAX for a text classification task. The blog post discusses techniques
+  for parallelizing the fine-tuning across multiple AMD GPUs and assesses the
+  model's performance on a holdout dataset. During the fine-tuning, a
+  BERT-base-cased transformer model and the General Language Understanding
+  Evaluation (GLUE) benchmark dataset were used on a multi-GPU setup.
+
+* The `MI300X workload optimization guide <https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/workload.html>`_
+  provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
+  accelerator using ROCm. The page is aimed at helping users achieve optimal
+  performance for deep learning and other high-performance computing tasks on
+  the MI300X GPU.
+
+For more use cases and recommendations, see `ROCm JAX blog posts <https://rocm.blogs.amd.com/blog/tag/jax.html>`_.
--- a/docs/compatibility/ml-compatibility/pytorch-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/pytorch-compatibility.rst
@@ -95,7 +95,7 @@ Docker image compatibility

 AMD validates and publishes `PyTorch images <https://hub.docker.com/r/rocm/pytorch>`_
 with ROCm backends on Docker Hub. The following Docker image tags and associated
-inventories were tested on `ROCm 6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`_.
+inventories were tested on `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_.
 Click |docker-icon| to view the image on Docker Hub.

 .. list-table:: PyTorch Docker image components
@@ -116,122 +116,137 @@ Click |docker-icon| to view the image on Docker Hub.

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.1_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-c76af9bfb1c25b0f40d4c29e8652105c57250bf018d23ff595b06bd79666fdd7"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-ab1d350b818b90123cfda31363019d11c0d41a8f12a19e3cb2cb40cf0261137d"><i class="fab fa-docker fa-lg"></i></a>

      - `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`_
      - 24.04
-      - `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
+      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
      - `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`_
      - `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`_
      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
      - `master <https://bitbucket.org/icl/magma/src/master/>`_
-      - `1.16.0 <https://github.com/openucx/ucx/tree/v1.16.0>`_
-      - `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`_
+      - `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
+      - `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.6.0/images/sha256-f9d226135d51831c810dcb1251636ec61f85c65fcdda03e188c053a5d4f6585b"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.6.0/images/sha256-130536fdfceb374626a7bcb8d00b9d796ddfc3115677d51229e5b852d96b5ef4"><i class="fab fa-docker fa-lg"></i></a>

      - `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`_
      - 22.04
-      - `3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
+      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
      - `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`_
      - `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`_
      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
      - `master <https://bitbucket.org/icl/magma/src/master/>`_
-      - `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`_
-      - `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`_
+      - `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
+      - `4.0.7 <https://github.com/open-mpi/ompi/tree/v4.0.7>`_
      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.1_ubuntu24.04_py3.12_pytorch_release_2.5.1/images/sha256-3490e74d4f43dcdb3351dd334108d1ccd47e5a687c0523a2424ac1bcdd3dd6dd"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.5.1/images/sha256-20a2e24b4738dc1f1a44a04f23827918b56c99f7e697e6fccb90e9c4fae8ca9b"><i class="fab fa-docker fa-lg"></i></a>

      - `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
      - 24.04
-      - `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
+      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
      - `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`_
      - `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`_
      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
      - `master <https://bitbucket.org/icl/magma/src/master/>`_
-      - `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.10.0>`_
-      - `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`_
+      - `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
+      - `4.0.7 <https://github.com/open-mpi/ompi/tree/v4.0.7>`_
      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.5.1/images/sha256-26c5dfffb4a54625884abca83166940f17dd27bc75f1b24f6e80fbcb7d4e9afb"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.11_pytorch_release_2.5.1/images/sha256-f09cb8ca39cc39222fb554060711f5c19130f7b4047aaf41fad4ba3ec470ca03"><i class="fab fa-docker fa-lg"></i></a>

      - `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
      - 22.04
-      - `3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
+      - `3.11.9 <https://www.python.org/downloads/release/python-3119/>`_
      - `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`_
      - `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`_
      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
      - `master <https://bitbucket.org/icl/magma/src/master/>`_
-      - `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`_
-      - `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`_
+      - `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
+      - `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.1_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-f378a24561fa6efc178b6dc93fc7d82e5b93653ecd59c89d4476674d29e1284d"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.5.1/images/sha256-a91c100d1fe608dae3eb7f60a751630363d4027ac3d077d428e92945204c338e"><i class="fab fa-docker fa-lg"></i></a>
+
+      - `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
+      - 22.04
+      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
+      - `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`_
+      - `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`_
+      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
+      - `master <https://bitbucket.org/icl/magma/src/master/>`_
+      - `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
+      - `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
+      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-66a89ce6485bb887af74bb9bd76bb613ab9834a6b1374649ea7ae379883454a4"><i class="fab fa-docker fa-lg"></i></a>

      - `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
      - 24.04
-      - `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
+      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
      - `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`_
      - `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`_
      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
      - `master <https://bitbucket.org/icl/magma/src/master/>`_
-      - `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`_
-      - `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`_
+      - `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
+      - `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-2308dbd0e650b7bf8d548575cbb6e2bdc021f9386384ce570da16d58ee684d22"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-c716cf167e6e49893f11de03606ed37044153aca089e74ca615065c06877f86b"><i class="fab fa-docker fa-lg"></i></a>

      - `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
      - 22.04
-      - `3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
+      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
      - `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`_
      - `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`_
      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
      - `master <https://bitbucket.org/icl/magma/src/master/>`_
-      - `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`_
-      - `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`_
+      - `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
+      - `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.1_ubuntu24.04_py3.12_pytorch_release_2.3.0/images/sha256-eefd2ab019728f91f94c5e6a9463cb0ea900b3011458d18fe5d88e50c0b57d86"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.3.0/images/sha256-0434cbc9b07b2c26e39480d7447f676f9057a1054dcff00e0050c25a6eddbd3c"><i class="fab fa-docker fa-lg"></i></a>

      - `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`_
      - 24.04
-      - `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
+      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
      - `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`_
      - `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`_
      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13>`_
      - `master <https://bitbucket.org/icl/magma/src/master/>`_
-      - `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`_
-      - `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`_
+      - `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
+      - `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-473643226ab0e93a04720b256ed772619878abf9c42b9f84828cefed522696fd"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-688b1c0073092615fb98778d78b16191e506097ee116a2d3d2628b264d5d367b"><i class="fab fa-docker fa-lg"></i></a>

      - `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`_
      - 22.04
-      - `3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
+      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
      - `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`_
      - `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`_
      - `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13>`_
      - `master <https://bitbucket.org/icl/magma/src/master/>`_
-      - `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`_
-      - `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`_
+      - `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
+      - `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
      - `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_

 Key ROCm libraries for PyTorch
--- a/docs/compatibility/ml-compatibility/tensorflow-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/tensorflow-compatibility.rst
@@ -56,7 +56,7 @@ Docker image compatibility
 AMD validates and publishes ready-made `TensorFlow images
 <https://hub.docker.com/r/rocm/tensorflow>`_ with ROCm backends on
 Docker Hub. The following Docker image tags and associated inventories are
-validated for `ROCm 6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`_. Click
+validated for `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_. Click
 the |docker-icon| icon to view the image on Docker Hub.

 .. list-table:: TensorFlow Docker image components
@@ -73,122 +73,82 @@ the |docker-icon| icon to view the image on Docker Hub.

           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.18-dev/images/sha256-fa9cf5fa6c6079a7118727531ccd0056c6e3224a42c3d6e78a49e7781daafff4"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>

-      - `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
+      - `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
      - dev
      - 24.04
-      - `Python 3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
+      - `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
      - `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.12-tf2.18-runtime/images/sha256-d14d8c4989e7c9a60f4e72461b9e349de72347c6162dcd6897e6f4f80ffbb440"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
+           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.18-runtime/images/sha256-14addca4b92a47c806b83ebaeed593fc6672cd99f0017ed8dad759fe72ed0309"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>

-      - `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
+      - `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
      - runtime
      - 24.04
-      - `Python 3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
+      - `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
      - `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.10-tf2.18-dev/images/sha256-081e5bd6615a5dc17247ebd2ccc26895c3feeff086720400fa39b477e60a77c0"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
+           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.10-tf2.18-dev/images/sha256-f5e151060df04ff5fb59f5604b49cd371931bbe75b06aec9fe7781397c4be0ce"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>

-      - `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
+      - `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
      - dev
      - 22.04
-      - `Python 3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
+      - `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
      - `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.10-tf2.18-runtime/images/sha256-bf369637378264f4af6ddad5ca8b8611d3e372ffbea9ab7a06f1e122f0a0867b"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
+           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.10-tf2.18-runtime/images/sha256-5cd4c03fdb1036570c0d4929da60a65c4466998dc80f1dc8a5a0b173eae017fb"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>

-      - `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
+      - `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
      - runtime
      - 22.04
-      - `Python 3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
+      - `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
      - `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.12-tf2.17-dev/images/sha256-5a502008c50d0b6508e6027f911bdff070a7493700ae064bed74e1d22b91ed50"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
+           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.17-dev/images/sha256-b3add80e374a2db2d1088d746e740afa89d439aca02cacba959ad298f5cd2b3f"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>

      - `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
      - dev
      - 24.04
-      - `Python 3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
+      - `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
      - `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.12-tf2.17-runtime/images/sha256-1ee5dfffceb71ac66617ada33de3a10de0cb74199cc4b82441192e5e92fa2ddf"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
+           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.17-runtime/images/sha256-3a244f026c32177eff7958ffbad390de85b438b2b48b455cc39f15d70fa1270d"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>

      - `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
      - runtime
      - 24.04
-      - `Python 3.12.10 <https://www.python.org/downloads/release/python-3124/>`_
+      - `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
      - `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.10-tf2.17-dev/images/sha256-109218ad92bfae83bbd2710475f7502166e1ed54ca0b9748a9cbc3f5a1d75af1"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
+           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.10-tf2.17-dev/images/sha256-e0cecdfacb59169335049983cdab6da578c209bb9f4d08aad97e184ae59171a6"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>

-      - `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
+      - `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
      - dev
      - 22.04
-      - `Python 3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
+      - `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
      - `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.10-tf2.17-runtime/images/sha256-5d78bd5918d394f92263daa2990e88d695d27200dd90ed83ec64d20c7661c9c1"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
+           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.10-tf2.17-runtime/images/sha256-6f43de12f7eb202791b698ac51d28b72098de90034dbcd48486629b0125f7707"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>

-      - `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
+      - `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
      - runtime
      - 22.04
-      - `Python 3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
+      - `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
      - `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_

-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.12-tf2.16-dev/images/sha256-b09b1ad921c09c687b7c916141051e9fcf15539a5686e5aa67c689195a522719"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
-      - `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.16.2-cp312-cp312-manylinux_2_28_x86_64.whl>`__
-      - dev
-      - 24.04
-      - `Python 3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
-      - `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`_
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.12-tf2.16-runtime/images/sha256-20dbd824e85558abfe33fc9283cc547d88cde3c623fe95322743a5082f883a64"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
-      - `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.16.2-cp312-cp312-manylinux_2_28_x86_64.whl>`__
-      - runtime
-      - 24.04
-      - `Python 3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
-      - `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`_
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.10-tf2.16-dev/images/sha256-36c4fa047c86e2470ac473ec1429aea6d4b8934b90ffeb34d1afab40e7e5b377"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
-      - `tensorflow-rocm 2.16.2 <https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.10-tf2.16-dev/images/sha256-36c4fa047c86e2470ac473ec1429aea6d4b8934b90ffeb34d1afab40e7e5b377>`__
-      - dev
-      - 22.04
-      - `Python 3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
-      - `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`_
-
-    * - .. raw:: html
-
-           <a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.10-tf2.16-runtime/images/sha256-a94150ffb81365234ebfa34e764db5474bc6ab7d141b56495eac349778dafcf3"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
-
-      - `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.16.2-cp312-cp312-manylinux_2_28_x86_64.whl>`__
-      - runtime
-      - 22.04
-      - `Python 3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
-      - `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`_
-

 Critical ROCm libraries for TensorFlow
 ===============================================================================
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -34,86 +34,66 @@ project = "ROCm Documentation"
 project_path = os.path.abspath(".").replace("\\", "/")
 author = "Advanced Micro Devices, Inc."
 copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved."
-version = "7.0 Alpha"
-release = "7.0 Alpha"
+version = "6.4.0"
+release = "6.4.0"
 setting_all_article_info = True
 all_article_info_os = ["linux", "windows"]
 all_article_info_author = ""

 # pages with specific settings
 article_pages = [
-    {"file": "preview/index", "os": ["linux"],},
-    {"file": "preview/release", "os": ["linux"],},
-    {"file": "preview/install/index", "os": ["linux"],},
-    {"file": "preview/install/instinct-driver", "os": ["linux"],},
-    {"file": "preview/install/rocm", "os": ["linux"],},
-    {"file": "preview/benchmark-docker/index", "os": ["linux"],},
-    {"file": "preview/benchmark-docker/training", "os": ["linux"],},
-    {"file": "preview/benchmark-docker/pre-training-megatron-lm-llama-3-8b", "os": ["linux"],},
-    {"file": "preview/benchmark-docker/pre-training-torchtitan-llama-3-70b", "os": ["linux"],},
-    {"file": "preview/benchmark-docker/fine-tuning-lora-llama-2-70b", "os": ["linux"],},
-    {"file": "preview/benchmark-docker/inference", "os": ["linux"],},
-    {"file": "preview/benchmark-docker/inference-vllm-llama-3.1-405b-fp4", "os": ["linux"],},
-    {"file": "preview/benchmark-docker/inference-sglang-deepseek-r1-fp4", "os": ["linux"],},
+    {"file": "about/release-notes", "os": ["linux"], "date": "2025-04-11"},
+    {"file": "release/changelog", "os": ["linux"],},
+    {"file": "compatibility/compatibility-matrix", "os": ["linux"]},
+    {"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]},
+    {"file": "compatibility/ml-compatibility/tensorflow-compatibility", "os": ["linux"]},
+    {"file": "compatibility/ml-compatibility/jax-compatibility", "os": ["linux"]},
+    {"file": "how-to/deep-learning-rocm", "os": ["linux"]},

-    # {"file": "about/release-notes", "os": ["linux"], "date": "2025-06-26"},
-    # {"file": "release/changelog", "os": ["linux"],},
-    # {"file": "compatibility/compatibility-matrix", "os": ["linux"]},
-    # {"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]},
-    # {"file": "compatibility/ml-compatibility/tensorflow-compatibility", "os": ["linux"]},
-    # {"file": "compatibility/ml-compatibility/jax-compatibility", "os": ["linux"]},
-    # {"file": "how-to/deep-learning-rocm", "os": ["linux"]},
-    #
-    # {"file": "how-to/rocm-for-ai/index", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/install", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/system-health-check", "os": ["linux"]},
-    #
-    # {"file": "how-to/rocm-for-ai/training/index", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/training/train-a-model", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/training/prerequisite-system-validation", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/training/benchmark-docker/megatron-lm", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/training/benchmark-docker/pytorch-training", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/training/scale-model-training", "os": ["linux"]},
-    #
-    # {"file": "how-to/rocm-for-ai/fine-tuning/index", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/fine-tuning/overview", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference", "os": ["linux"]},
-    #
-    # {"file": "how-to/rocm-for-ai/inference/index", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/inference/hugging-face-models", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/inference/llm-inference-frameworks", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/inference/benchmark-docker/vllm", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.5-20250513", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]},
-    #
-    # {"file": "how-to/rocm-for-ai/inference-optimization/index", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/inference-optimization/model-quantization", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/inference-optimization/model-acceleration-libraries", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/inference-optimization/optimizing-with-composable-kernel", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/inference-optimization/optimizing-triton-kernel", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/inference-optimization/profiling-and-debugging", "os": ["linux"]},
-    # {"file": "how-to/rocm-for-ai/inference-optimization/workload", "os": ["linux"]},
-    #
-    # {"file": "how-to/system-optimization/index", "os": ["linux"]},
-    # {"file": "how-to/system-optimization/mi300x", "os": ["linux"]},
-    # {"file": "how-to/system-optimization/mi200", "os": ["linux"]},
-    # {"file": "how-to/system-optimization/mi100", "os": ["linux"]},
-    # {"file": "how-to/system-optimization/w6000-v620", "os": ["linux"]},
-    # {"file": "how-to/tuning-guides/mi300x/index", "os": ["linux"]},
-    # {"file": "how-to/tuning-guides/mi300x/system", "os": ["linux"]},
-    # {"file": "how-to/tuning-guides/mi300x/workload", "os": ["linux"]},
-    # {"file": "how-to/system-debugging", "os": ["linux"]},
-    # {"file": "how-to/gpu-enabled-mpi", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/index", "os": ["linux"]},
+
+    {"file": "how-to/rocm-for-ai/training/index", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/training/train-a-model", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/training/prerequisite-system-validation", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/training/benchmark-docker/megatron-lm", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/training/benchmark-docker/pytorch-training", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/training/scale-model-training", "os": ["linux"]},
+
+    {"file": "how-to/rocm-for-ai/fine-tuning/index", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/fine-tuning/overview", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference", "os": ["linux"]},
+
+    {"file": "how-to/rocm-for-ai/inference/index", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/inference/install", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/inference/hugging-face-models", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/inference/llm-inference-frameworks", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/inference/vllm-benchmark", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/inference/pytorch-inference-benchmark", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]},
+
+    {"file": "how-to/rocm-for-ai/inference-optimization/index", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/inference-optimization/model-quantization", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/inference-optimization/model-acceleration-libraries", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/inference-optimization/optimizing-with-composable-kernel", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/inference-optimization/optimizing-triton-kernel", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/inference-optimization/profiling-and-debugging", "os": ["linux"]},
+    {"file": "how-to/rocm-for-ai/inference-optimization/workload", "os": ["linux"]},
+
+    {"file": "how-to/system-optimization/index", "os": ["linux"]},
+    {"file": "how-to/system-optimization/mi300x", "os": ["linux"]},
+    {"file": "how-to/system-optimization/mi200", "os": ["linux"]},
+    {"file": "how-to/system-optimization/mi100", "os": ["linux"]},
+    {"file": "how-to/system-optimization/w6000-v620", "os": ["linux"]},
+    {"file": "how-to/tuning-guides/mi300x/index", "os": ["linux"]},
+    {"file": "how-to/tuning-guides/mi300x/system", "os": ["linux"]},
+    {"file": "how-to/tuning-guides/mi300x/workload", "os": ["linux"]},
+    {"file": "how-to/system-debugging", "os": ["linux"]},
+    {"file": "how-to/gpu-enabled-mpi", "os": ["linux"]},
 ]

 external_toc_path = "./sphinx/_toc.yml"
-# Options to improve documentation build time for preview release documentation
-external_toc_exclude_missing = True # don't build files that aren't in the TOC
-external_projects_remote_repository = "" # don't fetch data to resolve intersphinx xrefs

 # Add the _extensions directory to Python's search path
 sys.path.append(str(Path(__file__).parent / 'extension'))
@@ -139,7 +119,7 @@ html_static_path = ["sphinx/static/css", "extension/how-to/rocm-for-ai/inference
 html_css_files = ["rocm_custom.css", "rocm_rn.css", "vllm-benchmark.css"]
 html_js_files = ["vllm-benchmark.js"]

-html_title = "ROCm 7.0 Alpha documentation"
+html_title = "ROCm Documentation"

 html_theme_options = {"link_main_doc": False}

--- a/docs/data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.7.3_20250325-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.7.3_20250325-benchmark-models.yaml
@@ -1,159 +0,0 @@
-vllm_benchmark:
-  unified_docker:
-    latest:
-      pull_tag: rocm/vllm:rocm6.3.1_instinct_vllm0.7.3_20250325
-      docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.7.3_20250325/images/sha256-25245924f61750b19be6dcd8e787e46088a496c1fe17ee9b9e397f3d84d35640
-      rocm_version: 6.3.1
-      vllm_version: 0.7.3
-      pytorch_version: 2.7.0 (dev nightly)
-      hipblaslt_version: 0.13
-  model_groups:
-    - group: Llama
-      tag: llama
-      models:
-      - model: Llama 3.1 8B
-        mad_tag: pyt_vllm_llama-3.1-8b
-        model_repo: meta-llama/Llama-3.1-8B-Instruct
-        url: https://huggingface.co/meta-llama/Llama-3.1-8B
-        precision: float16
-      - model: Llama 3.1 70B
-        mad_tag: pyt_vllm_llama-3.1-70b
-        model_repo: meta-llama/Llama-3.1-70B-Instruct
-        url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
-        precision: float16
-      - model: Llama 3.1 405B
-        mad_tag: pyt_vllm_llama-3.1-405b
-        model_repo: meta-llama/Llama-3.1-405B-Instruct
-        url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
-        precision: float16
-      - model: Llama 3.2 11B Vision
-        mad_tag: pyt_vllm_llama-3.2-11b-vision-instruct
-        model_repo: meta-llama/Llama-3.2-11B-Vision-Instruct
-        url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct
-        precision: float16
-      - model: Llama 2 7B
-        mad_tag: pyt_vllm_llama-2-7b
-        model_repo: meta-llama/Llama-2-7b-chat-hf
-        url: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
-        precision: float16
-      - model: Llama 2 70B
-        mad_tag: pyt_vllm_llama-2-70b
-        model_repo: meta-llama/Llama-2-70b-chat-hf
-        url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
-        precision: float16
-      - model: Llama 3.1 8B FP8
-        mad_tag: pyt_vllm_llama-3.1-8b_fp8
-        model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
-        url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
-        precision: float8
-      - model: Llama 3.1 70B FP8
-        mad_tag: pyt_vllm_llama-3.1-70b_fp8
-        model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
-        url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
-        precision: float8
-      - model: Llama 3.1 405B FP8
-        mad_tag: pyt_vllm_llama-3.1-405b_fp8
-        model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
-        url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
-        precision: float8
-    - group: Mistral
-      tag: mistral
-      models:
-      - model: Mixtral MoE 8x7B
-        mad_tag: pyt_vllm_mixtral-8x7b
-        model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
-        url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
-        precision: float16
-      - model: Mixtral MoE 8x22B
-        mad_tag: pyt_vllm_mixtral-8x22b
-        model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
-        url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
-        precision: float16
-      - model: Mistral 7B
-        mad_tag: pyt_vllm_mistral-7b
-        model_repo: mistralai/Mistral-7B-Instruct-v0.3
-        url: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3
-        precision: float16
-      - model: Mixtral MoE 8x7B FP8
-        mad_tag: pyt_vllm_mixtral-8x7b_fp8
-        model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
-        url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
-        precision: float8
-      - model: Mixtral MoE 8x22B FP8
-        mad_tag: pyt_vllm_mixtral-8x22b_fp8
-        model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
-        url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
-        precision: float8
-      - model: Mistral 7B FP8
-        mad_tag: pyt_vllm_mistral-7b_fp8
-        model_repo: amd/Mistral-7B-v0.1-FP8-KV
-        url: https://huggingface.co/amd/Mistral-7B-v0.1-FP8-KV
-        precision: float8
-    - group: Qwen
-      tag: qwen
-      models:
-      - model: Qwen2 7B
-        mad_tag: pyt_vllm_qwen2-7b
-        model_repo: Qwen/Qwen2-7B-Instruct
-        url: https://huggingface.co/Qwen/Qwen2-7B-Instruct
-        precision: float16
-      - model: Qwen2 72B
-        mad_tag: pyt_vllm_qwen2-72b
-        model_repo: Qwen/Qwen2-72B-Instruct
-        url: https://huggingface.co/Qwen/Qwen2-72B-Instruct
-        precision: float16
-    - group: JAIS
-      tag: jais
-      models:
-      - model: JAIS 13B
-        mad_tag: pyt_vllm_jais-13b
-        model_repo: core42/jais-13b-chat
-        url: https://huggingface.co/core42/jais-13b-chat
-        precision: float16
-      - model: JAIS 30B
-        mad_tag: pyt_vllm_jais-30b
-        model_repo: core42/jais-30b-chat-v3
-        url: https://huggingface.co/core42/jais-30b-chat-v3
-        precision: float16
-    - group: DBRX
-      tag: dbrx
-      models:
-      - model: DBRX Instruct
-        mad_tag: pyt_vllm_dbrx-instruct
-        model_repo: databricks/dbrx-instruct
-        url: https://huggingface.co/databricks/dbrx-instruct
-        precision: float16
-      - model: DBRX Instruct FP8
-        mad_tag: pyt_vllm_dbrx_fp8
-        model_repo: amd/dbrx-instruct-FP8-KV
-        url: https://huggingface.co/amd/dbrx-instruct-FP8-KV
-        precision: float8
-    - group: Gemma
-      tag: gemma
-      models:
-      - model: Gemma 2 27B
-        mad_tag: pyt_vllm_gemma-2-27b
-        model_repo: google/gemma-2-27b
-        url: https://huggingface.co/google/gemma-2-27b
-        precision: float16
-    - group: Cohere
-      tag: cohere
-      models:
-      - model: C4AI Command R+ 08-2024
-        mad_tag: pyt_vllm_c4ai-command-r-plus-08-2024
-        model_repo: CohereForAI/c4ai-command-r-plus-08-2024
-        url: https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024
-        precision: float16
-      - model: C4AI Command R+ 08-2024 FP8
-        mad_tag: pyt_vllm_command-r-plus_fp8
-        model_repo: amd/c4ai-command-r-plus-FP8-KV
-        url: https://huggingface.co/amd/c4ai-command-r-plus-FP8-KV
-        precision: float8
-    - group: DeepSeek
-      tag: deepseek
-      models:
-      - model: DeepSeek MoE 16B
-        mad_tag: pyt_vllm_deepseek-moe-16b-chat
-        model_repo: deepseek-ai/deepseek-moe-16b-chat
-        url: https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat
-        precision: float16
--- a/docs/data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.8.3_20250415-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.8.3_20250415-benchmark-models.yaml
@@ -1,152 +0,0 @@
-vllm_benchmark:
-  unified_docker:
-    latest:
-      pull_tag: rocm/vllm:rocm6.3.1_instinct_vllm0.8.3_20250415
-      docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.8.3_20250415/images/sha256-ad9062dea3483d59dedb17c67f7c49f30eebd6eb37c3fac0a171fb19696cc845
-      rocm_version: 6.3.1
-      vllm_version: 0.8.3
-      pytorch_version: 2.7.0 (dev nightly)
-      hipblaslt_version: 0.13
-  model_groups:
-    - group: Llama
-      tag: llama
-      models:
-      - model: Llama 3.1 8B
-        mad_tag: pyt_vllm_llama-3.1-8b
-        model_repo: meta-llama/Llama-3.1-8B-Instruct
-        url: https://huggingface.co/meta-llama/Llama-3.1-8B
-        precision: float16
-      - model: Llama 3.1 70B
-        mad_tag: pyt_vllm_llama-3.1-70b
-        model_repo: meta-llama/Llama-3.1-70B-Instruct
-        url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
-        precision: float16
-      - model: Llama 3.1 405B
-        mad_tag: pyt_vllm_llama-3.1-405b
-        model_repo: meta-llama/Llama-3.1-405B-Instruct
-        url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
-        precision: float16
-      - model: Llama 3.2 11B Vision
-        mad_tag: pyt_vllm_llama-3.2-11b-vision-instruct
-        model_repo: meta-llama/Llama-3.2-11B-Vision-Instruct
-        url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct
-        precision: float16
-      - model: Llama 2 7B
-        mad_tag: pyt_vllm_llama-2-7b
-        model_repo: meta-llama/Llama-2-7b-chat-hf
-        url: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
-        precision: float16
-      - model: Llama 2 70B
-        mad_tag: pyt_vllm_llama-2-70b
-        model_repo: meta-llama/Llama-2-70b-chat-hf
-        url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
-        precision: float16
-      - model: Llama 3.1 8B FP8
-        mad_tag: pyt_vllm_llama-3.1-8b_fp8
-        model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
-        url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
-        precision: float8
-      - model: Llama 3.1 70B FP8
-        mad_tag: pyt_vllm_llama-3.1-70b_fp8
-        model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
-        url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
-        precision: float8
-      - model: Llama 3.1 405B FP8
-        mad_tag: pyt_vllm_llama-3.1-405b_fp8
-        model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
-        url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
-        precision: float8
-    - group: Mistral
-      tag: mistral
-      models:
-      - model: Mixtral MoE 8x7B
-        mad_tag: pyt_vllm_mixtral-8x7b
-        model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
-        url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
-        precision: float16
-      - model: Mixtral MoE 8x22B
-        mad_tag: pyt_vllm_mixtral-8x22b
-        model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
-        url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
-        precision: float16
-      - model: Mistral 7B
-        mad_tag: pyt_vllm_mistral-7b
-        model_repo: mistralai/Mistral-7B-Instruct-v0.3
-        url: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3
-        precision: float16
-      - model: Mixtral MoE 8x7B FP8
-        mad_tag: pyt_vllm_mixtral-8x7b_fp8
-        model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
-        url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
-        precision: float8
-      - model: Mixtral MoE 8x22B FP8
-        mad_tag: pyt_vllm_mixtral-8x22b_fp8
-        model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
-        url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
-        precision: float8
-      - model: Mistral 7B FP8
-        mad_tag: pyt_vllm_mistral-7b_fp8
-        model_repo: amd/Mistral-7B-v0.1-FP8-KV
-        url: https://huggingface.co/amd/Mistral-7B-v0.1-FP8-KV
-        precision: float8
-    - group: Qwen
-      tag: qwen
-      models:
-      - model: Qwen2 7B
-        mad_tag: pyt_vllm_qwen2-7b
-        model_repo: Qwen/Qwen2-7B-Instruct
-        url: https://huggingface.co/Qwen/Qwen2-7B-Instruct
-        precision: float16
-      - model: Qwen2 72B
-        mad_tag: pyt_vllm_qwen2-72b
-        model_repo: Qwen/Qwen2-72B-Instruct
-        url: https://huggingface.co/Qwen/Qwen2-72B-Instruct
-        precision: float16
-      - model: QwQ-32B
-        mad_tag: pyt_vllm_qwq-32b
-        model_repo: Qwen/QwQ-32B
-        url: https://huggingface.co/Qwen/QwQ-32B
-        precision: float16
-        tunableop: true
-    - group: DBRX
-      tag: dbrx
-      models:
-      - model: DBRX Instruct
-        mad_tag: pyt_vllm_dbrx-instruct
-        model_repo: databricks/dbrx-instruct
-        url: https://huggingface.co/databricks/dbrx-instruct
-        precision: float16
-      - model: DBRX Instruct FP8
-        mad_tag: pyt_vllm_dbrx_fp8
-        model_repo: amd/dbrx-instruct-FP8-KV
-        url: https://huggingface.co/amd/dbrx-instruct-FP8-KV
-        precision: float8
-    - group: Gemma
-      tag: gemma
-      models:
-      - model: Gemma 2 27B
-        mad_tag: pyt_vllm_gemma-2-27b
-        model_repo: google/gemma-2-27b
-        url: https://huggingface.co/google/gemma-2-27b
-        precision: float16
-    - group: Cohere
-      tag: cohere
-      models:
-      - model: C4AI Command R+ 08-2024
-        mad_tag: pyt_vllm_c4ai-command-r-plus-08-2024
-        model_repo: CohereForAI/c4ai-command-r-plus-08-2024
-        url: https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024
-        precision: float16
-      - model: C4AI Command R+ 08-2024 FP8
-        mad_tag: pyt_vllm_command-r-plus_fp8
-        model_repo: amd/c4ai-command-r-plus-FP8-KV
-        url: https://huggingface.co/amd/c4ai-command-r-plus-FP8-KV
-        precision: float8
-    - group: DeepSeek
-      tag: deepseek
-      models:
-      - model: DeepSeek MoE 16B
-        mad_tag: pyt_vllm_deepseek-moe-16b-chat
-        model_repo: deepseek-ai/deepseek-moe-16b-chat
-        url: https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat
-        precision: float16
--- a/docs/data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.8.5_20250513-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.8.5_20250513-benchmark-models.yaml
@@ -1,152 +0,0 @@
-vllm_benchmark:
-  unified_docker:
-    latest:
-      pull_tag: rocm/vllm:rocm6.3.1_vllm0.8.5_20250513
-      docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_vllm_0.8.5_20250513/images/sha256-5c8b4436dd0464119d9df2b44c745fadf81512f18ffb2f4b5dc235c71ebe26b4
-      rocm_version: 6.3.1
-      vllm_version: 0.8.5
-      pytorch_version: 2.7.0+gitf717b2a
-      hipblaslt_version: 0.15
-  model_groups:
-    - group: Meta Llama
-      tag: llama
-      models:
-      - model: Llama 3.1 8B
-        mad_tag: pyt_vllm_llama-3.1-8b
-        model_repo: meta-llama/Llama-3.1-8B-Instruct
-        url: https://huggingface.co/meta-llama/Llama-3.1-8B
-        precision: float16
-      - model: Llama 3.1 70B
-        mad_tag: pyt_vllm_llama-3.1-70b
-        model_repo: meta-llama/Llama-3.1-70B-Instruct
-        url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
-        precision: float16
-      - model: Llama 3.1 405B
-        mad_tag: pyt_vllm_llama-3.1-405b
-        model_repo: meta-llama/Llama-3.1-405B-Instruct
-        url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
-        precision: float16
-      - model: Llama 3.2 11B Vision
-        mad_tag: pyt_vllm_llama-3.2-11b-vision-instruct
-        model_repo: meta-llama/Llama-3.2-11B-Vision-Instruct
-        url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct
-        precision: float16
-      - model: Llama 2 7B
-        mad_tag: pyt_vllm_llama-2-7b
-        model_repo: meta-llama/Llama-2-7b-chat-hf
-        url: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
-        precision: float16
-      - model: Llama 2 70B
-        mad_tag: pyt_vllm_llama-2-70b
-        model_repo: meta-llama/Llama-2-70b-chat-hf
-        url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
-        precision: float16
-      - model: Llama 3.1 8B FP8
-        mad_tag: pyt_vllm_llama-3.1-8b_fp8
-        model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
-        url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
-        precision: float8
-      - model: Llama 3.1 70B FP8
-        mad_tag: pyt_vllm_llama-3.1-70b_fp8
-        model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
-        url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
-        precision: float8
-      - model: Llama 3.1 405B FP8
-        mad_tag: pyt_vllm_llama-3.1-405b_fp8
-        model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
-        url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
-        precision: float8
-    - group: Mistral AI
-      tag: mistral
-      models:
-      - model: Mixtral MoE 8x7B
-        mad_tag: pyt_vllm_mixtral-8x7b
-        model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
-        url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
-        precision: float16
-      - model: Mixtral MoE 8x22B
-        mad_tag: pyt_vllm_mixtral-8x22b
-        model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
-        url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
-        precision: float16
-      - model: Mistral 7B
-        mad_tag: pyt_vllm_mistral-7b
-        model_repo: mistralai/Mistral-7B-Instruct-v0.3
-        url: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3
-        precision: float16
-      - model: Mixtral MoE 8x7B FP8
-        mad_tag: pyt_vllm_mixtral-8x7b_fp8
-        model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
-        url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
-        precision: float8
-      - model: Mixtral MoE 8x22B FP8
-        mad_tag: pyt_vllm_mixtral-8x22b_fp8
-        model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
-        url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
-        precision: float8
-      - model: Mistral 7B FP8
-        mad_tag: pyt_vllm_mistral-7b_fp8
-        model_repo: amd/Mistral-7B-v0.1-FP8-KV
-        url: https://huggingface.co/amd/Mistral-7B-v0.1-FP8-KV
-        precision: float8
-    - group: Qwen
-      tag: qwen
-      models:
-      - model: Qwen2 7B
-        mad_tag: pyt_vllm_qwen2-7b
-        model_repo: Qwen/Qwen2-7B-Instruct
-        url: https://huggingface.co/Qwen/Qwen2-7B-Instruct
-        precision: float16
-      - model: Qwen2 72B
-        mad_tag: pyt_vllm_qwen2-72b
-        model_repo: Qwen/Qwen2-72B-Instruct
-        url: https://huggingface.co/Qwen/Qwen2-72B-Instruct
-        precision: float16
-      - model: QwQ-32B
-        mad_tag: pyt_vllm_qwq-32b
-        model_repo: Qwen/QwQ-32B
-        url: https://huggingface.co/Qwen/QwQ-32B
-        precision: float16
-        tunableop: true
-    - group: Databricks DBRX
-      tag: dbrx
-      models:
-      - model: DBRX Instruct
-        mad_tag: pyt_vllm_dbrx-instruct
-        model_repo: databricks/dbrx-instruct
-        url: https://huggingface.co/databricks/dbrx-instruct
-        precision: float16
-      - model: DBRX Instruct FP8
-        mad_tag: pyt_vllm_dbrx_fp8
-        model_repo: amd/dbrx-instruct-FP8-KV
-        url: https://huggingface.co/amd/dbrx-instruct-FP8-KV
-        precision: float8
-    - group: Google Gemma
-      tag: gemma
-      models:
-      - model: Gemma 2 27B
-        mad_tag: pyt_vllm_gemma-2-27b
-        model_repo: google/gemma-2-27b
-        url: https://huggingface.co/google/gemma-2-27b
-        precision: float16
-    - group: Cohere
-      tag: cohere
-      models:
-      - model: C4AI Command R+ 08-2024
-        mad_tag: pyt_vllm_c4ai-command-r-plus-08-2024
-        model_repo: CohereForAI/c4ai-command-r-plus-08-2024
-        url: https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024
-        precision: float16
-      - model: C4AI Command R+ 08-2024 FP8
-        mad_tag: pyt_vllm_command-r-plus_fp8
-        model_repo: amd/c4ai-command-r-plus-FP8-KV
-        url: https://huggingface.co/amd/c4ai-command-r-plus-FP8-KV
-        precision: float8
-    - group: DeepSeek
-      tag: deepseek
-      models:
-      - model: DeepSeek MoE 16B
-        mad_tag: pyt_vllm_deepseek-moe-16b-chat
-        model_repo: deepseek-ai/deepseek-moe-16b-chat
-        url: https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat
-        precision: float16
--- a/docs/data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.8.5_20250521-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.8.5_20250521-benchmark-models.yaml
@@ -1,167 +0,0 @@
-vllm_benchmark:
-  unified_docker:
-    latest:
-      pull_tag: rocm/vllm:rocm6.3.1_vllm0.8.5_20250521
-      docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_vllm_0.8.5_20250521/images/sha256-38410c51af7208897cd8b737c9bdfc126e9bc8952d4aa6b88c85482f03092a11
-      rocm_version: 6.3.1
-      vllm_version: 0.8.5 (0.8.6.dev315+g91a560098.rocm631)
-      pytorch_version: 2.7.0+gitf717b2a
-      hipblaslt_version: 0.15
-  model_groups:
-    - group: Meta Llama
-      tag: llama
-      models:
-        - model: Llama 3.1 8B
-          mad_tag: pyt_vllm_llama-3.1-8b
-          model_repo: meta-llama/Llama-3.1-8B-Instruct
-          url: https://huggingface.co/meta-llama/Llama-3.1-8B
-          precision: float16
-        - model: Llama 3.1 70B
-          mad_tag: pyt_vllm_llama-3.1-70b
-          model_repo: meta-llama/Llama-3.1-70B-Instruct
-          url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
-          precision: float16
-        - model: Llama 3.1 405B
-          mad_tag: pyt_vllm_llama-3.1-405b
-          model_repo: meta-llama/Llama-3.1-405B-Instruct
-          url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
-          precision: float16
-        - model: Llama 3.2 11B Vision
-          mad_tag: pyt_vllm_llama-3.2-11b-vision-instruct
-          model_repo: meta-llama/Llama-3.2-11B-Vision-Instruct
-          url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct
-          precision: float16
-        - model: Llama 2 7B
-          mad_tag: pyt_vllm_llama-2-7b
-          model_repo: meta-llama/Llama-2-7b-chat-hf
-          url: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
-          precision: float16
-        - model: Llama 2 70B
-          mad_tag: pyt_vllm_llama-2-70b
-          model_repo: meta-llama/Llama-2-70b-chat-hf
-          url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
-          precision: float16
-        - model: Llama 3.1 8B FP8
-          mad_tag: pyt_vllm_llama-3.1-8b_fp8
-          model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
-          url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
-          precision: float8
-        - model: Llama 3.1 70B FP8
-          mad_tag: pyt_vllm_llama-3.1-70b_fp8
-          model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
-          url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
-          precision: float8
-        - model: Llama 3.1 405B FP8
-          mad_tag: pyt_vllm_llama-3.1-405b_fp8
-          model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
-          url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
-          precision: float8
-    - group: Mistral AI
-      tag: mistral
-      models:
-        - model: Mixtral MoE 8x7B
-          mad_tag: pyt_vllm_mixtral-8x7b
-          model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
-          url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
-          precision: float16
-        - model: Mixtral MoE 8x22B
-          mad_tag: pyt_vllm_mixtral-8x22b
-          model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
-          url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
-          precision: float16
-        - model: Mistral 7B
-          mad_tag: pyt_vllm_mistral-7b
-          model_repo: mistralai/Mistral-7B-Instruct-v0.3
-          url: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3
-          precision: float16
-        - model: Mixtral MoE 8x7B FP8
-          mad_tag: pyt_vllm_mixtral-8x7b_fp8
-          model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
-          url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
-          precision: float8
-        - model: Mixtral MoE 8x22B FP8
-          mad_tag: pyt_vllm_mixtral-8x22b_fp8
-          model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
-          url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
-          precision: float8
-        - model: Mistral 7B FP8
-          mad_tag: pyt_vllm_mistral-7b_fp8
-          model_repo: amd/Mistral-7B-v0.1-FP8-KV
-          url: https://huggingface.co/amd/Mistral-7B-v0.1-FP8-KV
-          precision: float8
-    - group: Qwen
-      tag: qwen
-      models:
-        - model: Qwen2 7B
-          mad_tag: pyt_vllm_qwen2-7b
-          model_repo: Qwen/Qwen2-7B-Instruct
-          url: https://huggingface.co/Qwen/Qwen2-7B-Instruct
-          precision: float16
-        - model: Qwen2 72B
-          mad_tag: pyt_vllm_qwen2-72b
-          model_repo: Qwen/Qwen2-72B-Instruct
-          url: https://huggingface.co/Qwen/Qwen2-72B-Instruct
-          precision: float16
-        - model: QwQ-32B
-          mad_tag: pyt_vllm_qwq-32b
-          model_repo: Qwen/QwQ-32B
-          url: https://huggingface.co/Qwen/QwQ-32B
-          precision: float16
-          tunableop: true
-    - group: Databricks DBRX
-      tag: dbrx
-      models:
-        - model: DBRX Instruct
-          mad_tag: pyt_vllm_dbrx-instruct
-          model_repo: databricks/dbrx-instruct
-          url: https://huggingface.co/databricks/dbrx-instruct
-          precision: float16
-        - model: DBRX Instruct FP8
-          mad_tag: pyt_vllm_dbrx_fp8
-          model_repo: amd/dbrx-instruct-FP8-KV
-          url: https://huggingface.co/amd/dbrx-instruct-FP8-KV
-          precision: float8
-    - group: Google Gemma
-      tag: gemma
-      models:
-        - model: Gemma 2 27B
-          mad_tag: pyt_vllm_gemma-2-27b
-          model_repo: google/gemma-2-27b
-          url: https://huggingface.co/google/gemma-2-27b
-          precision: float16
-    - group: Cohere
-      tag: cohere
-      models:
-        - model: C4AI Command R+ 08-2024
-          mad_tag: pyt_vllm_c4ai-command-r-plus-08-2024
-          model_repo: CohereForAI/c4ai-command-r-plus-08-2024
-          url: https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024
-          precision: float16
-        - model: C4AI Command R+ 08-2024 FP8
-          mad_tag: pyt_vllm_command-r-plus_fp8
-          model_repo: amd/c4ai-command-r-plus-FP8-KV
-          url: https://huggingface.co/amd/c4ai-command-r-plus-FP8-KV
-          precision: float8
-    - group: DeepSeek
-      tag: deepseek
-      models:
-        - model: DeepSeek MoE 16B
-          mad_tag: pyt_vllm_deepseek-moe-16b-chat
-          model_repo: deepseek-ai/deepseek-moe-16b-chat
-          url: https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat
-          precision: float16
-    - group: Microsoft Phi
-      tag: phi
-      models:
-        - model: Phi-4
-          mad_tag: pyt_vllm_phi-4
-          model_repo: microsoft/phi-4
-          url: https://huggingface.co/microsoft/phi-4
-    - group: TII Falcon
-      tag: falcon
-      models:
-        - model: Falcon 180B
-          mad_tag: pyt_vllm_falcon-180b
-          model_repo: tiiuae/falcon-180B
-          url: https://huggingface.co/tiiuae/falcon-180B
-          precision: float16
--- a/docs/data/how-to/rocm-for-ai/inference/pytorch-inference-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/pytorch-inference-benchmark-models.yaml
@@ -23,11 +23,3 @@ pytorch_inference_benchmark:
        model_repo: meta-llama/Llama-3.1-8B-Instruct
        url: https://huggingface.co/chaidiscovery/chai-1
        precision: float16
-    - group: Mochi Video
-      tag: mochi
-      models:
-      - model: Mochi 1
-        mad_tag: pyt_mochi_video_inference
-        model_repo: genmo/mochi-1-preview
-        url: https://huggingface.co/genmo/mochi-1-preview
-        precision: float16
--- a/docs/data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml
@@ -1,14 +1,14 @@
 vllm_benchmark:
  unified_docker:
    latest:
-      pull_tag: rocm/vllm:rocm6.4.1_vllm_0.9.0.1_20250605
-      docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.9.0.1_20250605/images/sha256-f48beeb3d72663a93c77211eb45273d564451447c097e060befa713d565fa36c
-      rocm_version: 6.4.1
-      vllm_version: 0.9.0.1 (0.9.0.2.dev108+g71faa1880.rocm641)
-      pytorch_version: 2.7.0+gitf717b2a
-      hipblaslt_version: 0.15
+      pull_tag: rocm/vllm:rocm6.3.1_instinct_vllm0.8.3_20250410
+      docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.8.3_20250410/images/sha256-a0b55c6c0f3fa5d437fb54a66e32a108306c36d4776e570dfd0ae902719bd190
+      rocm_version: 6.3.1
+      vllm_version: 0.8.3
+      pytorch_version: 2.7.0 (dev nightly)
+      hipblaslt_version: 0.13
  model_groups:
-    - group: Meta Llama
+    - group: Llama
      tag: llama
      models:
      - model: Llama 3.1 8B
@@ -26,6 +26,11 @@ vllm_benchmark:
        model_repo: meta-llama/Llama-3.1-405B-Instruct
        url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
        precision: float16
+      - model: Llama 3.2 11B Vision
+        mad_tag: pyt_vllm_llama-3.2-11b-vision-instruct
+        model_repo: meta-llama/Llama-3.2-11B-Vision-Instruct
+        url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct
+        precision: float16
      - model: Llama 2 7B
        mad_tag: pyt_vllm_llama-2-7b
        model_repo: meta-llama/Llama-2-7b-chat-hf
@@ -51,7 +56,7 @@ vllm_benchmark:
        model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
        url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
        precision: float8
-    - group: Mistral AI
+    - group: Mistral
      tag: mistral
      models:
      - model: Mixtral MoE 8x7B
@@ -103,7 +108,7 @@ vllm_benchmark:
        url: https://huggingface.co/Qwen/QwQ-32B
        precision: float16
        tunableop: true
-    - group: Databricks DBRX
+    - group: DBRX
      tag: dbrx
      models:
      - model: DBRX Instruct
@@ -116,7 +121,7 @@ vllm_benchmark:
        model_repo: amd/dbrx-instruct-FP8-KV
        url: https://huggingface.co/amd/dbrx-instruct-FP8-KV
        precision: float8
-    - group: Google Gemma
+    - group: Gemma
      tag: gemma
      models:
      - model: Gemma 2 27B
@@ -145,18 +150,3 @@ vllm_benchmark:
        model_repo: deepseek-ai/deepseek-moe-16b-chat
        url: https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat
        precision: float16
-    - group: Microsoft Phi
-      tag: phi
-      models:
-      - model: Phi-4
-        mad_tag: pyt_vllm_phi-4
-        model_repo: microsoft/phi-4
-        url: https://huggingface.co/microsoft/phi-4
-    - group: TII Falcon
-      tag: falcon
-      models:
-      - model: Falcon 180B
-        mad_tag: pyt_vllm_falcon-180b
-        model_repo: tiiuae/falcon-180B
-        url: https://huggingface.co/tiiuae/falcon-180B
-        precision: float16
--- a/docs/data/how-to/rocm-for-ai/training/megatron-lm-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/training/megatron-lm-benchmark-models.yaml
@@ -1,29 +0,0 @@
-megatron-lm_benchmark:
-  model_groups:
-    - group: Meta Llama
-      tag: llama
-      models:
-      - model: Llama 3.3 70B
-        mad_tag: pyt_megatron_lm_train_llama-3.3-70b
-      - model: Llama 3.1 8B
-        mad_tag: pyt_megatron_lm_train_llama-3.1-8b
-      - model: Llama 3.1 70B
-        mad_tag: pyt_megatron_lm_train_llama-3.1-70b
-      - model: Llama 2 7B
-        mad_tag: pyt_megatron_lm_train_llama-2-7b
-      - model: Llama 2 70B
-        mad_tag: pyt_megatron_lm_train_llama-2-70b
-    - group: DeepSeek
-      tag: deepseek
-      models:
-      - model: DeepSeek-V3
-        mad_tag: pyt_megatron_lm_train_deepseek-v3-proxy
-      - model: DeepSeek-V2-Lite
-        mad_tag: pyt_megatron_lm_train_deepseek-v2-lite-16b
-    - group: Mistral AI
-      tag: mistral
-      models:
-      - model: Mixtral 8x7B
-        mad_tag: pyt_megatron_lm_train_mixtral-8x7b
-      - model: Mixtral 8x22B
-        mad_tag: pyt_megatron_lm_train_mixtral-8x22b-proxy
--- a/docs/data/how-to/rocm-for-ai/training/pytorch-training-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/training/pytorch-training-benchmark-models.yaml
@@ -1,120 +0,0 @@
-unified_docker:
-  latest:
-    pull_tag: rocm/pytorch-training:v25.6
-    docker_hub_url: https://hub.docker.com/r/rocm/pytorch-training/tags
-    rocm_version: 6.4.1
-    pytorch_version: 2.8.0a0+git7d205b2
-    python_version: 3.10.17
-    transformer_engine_version: 1.14.0+2f85f5f2
-    flash_attention_version: 3.0.0.post1
-    hipblaslt_version: 0.15.0-8c6919d
-    triton_version: 3.3.0
-model_groups:
-  - group: Pre-training
-    tag: pre-training
-    models:
-    - model: Llama 3.1 8B
-      mad_tag: pyt_train_llama-3.1-8b
-      model_repo: Llama-3.1-8B
-      url: https://huggingface.co/meta-llama/Llama-3.1-8B
-      precision: BF16
-      training_modes: [pretrain]
-    - model: Llama 3.1 70B
-      mad_tag: pyt_train_llama-3.1-70b
-      model_repo: Llama-3.1-70B
-      url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
-      precision: BF16
-      training_modes: [pretrain]
-    - model: FLUX.1-dev
-      mad_tag: pyt_train_flux
-      model_repo: Flux
-      url: https://huggingface.co/black-forest-labs/FLUX.1-dev
-      precision: BF16
-      training_modes: [pretrain]
-  - group: Fine-tuning
-    tag: fine-tuning
-    models:
-    - model: Llama 4 Scout 17B-16E
-      mad_tag: pyt_train_llama-4-scout-17b-16e
-      model_repo: Llama-4-17B_16E
-      url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E
-      precision: BF16
-      training_modes: [finetune_fw, finetune_lora]
-    - model: Llama 3.3 70B
-      mad_tag: pyt_train_llama-3.3-70b
-      model_repo: Llama-3.3-70B
-      url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
-      precision: BF16
-      training_modes: [finetune_fw, finetune_lora, finetune_qlora]
-    - model: Llama 3.2 1B
-      mad_tag: pyt_train_llama-3.2-1b
-      model_repo: Llama-3.2-1B
-      url: https://huggingface.co/meta-llama/Llama-3.2-1B
-      precision: BF16
-      training_modes: [finetune_fw, finetune_lora]
-    - model: Llama 3.2 3B
-      mad_tag: pyt_train_llama-3.2-3b
-      model_repo: Llama-3.2-3B
-      url: https://huggingface.co/meta-llama/Llama-3.2-3B
-      precision: BF16
-      training_modes: [finetune_fw, finetune_lora]
-    - model: Llama 3.2 Vision 11B
-      mad_tag: pyt_train_llama-3.2-vision-11b
-      model_repo: Llama-3.2-Vision-11B
-      url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision
-      precision: BF16
-      training_modes: [finetune_fw]
-    - model: Llama 3.2 Vision 90B
-      mad_tag: pyt_train_llama-3.2-vision-90b
-      model_repo: Llama-3.2-Vision-90B
-      url: https://huggingface.co/meta-llama/Llama-3.2-90B-Vision
-      precision: BF16
-      training_modes: [finetune_fw]
-    - model: Llama 3.1 8B
-      mad_tag: pyt_train_llama-3.1-8b
-      model_repo: Llama-3.1-8B
-      url: https://huggingface.co/meta-llama/Llama-3.1-8B
-      precision: BF16
-      training_modes: [finetune_fw, finetune_lora]
-    - model: Llama 3.1 70B
-      mad_tag: pyt_train_llama-3.1-70b
-      model_repo: Llama-3.1-70B
-      url: https://huggingface.co/meta-llama/Llama-3.1-70B
-      precision: BF16
-      training_modes: [finetune_fw, finetune_lora, finetune_qlora]
-    - model: Llama 3.1 405B
-      mad_tag: pyt_train_llama-3.1-405b
-      model_repo: Llama-3.1-405B
-      url: https://huggingface.co/meta-llama/Llama-3.1-405B
-      precision: BF16
-      training_modes: [finetune_qlora, HF_finetune_lora]
-    - model: Llama 3 8B
-      mad_tag: pyt_train_llama-3-8b
-      model_repo: Llama-3-8B
-      url: https://huggingface.co/meta-llama/Meta-Llama-3-8B
-      precision: BF16
-      training_modes: [finetune_fw, finetune_lora]
-    - model: Llama 3 70B
-      mad_tag: pyt_train_llama-3-70b
-      model_repo: Llama-3-70B
-      url: https://huggingface.co/meta-llama/Meta-Llama-3-70B
-      precision: BF16
-      training_modes: [finetune_fw, finetune_lora]
-    - model: Llama 2 7B
-      mad_tag: pyt_train_llama-2-7b
-      model_repo: Llama-2-7B
-      url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
-      precision: BF16
-      training_modes: [finetune_fw, finetune_lora, finetune_qlora]
-    - model: Llama 2 13B
-      mad_tag: pyt_train_llama-2-13b
-      model_repo: Llama-2-13B
-      url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
-      precision: BF16
-      training_modes: [finetune_fw, finetune_lora]
-    - model: Llama 2 70B
-      mad_tag: pyt_train_llama-2-70b
-      model_repo: Llama-2-70B
-      url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
-      precision: BF16
-      training_modes: [finetune_lora, finetune_qlora, HF_finetune_lora]
--- a/docs/data/rocm-software-stack-6_4_0.jpg
+++ b/docs/data/rocm-software-stack-6_4_0.jpg
--- a/docs/how-to/rocm-for-ai/inference-optimization/model-quantization.rst
+++ b/docs/how-to/rocm-for-ai/inference-optimization/model-quantization.rst
@@ -1,178 +1,15 @@
 .. meta::
   :description: How to use model quantization techniques to speed up inference.
-   :keywords: ROCm, LLM, fine-tuning, usage, tutorial, quantization, Quark, GPTQ, transformers, bitsandbytes
+   :keywords: ROCm, LLM, fine-tuning, usage, tutorial, quantization, GPTQ, transformers, bitsandbytes

 *****************************
 Model quantization techniques
 *****************************

 Quantization reduces the model size compared to its native full-precision version, making it easier to fit large models
-onto accelerators or GPUs with limited memory usage. This section explains how to perform LLM quantization using AMD Quark, GPTQ
+onto accelerators or GPUs with limited memory usage. This section explains how to perform LLM quantization using GPTQ
 and bitsandbytes on AMD Instinct hardware.

-.. _quantize-llms-quark:
-
-AMD Quark
-=========
-
-`AMD Quark <https://quark.docs.amd.com/latest/>`_ offers the leading efficient and scalable quantization solution tailored to AMD Instinct GPUs. It supports ``FP8`` and ``INT8`` quantization for activations, weights, and KV cache, 
-including ``FP8`` attention. For very large models, it employs a two-level ``INT4-FP8`` scheme—storing weights in ``INT4`` while computing with ``FP8``—for nearly 4× compression without sacrificing accuracy. 
-Quark scales efficiently across multiple GPUs, handling ultra-large models like Llama-3.1-405B. Quantized ``FP8`` models like Llama, Mixtral, and Grok-1 are available under the `AMD organization on Hugging Face <https://huggingface.co/collections/amd/quark-quantized-ocp-fp8-models-66db7936d18fcbaf95d4405c>`_, and can be deployed directly via `vLLM <https://github.com/vllm-project/vllm/tree/main/vllm>`_.
-
-Installing Quark
-------------------
-
-The latest release of Quark can be installed with pip:
-
-.. code-block:: shell
-
-    pip install amd-quark
-
-For detailed installation instructions, refer to the `Quark documentation <https://quark.docs.amd.com/latest/install.html>`_.
-
-
-Using Quark for quantization
-----------------------------
-
-#. First, load the pre-trained model and its corresponding tokenizer using the Hugging Face ``transformers`` library.
-
-   .. code-block:: python
-
-      from transformers import AutoTokenizer, AutoModelForCausalLM
-
-      MODEL_ID = "meta-llama/Llama-2-70b-chat-hf"
-      MAX_SEQ_LEN = 512
-
-      model = AutoModelForCausalLM.from_pretrained(
-          MODEL_ID, device_map="auto", torch_dtype="auto",
-      )
-      model.eval()
-
-      tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, model_max_length=MAX_SEQ_LEN)
-      tokenizer.pad_token = tokenizer.eos_token
-
-#. Prepare the calibration DataLoader (static quantization requires calibration data).
-
-   .. code-block:: python
-
-      from datasets import load_dataset
-      from torch.utils.data import DataLoader
-
-      BATCH_SIZE = 1
-      NUM_CALIBRATION_DATA = 512
-
-      dataset = load_dataset("mit-han-lab/pile-val-backup", split="validation")
-      text_data = dataset["text"][:NUM_CALIBRATION_DATA]
-
-      tokenized_outputs = tokenizer(
-      text_data, return_tensors="pt", padding=True, truncation=True, max_length=MAX_SEQ_LEN
-      )
-      calib_dataloader = DataLoader(
-      tokenized_outputs['input_ids'], batch_size=BATCH_SIZE, drop_last=True
-      )
-
-#. Define the quantization configuration. See the comments in the following code snippet for descriptions of each configuration option.
-
-   .. code-block:: python
-
-      from quark.torch.quantization import (Config, QuantizationConfig,
-                                           FP8E4M3PerTensorSpec)
-
-      # Define fp8/per-tensor/static spec.
-      FP8_PER_TENSOR_SPEC = FP8E4M3PerTensorSpec(observer_method="min_max",
-          is_dynamic=False).to_quantization_spec()
-
-      # Define global quantization config, input tensors and weight apply FP8_PER_TENSOR_SPEC.
-      global_quant_config = QuantizationConfig(input_tensors=FP8_PER_TENSOR_SPEC,
-          weight=FP8_PER_TENSOR_SPEC)
-
-      # Define quantization config for kv-cache layers, output tensors apply FP8_PER_TENSOR_SPEC.
-      KV_CACHE_SPEC = FP8_PER_TENSOR_SPEC
-      kv_cache_layer_names_for_llama = ["*k_proj", "*v_proj"]
-      kv_cache_quant_config = {name :
-          QuantizationConfig(input_tensors=global_quant_config.input_tensors,
-                             weight=global_quant_config.weight,
-                             output_tensors=KV_CACHE_SPEC)
-          for name in kv_cache_layer_names_for_llama}
-      layer_quant_config = kv_cache_quant_config.copy()
-
-      EXCLUDE_LAYERS = ["lm_head"]
-      quant_config = Config(
-          global_quant_config=global_quant_config,
-          layer_quant_config=layer_quant_config,
-          kv_cache_quant_config=kv_cache_quant_config,
-          exclude=EXCLUDE_LAYERS)
-
-#. Quantize the model and export
-
-   .. code-block:: python
-
-      import torch
-      from quark.torch import ModelQuantizer, ModelExporter
-      from quark.torch.export import ExporterConfig, JsonExporterConfig
-
-      # Apply quantization.
-      quantizer = ModelQuantizer(quant_config)
-      quant_model = quantizer.quantize_model(model, calib_dataloader)
-
-      # Freeze quantized model to export.
-      freezed_model = quantizer.freeze(model)
-
-      # Define export config.
-      LLAMA_KV_CACHE_GROUP = ["*k_proj", "*v_proj"]
-      export_config = ExporterConfig(json_export_config=JsonExporterConfig())
-      export_config.json_export_config.kv_cache_group = LLAMA_KV_CACHE_GROUP
-
-      EXPORT_DIR = MODEL_ID.split("/")[1] + "-w-fp8-a-fp8-kvcache-fp8-pertensor"
-      exporter = ModelExporter(config=export_config, export_dir=EXPORT_DIR)
-      with torch.no_grad():
-          exporter.export_safetensors_model(freezed_model,
-              quant_config=quant_config, tokenizer=tokenizer)
-
-Evaluating the quantized model with vLLM
----------------------------------------
-
-The exported Quark-quantized model can be loaded directly by vLLM for inference. You need to specify the model path and inform vLLM about the quantization method (``quantization='quark'``) and the KV cache data type (``kv_cache_dtype='fp8'``).
-Use the ``LLM`` interface to load the model:
-
-.. code-block:: python
-
-   from vllm import LLM, SamplingParams
-
-   # Sample prompts.
-   prompts = [
-       "Hello, my name is",
-       "The president of the United States is",
-       "The capital of France is",
-       "The future of AI is",
-   ]
-   # Create a sampling params object.
-   sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
-
-   # Create an LLM.
-   llm = LLM(model="Llama-2-70b-chat-hf-w-fp8-a-fp8-kvcache-fp8-pertensor",
-             kv_cache_dtype='fp8',quantization='quark')
-   # Generate texts from the prompts. The output is a list of RequestOutput objects
-   # that contain the prompt, generated text, and other information.
-   outputs = llm.generate(prompts, sampling_params)
-   # Print the outputs.
-   print("\nGenerated Outputs:\n" + "-" * 60)
-   for output in outputs:
-       prompt = output.prompt
-       generated_text = output.outputs[0].text
-       print(f"Prompt:    {prompt!r}")
-       print(f"Output:    {generated_text!r}")
-       print("-" * 60)
-
-You can also evaluate the quantized model's accuracy on standard benchmarks using the `lm-evaluation-harness <https://github.com/EleutherAI/lm-evaluation-harness>`_. Pass the necessary vLLM arguments to ``lm_eval`` via ``--model_args``.
-
-.. code-block:: shell
-
-   lm_eval --model vllm \
-     --model_args pretrained=Llama-2-70b-chat-hf-w-fp8-a-fp8-kvcache-fp8-pertensor,kv_cache_dtype='fp8',quantization='quark' \
-     --tasks gsm8k
-
-This provides a standardized way to measure the performance impact of quantization.
 .. _fine-tune-llms-gptq:

 GPTQ
@@ -196,7 +33,7 @@ The AutoGPTQ library implements the GPTQ algorithm.
   .. code-block:: shell

      # This will install pre-built wheel for a specific ROCm version.
-
+      
      pip install auto-gptq --no-build-isolation --extra-index-url https://huggingface.github.io/autogptq-index/whl/rocm573/

   Or, install AutoGPTQ from source for the appropriate ROCm version (for example, ROCm 6.1).
@@ -206,10 +43,10 @@ The AutoGPTQ library implements the GPTQ algorithm.
      # Clone the source code.
      git clone https://github.com/AutoGPTQ/AutoGPTQ.git
      cd AutoGPTQ
-
+      
      # Speed up the compilation by specifying PYTORCH_ROCM_ARCH to target device.
      PYTORCH_ROCM_ARCH=gfx942 ROCM_VERSION=6.1 pip install .
-
+      
      # Show the package after the installation 

 #. Run ``pip show auto-gptq`` to print information for the installed ``auto-gptq`` package. Its output should look like
@@ -275,7 +112,7 @@ Using GPTQ with Hugging Face Transformers
   .. code-block:: python

      from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
-
+      
      base_model_name = "NousResearch/Llama-2-7b-hf"
      tokenizer = AutoTokenizer.from_pretrained(base_model_name)
      gptq_config = GPTQConfig(bits=4, dataset="c4", tokenizer=tokenizer)
@@ -375,10 +212,10 @@ To get started with bitsandbytes primitives, use the following code as reference
 .. code-block:: python

   import bitsandbytes as bnb
-
+   
   # Use Int8 Matrix Multiplication
   bnb.matmul(..., threshold=6.0)
-
+   
   # Use bitsandbytes 8-bit Optimizers
   adam = bnb.optim.Adam8bit(model.parameters(), lr=0.001, betas=(0.9, 0.995))

@@ -390,14 +227,14 @@ To load a Transformers model in 4-bit, set ``load_in_4bit=true`` in ``BitsAndByt
 .. code-block:: python

   from transformers import AutoModelForCausalLM, BitsAndBytesConfig
-
+   
   base_model_name = "NousResearch/Llama-2-7b-hf"
   quantization_config = BitsAndBytesConfig(load_in_4bit=True)
   bnb_model_4bit = AutoModelForCausalLM.from_pretrained(
           base_model_name, 
           device_map="auto", 
           quantization_config=quantization_config)
-
+   
   # Check the memory footprint with get_memory_footprint method
   print(bnb_model_4bit.get_memory_footprint())

@@ -406,9 +243,9 @@ To load a model in 8-bit for inference, use the ``load_in_8bit`` option.
 .. code-block:: python

   from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-
+   
   base_model_name = "NousResearch/Llama-2-7b-hf"
-
+   
   tokenizer = AutoTokenizer.from_pretrained(base_model_name)
   quantization_config = BitsAndBytesConfig(load_in_8bit=True)
   tokenizer = AutoTokenizer.from_pretrained(base_model_name)
@@ -416,7 +253,7 @@ To load a model in 8-bit for inference, use the ``load_in_8bit`` option.
           base_model_name, 
           device_map="auto", 
           quantization_config=quantization_config)
-
+   
   prompt = "What is a large language model?"
   inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
   generated_ids = model.generate(**inputs)
--- a/docs/how-to/rocm-for-ai/inference-optimization/workload.rst
+++ b/docs/how-to/rocm-for-ai/inference-optimization/workload.rst
@@ -678,7 +678,7 @@ To specify the quantization scaling config, use the
 ``--quantization-param-path`` parameter. If the parameter is not specified,
 the default scaling factor of ``1`` is used, which can lead to less accurate
 results. To generate ``kv-cache`` scaling JSON file, see `FP8 KV
-Cache <https://github.com/vllm-project/llm-compressor/blob/main/examples/quantization_kv_cache/README.md>`__
+Cache <https://github.com/vllm-project/vllm/blob/main/examples/fp8/README.md>`__
 in the vLLM GitHub repository.

 Two sample Llama scaling configuration files are in vLLM for ``llama2-70b`` and
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.4.3.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.4.3.rst
@@ -1,346 +0,0 @@
-:orphan:
-
-.. meta::
-   :description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the unified
-                 ROCm Docker image.
-   :keywords: model, MAD, automation, dashboarding, validate
-
-**********************************
-vLLM inference performance testing
-**********************************
-
-.. caution::
-
-   This documentation does not reflect the latest version of ROCm vLLM
-   inference performance documentation. See :doc:`../vllm` for the latest version.
-
-.. _vllm-benchmark-unified-docker:
-
-The `ROCm vLLM Docker <https://hub.docker.com/r/rocm/vllm/tags>`_ image offers
-a prebuilt, optimized environment designed for validating large language model
-(LLM) inference performance on the AMD Instinct™ MI300X accelerator. This
-ROCm vLLM Docker image integrates vLLM and PyTorch tailored specifically for the
-MI300X accelerator and includes the following components:
-
-* `ROCm 6.2.0 <https://github.com/ROCm/ROCm>`_
-
-* `vLLM 0.4.3 <https://docs.vllm.ai/en/latest>`_
-
-* `PyTorch 2.4.0 <https://github.com/pytorch/pytorch>`_
-
-* Tuning files (in CSV format)
-
-With this Docker image, you can quickly validate the expected inference
-performance numbers on the MI300X accelerator. This topic also provides tips on
-optimizing performance with popular AI models.
-
-.. _vllm-benchmark-vllm:
-
-.. note::
-
-   vLLM is a toolkit and library for LLM inference and
-   serving. It deploys the PagedAttention algorithm, which reduces memory
-   consumption and increases throughput by leveraging dynamic key and value
-   allocation in GPU memory. vLLM also incorporates many LLM acceleration
-   and quantization algorithms. In addition, AMD implements high-performance
-   custom kernels and modules in vLLM to enhance performance further. See
-   :ref:`fine-tuning-llms-vllm` and :ref:`mi300x-vllm-optimization` for more
-   information.
-
-Getting started
-===============
-
-Use the following procedures to reproduce the benchmark results on an
-MI300X accelerator with the prebuilt vLLM Docker image.
-
-.. _vllm-benchmark-get-started:
-
-1. Disable NUMA auto-balancing.
-
-   To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
-   might hang until the periodic balancing is finalized. For more information,
-   see :ref:`AMD Instinct MI300X system optimization <mi300x-disable-numa>`.
-
-   .. code-block:: shell
-
-      # disable automatic NUMA balancing
-      sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
-      # check if NUMA balancing is disabled (returns 0 if disabled)
-      cat /proc/sys/kernel/numa_balancing
-      0
-
-2. Download the :ref:`ROCm vLLM Docker image <vllm-benchmark-unified-docker>`.
-
-   Use the following command to pull the Docker image from Docker Hub.
-
-   .. code-block:: shell
-
-      docker pull rocm/vllm:rocm6.2_mi300_ubuntu22.04_py3.9_vllm_7c5fd50
-
-Once setup is complete, you can choose between two options to reproduce the
-benchmark results:
-
-  :ref:`MAD-integrated benchmarking <vllm-benchmark-mad>`
-
-  :ref:`Standalone benchmarking <vllm-benchmark-standalone>`
-
-.. _vllm-benchmark-mad:
-
-MAD-integrated benchmarking
-===========================
-
-Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
-directory and install the required packages on the host machine.
-
-.. code-block:: shell
-
-   git clone https://github.com/ROCm/MAD
-   cd MAD
-   pip install -r requirements.txt
-
-Use this command to run a performance benchmark test of the Llama 3.1 8B model
-on one GPU with the ``float16`` data type on the host machine.
-
-.. code-block:: shell
-
-   export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
-   python3 tools/run_models.py --tags pyt_vllm_llama-3.1-8b --keep-model-dir --live-output --timeout 28800
-
-ROCm MAD launches a Docker container with the name
-``container_ci-pyt_vllm_llama-3.1-8b``. The latency and throughput reports of the
-model are collected in the following path: ``~/MAD/reports_float16/``
-
-Although the following eight models are pre-configured to collect latency and
-throughput performance data, users can also change the benchmarking parameters.
-Refer to the :ref:`Standalone benchmarking <vllm-benchmark-standalone>` section.
-
-Available models
----------------
-
-.. hlist::
-   :columns: 3
-
-   * ``pyt_vllm_llama-3.1-8b``
-
-   * ``pyt_vllm_llama-3.1-70b``
-
-   * ``pyt_vllm_llama-3.1-405b``
-
-   * ``pyt_vllm_llama-2-7b``
-
-   * ``pyt_vllm_mistral-7b``
-
-   * ``pyt_vllm_qwen2-7b``
-
-   * ``pyt_vllm_jais-13b``
-
-   * ``pyt_vllm_jais-30b``
-
-.. _vllm-benchmark-standalone:
-
-Standalone benchmarking
-=======================
-
-You can run the vLLM benchmark tool independently by starting the
-:ref:`Docker container <vllm-benchmark-get-started>` as shown in the following
-snippet.
-
-.. code-block::
-
-   docker pull rocm/vllm:rocm6.2_mi300_ubuntu22.04_py3.9_vllm_7c5fd50
-   docker run -it --device=/dev/kfd --device=/dev/dri --group-add video --shm-size 128G --security-opt seccomp=unconfined --security-opt apparmor=unconfined --cap-add=SYS_PTRACE -v $(pwd):/workspace --env HUGGINGFACE_HUB_CACHE=/workspace --name unified_docker_vllm rocm/vllm:rocm6.2_mi300_ubuntu22.04_py3.9_vllm_7c5fd50
-
-In the Docker container, clone the ROCm MAD repository and navigate to the
-benchmark scripts directory at ``~/MAD/scripts/vllm``.
-
-.. code-block::
-
-   git clone https://github.com/ROCm/MAD
-   cd MAD/scripts/vllm
-
-Multiprocessing distributed executor
--------------------------------------
-
-To optimize vLLM performance, add the multiprocessing API server argument ``--distributed-executor-backend mp``.
-
-Command
-^^^^^^^^^^^^^^^^^^^^^^^^^
-
-To start the benchmark, use the following command with the appropriate options.
-See :ref:`Options <vllm-benchmark-standalone-options>` for the list of
-options and their descriptions.
-
-.. code-block:: shell
-
-   ./vllm_benchmark_report.sh -s $test_option -m $model_repo -g $num_gpu -d $datatype
-
-See the :ref:`examples <vllm-benchmark-run-benchmark>` for more information.
-
-.. note::
-
-   The input sequence length, output sequence length, and tensor parallel (TP) are
-   already configured. You don't need to specify them with this script.
-
-.. note::
-
-   If you encounter the following error, pass your access-authorized Hugging
-   Face token to the gated models.
-
-   .. code-block:: shell
-
-      OSError: You are trying to access a gated repo.
-
-      # pass your HF_TOKEN
-      export HF_TOKEN=$your_personal_hf_token
-
-.. _vllm-benchmark-standalone-options:
-
-Options
-^^^^^^^^^^^^^^^^^^^^^^^^^
-
-.. list-table::
-   :header-rows: 1
-
-   * - Name
-     - Options
-     - Description
-
-   * - ``$test_option``
-     - latency
-     - Measure decoding token latency
-
-   * -
-     - throughput
-     - Measure token generation throughput
-
-   * -
-     - all
-     - Measure both throughput and latency
-
-   * - ``$model_repo``
-     - ``meta-llama/Meta-Llama-3.1-8B-Instruct``
-     - Llama 3.1 8B
-
-   * - (``float16``)
-     - ``meta-llama/Meta-Llama-3.1-70B-Instruct``
-     - Llama 3.1 70B
-
-   * -
-     - ``meta-llama/Meta-Llama-3.1-405B-Instruct``
-     - Llama 3.1 405B
-
-   * -
-     - ``meta-llama/Llama-2-7b-chat-hf``
-     - Llama 2 7B
-
-   * -
-     - ``mistralai/Mixtral-8x7B-Instruct-v0.1``
-     - Mixtral 8x7B
-
-   * -
-     - ``mistralai/Mixtral-8x22B-Instruct-v0.1``
-     - Mixtral 8x22B
-
-   * -
-     - ``mistralai/Mistral-7B-Instruct-v0.3``
-     - Mistral 7B
-
-   * -
-     - ``Qwen/Qwen2-7B-Instruct``
-     - Qwen2 7B
-
-   * -
-     - ``core42/jais-13b-chat``
-     - JAIS 13B
-
-   * -
-     - ``core42/jais-30b-chat-v3``
-     - JAIS 30B
-
-   * - ``$num_gpu``
-     - 1 or 8
-     - Number of GPUs
-
-   * - ``$datatype``
-     - ``float16``
-     - Data type
-
-.. _vllm-benchmark-run-benchmark:
-
-Running the benchmark on the MI300X accelerator
-----------------------------------------------
-
-Here are some examples of running the benchmark with various options.
-See :ref:`Options <vllm-benchmark-standalone-options>` for the list of
-options and their descriptions.
-
-Latency benchmark example
-^^^^^^^^^^^^^^^^^^^^^^^^^
- 
-Use this command to benchmark the latency of the Llama 3.1 8B model on one GPU with the ``float16`` data type.
-
-.. code-block::
-
-   ./vllm_benchmark_report.sh -s latency -m meta-llama/Meta-Llama-3.1-8B-Instruct -g 1 -d float16
-
-Find the latency report at:
-
- ``./reports_float16/summary/Meta-Llama-3.1-8B-Instruct_latency_report.csv``
-
-Throughput benchmark example
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Use this command to benchmark the throughput of the Llama 3.1 8B model on one GPU with the ``float16`` data type.
-
-.. code-block:: shell
-
-   ./vllm_benchmark_report.sh -s throughput -m meta-llama/Meta-Llama-3.1-8B-Instruct -g 1 -d float16
-
-Find the throughput reports at:
-
- ``./reports_float16/summary/Meta-Llama-3.1-8B-Instruct_throughput_report.csv``
-
-.. raw:: html
-
-   <style>
-   mjx-container[jax="CHTML"][display="true"] {
-       text-align: left;
-       margin: 0;
-   }
-
-   </style>
-
-.. note::
-
-   Throughput is calculated as:
-
-   - .. math:: throughput\_tot = requests \times (\mathsf{\text{input lengths}} + \mathsf{\text{output lengths}}) / elapsed\_time
-
-   - .. math:: throughput\_gen = requests \times \mathsf{\text{output lengths}} / elapsed\_time
-
-Further reading
-===============
-
-- For application performance optimization strategies for HPC and AI workloads,
-  including inference with vLLM, see :doc:`/how-to/tuning-guides/mi300x/workload`.
-
-- To learn more about the options for latency and throughput benchmark scripts,
-  see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.
-
-- To learn more about system settings and management practices to configure your system for
-  MI300X accelerators, see :doc:`/how-to/system-optimization/mi300x`.
-
-- To learn how to run LLM models from Hugging Face or your own model, see
-  :doc:`Using ROCm for AI </how-to/rocm-for-ai/index>`.
-
-- To learn how to optimize inference on LLMs, see
-  :doc:`Fine-tuning LLMs and inference optimization </how-to/llm-fine-tuning-optimization/index>`.
-
-- For a list of other ready-made Docker images for ROCm, see the
-  :doc:`Docker image support matrix <rocm-install-on-linux:reference/docker-image-support-matrix>`.
-
-Previous versions
-=================
-
-See :doc:`vllm-history` to find documentation for previous releases
-of the ``ROCm/vllm`` Docker image.
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.6.4.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.6.4.rst
@@ -1,419 +0,0 @@
-:orphan:
-
-.. meta::
-   :description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the unified
-                 ROCm Docker image.
-   :keywords: model, MAD, automation, dashboarding, validate
-
-**********************************
-vLLM inference performance testing
-**********************************
-
-.. caution::
-
-   This documentation does not reflect the latest version of ROCm vLLM
-   inference performance documentation. See :doc:`../vllm` for the latest version.
-
-.. _vllm-benchmark-unified-docker:
-
-The `ROCm vLLM Docker <https://hub.docker.com/r/rocm/vllm/tags>`_ image offers
-a prebuilt, optimized environment designed for validating large language model
-(LLM) inference performance on the AMD Instinct™ MI300X accelerator. This
-ROCm vLLM Docker image integrates vLLM and PyTorch tailored specifically for the
-MI300X accelerator and includes the following components:
-
-* `ROCm 6.2.1 <https://github.com/ROCm/ROCm>`_
-
-* `vLLM 0.6.4 <https://docs.vllm.ai/en/latest>`_
-
-* `PyTorch 2.5.0 <https://github.com/pytorch/pytorch>`_
-
-* Tuning files (in CSV format)
-
-With this Docker image, you can quickly validate the expected inference
-performance numbers on the MI300X accelerator. This topic also provides tips on
-optimizing performance with popular AI models.
-
-.. hlist::
-   :columns: 6
-
-   * Llama 3.1 8B
-
-   * Llama 3.1 70B
-
-   * Llama 3.1 405B
-
-   * Llama 2 7B
-
-   * Llama 2 70B
-
-   * Mixtral 8x7B
-
-   * Mixtral 8x22B
-
-   * Mistral 7B
-
-   * Qwen2 7B
-
-   * Qwen2 72B
-
-   * JAIS 13B
-
-   * JAIS 30B
-
-.. _vllm-benchmark-vllm:
-
-.. note::
-
-   vLLM is a toolkit and library for LLM inference and serving. AMD implements
-   high-performance custom kernels and modules in vLLM to enhance performance.
-   See :ref:`fine-tuning-llms-vllm` and :ref:`mi300x-vllm-optimization` for
-   more information.
-
-Getting started
-===============
-
-Use the following procedures to reproduce the benchmark results on an
-MI300X accelerator with the prebuilt vLLM Docker image.
-
-.. _vllm-benchmark-get-started:
-
-1. Disable NUMA auto-balancing.
-
-   To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
-   might hang until the periodic balancing is finalized. For more information,
-   see :ref:`AMD Instinct MI300X system optimization <mi300x-disable-numa>`.
-
-   .. code-block:: shell
-
-      # disable automatic NUMA balancing
-      sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
-      # check if NUMA balancing is disabled (returns 0 if disabled)
-      cat /proc/sys/kernel/numa_balancing
-      0
-
-2. Download the :ref:`ROCm vLLM Docker image <vllm-benchmark-unified-docker>`.
-
-   Use the following command to pull the Docker image from Docker Hub.
-
-   .. code-block:: shell
-
-      docker pull rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4
-
-Once setup is complete, you can choose between two options to reproduce the
-benchmark results:
-
-  :ref:`MAD-integrated benchmarking <vllm-benchmark-mad>`
-
-  :ref:`Standalone benchmarking <vllm-benchmark-standalone>`
-
-.. _vllm-benchmark-mad:
-
-MAD-integrated benchmarking
-===========================
-
-Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
-directory and install the required packages on the host machine.
-
-.. code-block:: shell
-
-   git clone https://github.com/ROCm/MAD
-   cd MAD
-   pip install -r requirements.txt
-
-Use this command to run a performance benchmark test of the Llama 3.1 8B model
-on one GPU with the ``float16`` data type on the host machine.
-
-.. code-block:: shell
-
-   export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
-   python3 tools/run_models.py --tags pyt_vllm_llama-3.1-8b --keep-model-dir --live-output --timeout 28800
-
-ROCm MAD launches a Docker container with the name
-``container_ci-pyt_vllm_llama-3.1-8b``. The latency and throughput reports of the
-model are collected in the following path: ``~/MAD/reports_float16/``.
-
-Although the following models are preconfigured to collect latency and
-throughput performance data, you can also change the benchmarking parameters.
-Refer to the :ref:`Standalone benchmarking <vllm-benchmark-standalone>` section.
-
-Available models
----------------
-
-.. hlist::
-   :columns: 3
-
-   * ``pyt_vllm_llama-3.1-8b``
-
-   * ``pyt_vllm_llama-3.1-70b``
-
-   * ``pyt_vllm_llama-3.1-405b``
-
-   * ``pyt_vllm_llama-2-7b``
-
-   * ``pyt_vllm_llama-2-70b``
-
-   * ``pyt_vllm_mixtral-8x7b``
-
-   * ``pyt_vllm_mixtral-8x22b``
-
-   * ``pyt_vllm_mistral-7b``
-
-   * ``pyt_vllm_qwen2-7b``
-
-   * ``pyt_vllm_qwen2-72b``
-
-   * ``pyt_vllm_jais-13b``
-
-   * ``pyt_vllm_jais-30b``
-
-   * ``pyt_vllm_llama-3.1-8b_fp8``
-
-   * ``pyt_vllm_llama-3.1-70b_fp8``
-
-   * ``pyt_vllm_llama-3.1-405b_fp8``
-
-   * ``pyt_vllm_mixtral-8x7b_fp8``
-
-   * ``pyt_vllm_mixtral-8x22b_fp8``
-
-.. _vllm-benchmark-standalone:
-
-Standalone benchmarking
-=======================
-
-You can run the vLLM benchmark tool independently by starting the
-:ref:`Docker container <vllm-benchmark-get-started>` as shown in the following
-snippet.
-
-.. code-block::
-
-   docker pull rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4
-   docker run -it --device=/dev/kfd --device=/dev/dri --group-add video --shm-size 128G --security-opt seccomp=unconfined --security-opt apparmor=unconfined --cap-add=SYS_PTRACE -v $(pwd):/workspace --env HUGGINGFACE_HUB_CACHE=/workspace --name vllm_v0.6.4 rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4
-
-In the Docker container, clone the ROCm MAD repository and navigate to the
-benchmark scripts directory at ``~/MAD/scripts/vllm``.
-
-.. code-block::
-
-   git clone https://github.com/ROCm/MAD
-   cd MAD/scripts/vllm
-
-Command
-------
-
-To start the benchmark, use the following command with the appropriate options.
-See :ref:`Options <vllm-benchmark-standalone-options>` for the list of
-options and their descriptions.
-
-.. code-block:: shell
-
-   ./vllm_benchmark_report.sh -s $test_option -m $model_repo -g $num_gpu -d $datatype
-
-See the :ref:`examples <vllm-benchmark-run-benchmark>` for more information.
-
-.. note::
-
-   The input sequence length, output sequence length, and tensor parallel (TP) are
-   already configured. You don't need to specify them with this script.
-
-.. note::
-
-   If you encounter the following error, export a Hugging Face token that has
-   been granted access to the gated model.
-
-   .. code-block:: shell
-
-      OSError: You are trying to access a gated repo.
-
-      # pass your HF_TOKEN
-      export HF_TOKEN=$your_personal_hf_token
-
-.. _vllm-benchmark-standalone-options:
-
-Options
-------
-
-.. list-table::
-   :header-rows: 1
-   :align: center
-
-   * - Name
-     - Options
-     - Description
-
-   * - ``$test_option``
-     - latency
-     - Measure decoding token latency
-
-   * -
-     - throughput
-     - Measure token generation throughput
-
-   * -
-     - all
-     - Measure both throughput and latency
-
-   * - ``$model_repo``
-     - ``meta-llama/Meta-Llama-3.1-8B-Instruct``
-     - Llama 3.1 8B
-
-   * - (``float16``)
-     - ``meta-llama/Meta-Llama-3.1-70B-Instruct``
-     - Llama 3.1 70B
-
-   * -
-     - ``meta-llama/Meta-Llama-3.1-405B-Instruct``
-     - Llama 3.1 405B
-
-   * -
-     - ``meta-llama/Llama-2-7b-chat-hf``
-     - Llama 2 7B
-
-   * -
-     - ``meta-llama/Llama-2-70b-chat-hf``
-     - Llama 2 70B
-
-   * -
-     - ``mistralai/Mixtral-8x7B-Instruct-v0.1``
-     - Mixtral 8x7B
-
-   * -
-     - ``mistralai/Mixtral-8x22B-Instruct-v0.1``
-     - Mixtral 8x22B
-
-   * -
-     - ``mistralai/Mistral-7B-Instruct-v0.3``
-     - Mistral 7B
-
-   * -
-     - ``Qwen/Qwen2-7B-Instruct``
-     - Qwen2 7B
-
-   * -
-     - ``Qwen/Qwen2-72B-Instruct``
-     - Qwen2 72B
-
-   * -
-     - ``core42/jais-13b-chat``
-     - JAIS 13B
-
-   * -
-     - ``core42/jais-30b-chat-v3``
-     - JAIS 30B
-
-   * - ``$model_repo``
-     - ``amd/Meta-Llama-3.1-8B-Instruct-FP8-KV``
-     - Llama 3.1 8B
-
-   * - (``float8``)
-     - ``amd/Meta-Llama-3.1-70B-Instruct-FP8-KV``
-     - Llama 3.1 70B
-
-   * -
-     - ``amd/Meta-Llama-3.1-405B-Instruct-FP8-KV``
-     - Llama 3.1 405B
-
-   * -
-     - ``amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV``
-     - Mixtral 8x7B
-
-   * -
-     - ``amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV``
-     - Mixtral 8x22B
-
-   * - ``$num_gpu``
-     - 1 or 8
-     - Number of GPUs
-
-   * - ``$datatype``
-     - ``float16`` or ``float8``
-     - Data type
-
-.. _vllm-benchmark-run-benchmark:
-
-Running the benchmark on the MI300X accelerator
-----------------------------------------------
-
-Here are some examples of running the benchmark with various options.
-See :ref:`Options <vllm-benchmark-standalone-options>` for the list of
-options and their descriptions.
-
-Example 1: latency benchmark
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 
-Use this command to benchmark the latency of the Llama 3.1 8B model on one GPU with the ``float16`` and ``float8`` data types.
-
-.. code-block::
-
-   ./vllm_benchmark_report.sh -s latency -m meta-llama/Meta-Llama-3.1-8B-Instruct -g 1 -d float16
-   ./vllm_benchmark_report.sh -s latency -m amd/Meta-Llama-3.1-8B-Instruct-FP8-KV -g 1 -d float8
-
-Find the latency reports at:
-
- ``./reports_float16/summary/Meta-Llama-3.1-8B-Instruct_latency_report.csv``
-
- ``./reports_float8/summary/Meta-Llama-3.1-8B-Instruct-FP8-KV_latency_report.csv``
-
-Example 2: throughput benchmark
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Use this command to benchmark the throughput of the Llama 3.1 8B model on one GPU with the ``float16`` and ``float8`` data types.
-
-.. code-block:: shell
-
-   ./vllm_benchmark_report.sh -s throughput -m meta-llama/Meta-Llama-3.1-8B-Instruct -g 1 -d float16
-   ./vllm_benchmark_report.sh -s throughput -m amd/Meta-Llama-3.1-8B-Instruct-FP8-KV -g 1 -d float8
-
-Find the throughput reports at:
-
- ``./reports_float16/summary/Meta-Llama-3.1-8B-Instruct_throughput_report.csv``
-
- ``./reports_float8/summary/Meta-Llama-3.1-8B-Instruct-FP8-KV_throughput_report.csv``
-
-.. raw:: html
-
-   <style>
-   mjx-container[jax="CHTML"][display="true"] {
-       text-align: left;
-       margin: 0;
-   }
-   </style>
-
-.. note::
-
-   Throughput is calculated as:
-
-   - .. math:: throughput\_tot = requests \times (\mathsf{\text{input lengths}} + \mathsf{\text{output lengths}}) / elapsed\_time
-
-   - .. math:: throughput\_gen = requests \times \mathsf{\text{output lengths}} / elapsed\_time
-
-Further reading
-===============
-
- For application performance optimization strategies for HPC and AI workloads,
-  including inference with vLLM, see :doc:`/how-to/tuning-guides/mi300x/workload`.
-
- To learn more about the options for latency and throughput benchmark scripts,
-  see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.
-
- To learn more about system settings and management practices to configure your system for
-  MI300X accelerators, see :doc:`/how-to/system-optimization/mi300x`.
-
- To learn how to run LLM models from Hugging Face or your own model, see
-  :doc:`Using ROCm for AI </how-to/rocm-for-ai/index>`.
-
- To learn how to optimize inference on LLMs, see
-  :doc:`Fine-tuning LLMs and inference optimization </how-to/llm-fine-tuning-optimization/index>`.
-
- For a list of other ready-made Docker images for ROCm, see the
-  :doc:`Docker image support matrix <rocm-install-on-linux:reference/docker-image-support-matrix>`.
-
- To compare with the previous version of the ROCm vLLM Docker image for performance validation, refer to
-  `LLM inference performance validation on AMD Instinct MI300X (ROCm 6.2.0) <https://rocm.docs.amd.com/en/docs-6.2.0/how-to/performance-validation/mi300x/vllm-benchmark.html>`_.
-
-Previous versions
-=================
-
-See :doc:`vllm-history` to find documentation for previous releases
-of the ``ROCm/vllm`` Docker image.
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.6.6.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.6.6.rst
@@ -1,461 +0,0 @@
-:orphan:
-
-.. meta::
-   :description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the
-                 ROCm vLLM Docker image.
-   :keywords: model, MAD, automation, dashboarding, validate
-
-***********************************************************
-LLM inference performance validation on AMD Instinct MI300X
-***********************************************************
-
-.. caution::
-
-   This documentation does not reflect the latest version of ROCm vLLM
-   inference performance documentation. See :doc:`../vllm` for the latest version.
-
-.. _vllm-benchmark-unified-docker:
-
-The `ROCm vLLM Docker <https://hub.docker.com/r/rocm/vllm/tags>`_ image offers
-a prebuilt, optimized environment for validating large language model (LLM)
-inference performance on the AMD Instinct™ MI300X accelerator. This ROCm vLLM
-Docker image integrates vLLM and PyTorch tailored specifically for the MI300X
-accelerator and includes the following components:
-
-* `ROCm 6.3.1 <https://github.com/ROCm/ROCm>`_
-
-* `vLLM 0.6.6 <https://docs.vllm.ai/en/latest>`_
-
-* `PyTorch 2.7.0 (2.7.0a0+git3a58512) <https://github.com/pytorch/pytorch>`_
-
-With this Docker image, you can quickly validate the expected inference
-performance numbers for the MI300X accelerator. This topic also provides tips on
-optimizing performance with popular AI models. For more information, see the lists of
-:ref:`available models for MAD-integrated benchmarking <vllm-benchmark-mad-models>`
-and :ref:`standalone benchmarking <vllm-benchmark-standalone-options>`.
-
-.. _vllm-benchmark-vllm:
-
-.. note::
-
-   vLLM is a toolkit and library for LLM inference and serving. AMD implements
-   high-performance custom kernels and modules in vLLM to enhance performance.
-   See :ref:`fine-tuning-llms-vllm` and :ref:`mi300x-vllm-optimization` for
-   more information.
-
-Getting started
-===============
-
-Use the following procedures to reproduce the benchmark results on an
-MI300X accelerator with the prebuilt vLLM Docker image.
-
-.. _vllm-benchmark-get-started:
-
-1. Disable NUMA auto-balancing.
-
-   To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
-   might hang until the periodic balancing is finalized. For more information,
-   see :ref:`AMD Instinct MI300X system optimization <mi300x-disable-numa>`.
-
-   .. code-block:: shell
-
-      # disable automatic NUMA balancing
-      sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
-      # check if NUMA balancing is disabled (returns 0 if disabled)
-      cat /proc/sys/kernel/numa_balancing
-      0
-
-2. Download the :ref:`ROCm vLLM Docker image <vllm-benchmark-unified-docker>`.
-
-   Use the following command to pull the Docker image from Docker Hub.
-
-   .. code-block:: shell
-
-      docker pull rocm/vllm:rocm6.3.1_mi300_ubuntu22.04_py3.12_vllm_0.6.6
-
-Once the setup is complete, choose between two options to reproduce the
-benchmark results:
-
-  :ref:`MAD-integrated benchmarking <vllm-benchmark-mad>`
-
-  :ref:`Standalone benchmarking <vllm-benchmark-standalone>`
-
-.. _vllm-benchmark-mad:
-
-MAD-integrated benchmarking
-===========================
-
-Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
-directory and install the required packages on the host machine.
-
-.. code-block:: shell
-
-   git clone https://github.com/ROCm/MAD
-   cd MAD
-   pip install -r requirements.txt
-
-Use this command to run a performance benchmark test of the Llama 3.1 8B model
-on one GPU with the ``float16`` data type on the host machine.
-
-.. code-block:: shell
-
-   export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
-   python3 tools/run_models.py --tags pyt_vllm_llama-3.1-8b --keep-model-dir --live-output --timeout 28800
-
-ROCm MAD launches a Docker container with the name
-``container_ci-pyt_vllm_llama-3.1-8b``. The latency and throughput reports of the
-model are collected in the following path: ``~/MAD/reports_float16/``.
-
-Although the following models are preconfigured to collect latency and
-throughput performance data, you can also change the benchmarking parameters.
-Refer to the :ref:`Standalone benchmarking <vllm-benchmark-standalone>` section.
-
-.. _vllm-benchmark-mad-models:
-
-Available models
----------------
-
-.. list-table::
-   :header-rows: 1
-   :widths: 2, 3
-
-   * - Model name
-     - Tag
-
-   * - `Llama 3.1 8B <https://huggingface.co/meta-llama/Llama-3.1-8B>`_
-     - ``pyt_vllm_llama-3.1-8b``
-
-   * - `Llama 3.1 70B <https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct>`_
-     - ``pyt_vllm_llama-3.1-70b``
-
-   * - `Llama 3.1 405B <https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct>`_
-     - ``pyt_vllm_llama-3.1-405b``
-
-   * - `Llama 3.2 11B Vision <https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct>`_
-     - ``pyt_vllm_llama-3.2-11b-vision-instruct``
-
-   * - `Llama 2 7B <https://huggingface.co/meta-llama/Llama-2-7b-chat-hf>`_
-     - ``pyt_vllm_llama-2-7b``
-
-   * - `Llama 2 70B <https://huggingface.co/meta-llama/Llama-2-70b-chat-hf>`_
-     - ``pyt_vllm_llama-2-70b``
-
-   * - `Mixtral MoE 8x7B <https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1>`_
-     - ``pyt_vllm_mixtral-8x7b``
-
-   * - `Mixtral MoE 8x22B <https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1>`_
-     - ``pyt_vllm_mixtral-8x22b``
-
-   * - `Mistral 7B <https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3>`_
-     - ``pyt_vllm_mistral-7b``
-
-   * - `Qwen2 7B <https://huggingface.co/Qwen/Qwen2-7B-Instruct>`_
-     - ``pyt_vllm_qwen2-7b``
-
-   * - `Qwen2 72B <https://huggingface.co/Qwen/Qwen2-72B-Instruct>`_
-     - ``pyt_vllm_qwen2-72b``
-
-   * - `JAIS 13B <https://huggingface.co/core42/jais-13b-chat>`_
-     - ``pyt_vllm_jais-13b``
-
-   * - `JAIS 30B <https://huggingface.co/core42/jais-30b-chat-v3>`_
-     - ``pyt_vllm_jais-30b``
-
-   * - `DBRX Instruct <https://huggingface.co/databricks/dbrx-instruct>`_
-     - ``pyt_vllm_dbrx-instruct``
-
-   * - `Gemma 2 27B <https://huggingface.co/google/gemma-2-27b>`_
-     - ``pyt_vllm_gemma-2-27b``
-
-   * - `C4AI Command R+ 08-2024 <https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024>`_
-     - ``pyt_vllm_c4ai-command-r-plus-08-2024``
-
-   * - `DeepSeek MoE 16B <https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat>`_
-     - ``pyt_vllm_deepseek-moe-16b-chat``
-
-   * - `Llama 3.1 70B FP8 <https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV>`_
-     - ``pyt_vllm_llama-3.1-70b_fp8``
-
-   * - `Llama 3.1 405B FP8 <https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV>`_
-     - ``pyt_vllm_llama-3.1-405b_fp8``
-
-   * - `Mixtral MoE 8x7B FP8 <https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV>`_
-     - ``pyt_vllm_mixtral-8x7b_fp8``
-
-   * - `Mixtral MoE 8x22B FP8 <https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV>`_
-     - ``pyt_vllm_mixtral-8x22b_fp8``
-
-   * - `Mistral 7B FP8 <https://huggingface.co/amd/Mistral-7B-v0.1-FP8-KV>`_
-     - ``pyt_vllm_mistral-7b_fp8``
-
-   * - `DBRX Instruct FP8 <https://huggingface.co/amd/dbrx-instruct-FP8-KV>`_
-     - ``pyt_vllm_dbrx_fp8``
-
-   * - `C4AI Command R+ 08-2024 FP8 <https://huggingface.co/amd/c4ai-command-r-plus-FP8-KV>`_
-     - ``pyt_vllm_command-r-plus_fp8``
-
-.. _vllm-benchmark-standalone:
-
-Standalone benchmarking
-=======================
-
-You can run the vLLM benchmark tool independently by starting the
-:ref:`Docker container <vllm-benchmark-get-started>` as shown in the following
-snippet.
-
-.. code-block::
-
-   docker pull rocm/vllm:rocm6.3.1_mi300_ubuntu22.04_py3.12_vllm_0.6.6
-   docker run -it --device=/dev/kfd --device=/dev/dri --group-add video --shm-size 16G --security-opt seccomp=unconfined --security-opt apparmor=unconfined --cap-add=SYS_PTRACE -v $(pwd):/workspace --env HUGGINGFACE_HUB_CACHE=/workspace --name vllm_v0.6.6 rocm/vllm:rocm6.3.1_mi300_ubuntu22.04_py3.12_vllm_0.6.6
-
-In the Docker container, clone the ROCm MAD repository and navigate to the
-benchmark scripts directory at ``~/MAD/scripts/vllm``.
-
-.. code-block::
-
-   git clone https://github.com/ROCm/MAD
-   cd MAD/scripts/vllm
-
-Command
-------
-
-To start the benchmark, use the following command with the appropriate options.
-See :ref:`Options <vllm-benchmark-standalone-options>` for the list of
-options and their descriptions.
-
-.. code-block:: shell
-
-   ./vllm_benchmark_report.sh -s $test_option -m $model_repo -g $num_gpu -d $datatype
-
-See the :ref:`examples <vllm-benchmark-run-benchmark>` for more information.
-
-.. note::
-
-   The input sequence length, output sequence length, and tensor parallel (TP) are
-   already configured. You don't need to specify them with this script.
-
-.. note::
-
-   If you encounter the following error, export a Hugging Face token that has
-   been granted access to the gated model.
-
-   .. code-block:: shell
-
-      OSError: You are trying to access a gated repo.
-
-      # pass your HF_TOKEN
-      export HF_TOKEN=$your_personal_hf_token
-
-.. _vllm-benchmark-standalone-options:
-
-Options and available models
----------------------------
-
-.. list-table::
-   :header-rows: 1
-   :align: center
-
-   * - Name
-     - Options
-     - Description
-
-   * - ``$test_option``
-     - latency
-     - Measure decoding token latency
-
-   * -
-     - throughput
-     - Measure token generation throughput
-
-   * -
-     - all
-     - Measure both throughput and latency
-
-   * - ``$model_repo``
-     - ``meta-llama/Llama-3.1-8B-Instruct``
-     - `Llama 3.1 8B <https://huggingface.co/meta-llama/Llama-3.1-8B>`_
-
-   * - (``float16``)
-     - ``meta-llama/Llama-3.1-70B-Instruct``
-     - `Llama 3.1 70B <https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct>`_
-
-   * -
-     - ``meta-llama/Llama-3.1-405B-Instruct``
-     - `Llama 3.1 405B <https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct>`_
-
-   * -
-     - ``meta-llama/Llama-3.2-11B-Vision-Instruct``
-     - `Llama 3.2 11B Vision <https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct>`_
-
-   * -
-     - ``meta-llama/Llama-2-7b-chat-hf``
-     - `Llama 2 7B <https://huggingface.co/meta-llama/Llama-2-7b-chat-hf>`_
-
-   * -
-     - ``meta-llama/Llama-2-70b-chat-hf``
-     - `Llama 2 70B <https://huggingface.co/meta-llama/Llama-2-70b-chat-hf>`_
-
-   * -
-     - ``mistralai/Mixtral-8x7B-Instruct-v0.1``
-     - `Mixtral MoE 8x7B <https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1>`_
-
-   * -
-     - ``mistralai/Mixtral-8x22B-Instruct-v0.1``
-     - `Mixtral MoE 8x22B <https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1>`_
-
-   * -
-     - ``mistralai/Mistral-7B-Instruct-v0.3``
-     - `Mistral 7B <https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3>`_
-
-   * -
-     - ``Qwen/Qwen2-7B-Instruct``
-     - `Qwen2 7B <https://huggingface.co/Qwen/Qwen2-7B-Instruct>`_
-
-   * -
-     - ``Qwen/Qwen2-72B-Instruct``
-     - `Qwen2 72B <https://huggingface.co/Qwen/Qwen2-72B-Instruct>`_
-
-   * -
-     - ``core42/jais-13b-chat``
-     - `JAIS 13B <https://huggingface.co/core42/jais-13b-chat>`_
-
-   * -
-     - ``core42/jais-30b-chat-v3``
-     - `JAIS 30B <https://huggingface.co/core42/jais-30b-chat-v3>`_
-
-   * -
-     - ``databricks/dbrx-instruct``
-     - `DBRX Instruct <https://huggingface.co/databricks/dbrx-instruct>`_
-
-   * -
-     - ``google/gemma-2-27b``
-     - `Gemma 2 27B <https://huggingface.co/google/gemma-2-27b>`_
-
-   * -
-     - ``CohereForAI/c4ai-command-r-plus-08-2024``
-     - `C4AI Command R+ 08-2024 <https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024>`_
-
-   * -
-     - ``deepseek-ai/deepseek-moe-16b-chat``
-     - `DeepSeek MoE 16B <https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat>`_
-
-   * - ``$model_repo``
-     - ``amd/Llama-3.1-70B-Instruct-FP8-KV``
-     - `Llama 3.1 70B FP8 <https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV>`_
-
-   * - (``float8``)
-     - ``amd/Llama-3.1-405B-Instruct-FP8-KV``
-     - `Llama 3.1 405B FP8 <https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV>`_
-
-   * -
-     - ``amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV``
-     - `Mixtral MoE 8x7B FP8 <https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV>`_
-
-   * -
-     - ``amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV``
-     - `Mixtral MoE 8x22B FP8 <https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV>`_
-
-   * -
-     - ``amd/Mistral-7B-v0.1-FP8-KV``
-     - `Mistral 7B FP8 <https://huggingface.co/amd/Mistral-7B-v0.1-FP8-KV>`_
-
-   * -
-     - ``amd/dbrx-instruct-FP8-KV``
-     - `DBRX Instruct FP8 <https://huggingface.co/amd/dbrx-instruct-FP8-KV>`_
-
-   * -
-     - ``amd/c4ai-command-r-plus-FP8-KV``
-     - `C4AI Command R+ 08-2024 FP8 <https://huggingface.co/amd/c4ai-command-r-plus-FP8-KV>`_
-
-   * - ``$num_gpu``
-     - 1 or 8
-     - Number of GPUs
-
-   * - ``$datatype``
-     - ``float16`` or ``float8``
-     - Data type
-
-.. _vllm-benchmark-run-benchmark:
-
-Running the benchmark on the MI300X accelerator
-----------------------------------------------
-
-Here are some examples of running the benchmark with various options.
-See :ref:`Options <vllm-benchmark-standalone-options>` for the list of
-options and their descriptions.
-
-Example 1: latency benchmark
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 
-Use this command to benchmark the latency of the Llama 3.1 70B model on eight GPUs with the ``float16`` and ``float8`` data types.
-
-.. code-block::
-
-   ./vllm_benchmark_report.sh -s latency -m meta-llama/Llama-3.1-70B-Instruct -g 8 -d float16
-   ./vllm_benchmark_report.sh -s latency -m amd/Llama-3.1-70B-Instruct-FP8-KV -g 8 -d float8
-
-Find the latency reports at:
-
- ``./reports_float16/summary/Llama-3.1-70B-Instruct_latency_report.csv``
-
- ``./reports_float8/summary/Llama-3.1-70B-Instruct-FP8-KV_latency_report.csv``
-
-Example 2: throughput benchmark
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Use this command to benchmark the throughput of the Llama 3.1 70B model on eight GPUs with the ``float16`` and ``float8`` data types.
-
-.. code-block:: shell
-
-   ./vllm_benchmark_report.sh -s throughput -m meta-llama/Llama-3.1-70B-Instruct -g 8 -d float16
-   ./vllm_benchmark_report.sh -s throughput -m amd/Llama-3.1-70B-Instruct-FP8-KV -g 8 -d float8
-
-Find the throughput reports at:
-
- ``./reports_float16/summary/Llama-3.1-70B-Instruct_throughput_report.csv``
-
- ``./reports_float8/summary/Llama-3.1-70B-Instruct-FP8-KV_throughput_report.csv``
-
-.. raw:: html
-
-   <style>
-   mjx-container[jax="CHTML"][display="true"] {
-       text-align: left;
-       margin: 0;
-   }
-   </style>
-
-.. note::
-
-   Throughput is calculated as:
-
-   - .. math:: throughput\_tot = requests \times (\mathsf{\text{input lengths}} + \mathsf{\text{output lengths}}) / elapsed\_time
-
-   - .. math:: throughput\_gen = requests \times \mathsf{\text{output lengths}} / elapsed\_time
-
-Further reading
-===============
-
- For application performance optimization strategies for HPC and AI workloads,
-  including inference with vLLM, see :doc:`../inference-optimization/workload`.
-
- To learn more about the options for latency and throughput benchmark scripts,
-  see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.
-
- To learn more about system settings and management practices to configure your system for
-  MI300X accelerators, see :doc:`../../system-optimization/mi300x`.
-
- To learn how to run LLM models from Hugging Face or your own model, see
-  :doc:`Running models from Hugging Face <hugging-face-models>`.
-
- To learn how to optimize inference on LLMs, see
-  :doc:`Inference optimization <../inference-optimization/index>`.
-
- To learn how to fine-tune LLMs, see
-  :doc:`Fine-tuning LLMs <../fine-tuning/index>`.
-
-Previous versions
-=================
-
-See :doc:`vllm-history` to find documentation for previous releases
-of the ``ROCm/vllm`` Docker image.
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.3-20250415.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.3-20250415.rst
@@ -1,343 +0,0 @@
-.. meta::
-   :description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the
-                 ROCm vLLM Docker image.
-   :keywords: model, MAD, automation, dashboarding, validate
-
-**********************************
-vLLM inference performance testing
-**********************************
-
-.. _vllm-benchmark-unified-docker:
-
-.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.8.3_20250415-benchmark-models.yaml
-
-   {% set unified_docker = data.vllm_benchmark.unified_docker.latest %}
-   {% set model_groups = data.vllm_benchmark.model_groups %}
-
-   The `ROCm vLLM Docker <{{ unified_docker.docker_hub_url }}>`_ image offers
-   a prebuilt, optimized environment for validating large language model (LLM)
-   inference performance on AMD Instinct™ MI300X series accelerators. This ROCm vLLM
-   Docker image integrates vLLM and PyTorch tailored specifically for MI300X series
-   accelerators and includes the following components:
-
-   * `ROCm {{ unified_docker.rocm_version }} <https://github.com/ROCm/ROCm>`_
-
-   * `vLLM {{ unified_docker.vllm_version }} <https://docs.vllm.ai/en/latest>`_
-
-   * `PyTorch {{ unified_docker.pytorch_version }} <https://github.com/pytorch/pytorch>`_
-
-   * `hipBLASLt {{ unified_docker.hipblaslt_version }} <https://github.com/ROCm/hipBLASLt>`_
-
-   With this Docker image, you can quickly test the :ref:`expected
-   inference performance numbers <vllm-benchmark-performance-measurements>` for
-   MI300X series accelerators.
-
-   .. _vllm-benchmark-available-models:
-
-   Supported models
-   ================
-
-   .. raw:: html
-
-      <div id="vllm-benchmark-ud-params-picker" class="container-fluid">
-        <div class="row">
-          <div class="col-2 me-2 model-param-head">Model</div>
-          <div class="row col-10">
-   {% for model_group in model_groups %}
-            <div class="col-3 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
-   {% endfor %}
-          </div>
-        </div>
-
-        <div class="row mt-1">
-          <div class="col-2 me-2 model-param-head">Model variant</div>
-          <div class="row col-10">
-   {% for model_group in model_groups %}
-      {% set models = model_group.models %}
-      {% for model in models %}
-         {% if models|length % 3 == 0 %}
-            <div class="col-4 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
-         {% else %}
-            <div class="col-6 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
-         {% endif %}
-      {% endfor %}
-   {% endfor %}
-          </div>
-        </div>
-      </div>
-
-   .. _vllm-benchmark-vllm:
-
-   {% for model_group in model_groups %}
-      {% for model in model_group.models %}
-
-   .. container:: model-doc {{model.mad_tag}}
-
-      .. note::
-
-         See the `{{ model.model }} model card on Hugging Face <{{ model.url }}>`_ to learn more about your selected model.
-         Some models require access authorization prior to use via an external license agreement through a third party.
-
-      {% endfor %}
-   {% endfor %}
-
-   .. note::
-
-      vLLM is a toolkit and library for LLM inference and serving. AMD implements
-      high-performance custom kernels and modules in vLLM to enhance performance.
-      See :ref:`fine-tuning-llms-vllm` and :ref:`mi300x-vllm-optimization` for
-      more information.
-
-   .. _vllm-benchmark-performance-measurements:
-
-   Performance measurements
-   ========================
-
-   To evaluate performance, the
-   `Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
-   page provides reference throughput and latency measurements for inferencing
-   popular AI models.
-
-   .. important::
-
-      The performance data presented in
-      `Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
-      only reflects the :doc:`latest version of this inference benchmarking environment <../vllm>`.
-      The listed measurements should not be interpreted as the peak performance achievable by AMD Instinct MI325X and MI300X accelerators or ROCm software.
-
-   Advanced features and known issues
-   ==================================
-
-   For information on experimental features and known issues related to ROCm optimization efforts on vLLM,
-   see the developer's guide at `<https://github.com/ROCm/vllm/tree/7a9f58aae0e7215a5f3dccde60e35072c41656c2/docs/dev-docker>`__.
-
-   System validation
-   =================
-
-   Before running AI workloads, it's important to validate that your AMD hardware is configured
-   correctly and performing optimally.
-
-   To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
-   might hang until the periodic balancing is finalized. For more information,
-   see the :ref:`system validation steps <rocm-for-ai-system-optimization>`.
-
-   .. code-block:: shell
-
-      # disable automatic NUMA balancing
-      sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
-      # check if NUMA balancing is disabled (returns 0 if disabled)
-      cat /proc/sys/kernel/numa_balancing
-      0
-
-   To test for optimal performance, consult the recommended :ref:`System health benchmarks
-   <rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
-   system's configuration.
-
-   Pull the Docker image
-   =====================
-
-   Download the `ROCm vLLM Docker image <{{ unified_docker.docker_hub_url }}>`_.
-   Use the following command to pull the Docker image from Docker Hub.
-
-   .. code-block:: shell
-
-      docker pull {{ unified_docker.pull_tag }}
-
-   Benchmarking
-   ============
-
-   Once the setup is complete, choose between two options to reproduce the
-   benchmark results:
-
-   .. _vllm-benchmark-mad:
-
-   {% for model_group in model_groups %}
-      {% for model in model_group.models %}
-
-   .. container:: model-doc {{model.mad_tag}}
-
-      .. tab-set::
-
-         .. tab-item:: MAD-integrated benchmarking
-
-            Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
-            directory and install the required packages on the host machine.
-
-            .. code-block:: shell
-
-               git clone https://github.com/ROCm/MAD
-               cd MAD
-               pip install -r requirements.txt
-
-            Use this command to run the performance benchmark test on the `{{model.model}} <{{ model.url }}>`_ model
-            using one GPU with the ``{{model.precision}}`` data type on the host machine.
-
-            .. code-block:: shell
-
-               export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
-               python3 tools/run_models.py --tags {{model.mad_tag}} --keep-model-dir --live-output --timeout 28800
-
-            MAD launches a Docker container with the name
-            ``container_ci-{{model.mad_tag}}``. The latency and throughput reports of the
-            model are collected in the following path: ``~/MAD/reports_{{model.precision}}/``.
-
-            Although the :ref:`available models <vllm-benchmark-available-models>` are preconfigured
-            to collect latency and throughput performance data, you can also change the benchmarking
-            parameters. See the standalone benchmarking tab for more information.
-
-            {% if model.tunableop %}
-
-            .. note::
-
-               For improved performance, consider enabling :ref:`PyTorch TunableOp <mi300x-tunableop>`.
-               TunableOp automatically explores different implementations and configurations of certain PyTorch
-               operators to find the fastest one for your hardware.
-
-               By default, ``{{model.mad_tag}}`` runs with TunableOp disabled
-               (see
-               `<https://github.com/ROCm/MAD/blob/develop/models.json>`__). To
-               enable it, edit the default run behavior in the ``models.json``
-               configuration before running inference -- update the model's run
-               ``args`` by changing ``--tunableop off`` to ``--tunableop on``.
-
-               Enabling TunableOp triggers a two-pass run -- a warm-up followed by the performance-collection run.
-
-            {% endif %}
-
-         .. tab-item:: Standalone benchmarking
-
-            Run the vLLM benchmark tool independently by starting the
-            `Docker container <{{ unified_docker.docker_hub_url }}>`_
-            as shown in the following snippet.
-
-            .. code-block::
-
-               docker pull {{ unified_docker.pull_tag }}
-               docker run -it --device=/dev/kfd --device=/dev/dri --group-add video --shm-size 16G --security-opt seccomp=unconfined --security-opt apparmor=unconfined --cap-add=SYS_PTRACE -v $(pwd):/workspace --env HUGGINGFACE_HUB_CACHE=/workspace --name test {{ unified_docker.pull_tag }}
-
-            In the Docker container, clone the ROCm MAD repository and navigate to the
-            benchmark scripts directory at ``~/MAD/scripts/vllm``.
-
-            .. code-block::
-
-               git clone https://github.com/ROCm/MAD
-               cd MAD/scripts/vllm
-
-            To start the benchmark, use the following command with the appropriate options.
-
-            .. code-block::
-
-               ./vllm_benchmark_report.sh -s $test_option -m {{model.model_repo}} -g $num_gpu -d {{model.precision}}
-
-            .. list-table::
-               :header-rows: 1
-               :align: center
-
-               * - Name
-                 - Options
-                 - Description
-
-               * - ``$test_option``
-                 - latency
-                 - Measure decoding token latency
-
-               * -
-                 - throughput
-                 - Measure token generation throughput
-
-               * -
-                 - all
-                 - Measure both throughput and latency
-
-               * - ``$num_gpu``
-                 - 1 or 8
-                 - Number of GPUs
-
-               * - ``$datatype``
-                 - ``float16`` or ``float8``
-                 - Data type
-
-            .. note::
-
-               The input sequence length, output sequence length, and tensor parallel (TP) are
-               already configured. You don't need to specify them with this script.
-
-            .. note::
-
-               If you encounter the following error, set the ``HF_TOKEN`` environment variable
-               to your access-authorized Hugging Face token to access the gated models.
-
-               .. code-block::
-
-                  OSError: You are trying to access a gated repo.
-
-                  # pass your HF_TOKEN
-                  export HF_TOKEN=$your_personal_hf_token
-
-            Here are some examples of running the benchmark with various options.
-
-            * Latency benchmark
-
-              Use this command to benchmark the latency of the {{model.model}} model on eight GPUs with ``{{model.precision}}`` precision.
-
-              .. code-block::
-
-                 ./vllm_benchmark_report.sh -s latency -m {{model.model_repo}} -g 8 -d {{model.precision}}
-
-              Find the latency report at ``./reports_{{model.precision}}_vllm_rocm{{unified_docker.rocm_version}}/summary/{{model.model_repo.split('/', 1)[1] if '/' in model.model_repo else model.model_repo}}_latency_report.csv``.
-
-            * Throughput benchmark
-
-              Use this command to benchmark the throughput of the {{model.model}} model on eight GPUs with ``{{model.precision}}`` precision.
-
-              .. code-block:: shell
-
-                 ./vllm_benchmark_report.sh -s throughput -m {{model.model_repo}} -g 8 -d {{model.precision}}
-
-              Find the throughput report at ``./reports_{{model.precision}}_vllm_rocm{{unified_docker.rocm_version}}/summary/{{model.model_repo.split('/', 1)[1] if '/' in model.model_repo else model.model_repo}}_throughput_report.csv``.
-
-            .. raw:: html
-
-               <style>
-               mjx-container[jax="CHTML"][display="true"] {
-                  text-align: left;
-                  margin: 0;
-               }
-               </style>
-
-            .. note::
-
-               Throughput is calculated as:
-
-               - .. math:: throughput\_tot = requests \times (\mathsf{\text{input lengths}} + \mathsf{\text{output lengths}}) / elapsed\_time
-
-               - .. math:: throughput\_gen = requests \times \mathsf{\text{output lengths}} / elapsed\_time
-      {% endfor %}
-   {% endfor %}
-
-Further reading
-===============
-
- For application performance optimization strategies for HPC and AI workloads,
-  including inference with vLLM, see :doc:`../../../inference-optimization/workload`.
-
- To learn more about the options for latency and throughput benchmark scripts,
-  see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.
-
- To learn more about system settings and management practices to configure your system for
-  MI300X accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
-
- To learn how to run LLM models from Hugging Face or your own model, see
-  :doc:`Running models from Hugging Face <../../hugging-face-models>`.
-
- To learn how to optimize inference on LLMs, see
-  :doc:`Inference optimization <../../../inference-optimization/index>`.
-
- To learn how to fine-tune LLMs, see
-  :doc:`Fine-tuning LLMs <../../../fine-tuning/index>`.
-
-Previous versions
-=================
-
-See :doc:`vllm-history` to find documentation for previous releases
-of the ``ROCm/vllm`` Docker image.
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.5-20250513.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.5-20250513.rst
@@ -1,354 +0,0 @@
-:orphan:
-
-.. meta::
-   :description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the
-                 ROCm vLLM Docker image.
-   :keywords: model, MAD, automation, dashboarding, validate
-
-**********************************
-vLLM inference performance testing
-**********************************
-
-.. caution::
-
-   This documentation does not reflect the latest version of ROCm vLLM
-   inference performance documentation. See :doc:`../vllm` for the latest version.
-
-.. _vllm-benchmark-unified-docker:
-
-.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.8.5_20250513-benchmark-models.yaml
-
-   {% set unified_docker = data.vllm_benchmark.unified_docker.latest %}
-   {% set model_groups = data.vllm_benchmark.model_groups %}
-
-   The `ROCm vLLM Docker <{{ unified_docker.docker_hub_url }}>`_ image offers
-   a prebuilt, optimized environment for validating large language model (LLM)
-   inference performance on AMD Instinct™ MI300X series accelerators. This ROCm vLLM
-   Docker image integrates vLLM and PyTorch tailored specifically for MI300X series
-   accelerators and includes the following components:
-
-   * `ROCm {{ unified_docker.rocm_version }} <https://github.com/ROCm/ROCm>`_
-
-   * `vLLM {{ unified_docker.vllm_version }} <https://docs.vllm.ai/en/latest>`_
-
-   * `PyTorch {{ unified_docker.pytorch_version }} <https://github.com/pytorch/pytorch>`_
-
-   * `hipBLASLt {{ unified_docker.hipblaslt_version }} <https://github.com/ROCm/hipBLASLt>`_
-
-   With this Docker image, you can quickly test the :ref:`expected
-   inference performance numbers <vllm-benchmark-performance-measurements>` for
-   MI300X series accelerators.
-
-   .. _vllm-benchmark-available-models:
-
-   Supported models
-   ================
-
-   The following models are supported for inference performance benchmarking
-   with vLLM and ROCm. Some instructions, commands, and recommendations in this
-   documentation might vary by model -- select one to get started.
-
-   .. raw:: html
-
-      <div id="vllm-benchmark-ud-params-picker" class="container-fluid">
-        <div class="row">
-          <div class="col-2 me-2 model-param-head">Model group</div>
-          <div class="row col-10">
-   {% for model_group in model_groups %}
-            <div class="col-3 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
-   {% endfor %}
-          </div>
-        </div>
-
-        <div class="row mt-1">
-          <div class="col-2 me-2 model-param-head">Model</div>
-          <div class="row col-10">
-   {% for model_group in model_groups %}
-      {% set models = model_group.models %}
-      {% for model in models %}
-         {% if models|length % 3 == 0 %}
-            <div class="col-4 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
-         {% else %}
-            <div class="col-6 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
-         {% endif %}
-      {% endfor %}
-   {% endfor %}
-          </div>
-        </div>
-      </div>
-
-   .. _vllm-benchmark-vllm:
-
-   {% for model_group in model_groups %}
-      {% for model in model_group.models %}
-
-   .. container:: model-doc {{model.mad_tag}}
-
-      .. note::
-
-         See the `{{ model.model }} model card on Hugging Face <{{ model.url }}>`_ to learn more about your selected model.
-         Some models require access authorization prior to use via an external license agreement through a third party.
-
-      {% endfor %}
-   {% endfor %}
-
-   .. note::
-
-      vLLM is a toolkit and library for LLM inference and serving. AMD implements
-      high-performance custom kernels and modules in vLLM to enhance performance.
-      See :ref:`fine-tuning-llms-vllm` and :ref:`mi300x-vllm-optimization` for
-      more information.
-
-   .. _vllm-benchmark-performance-measurements:
-
-   Performance measurements
-   ========================
-
-   To evaluate performance, the
-   `Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
-   page provides reference throughput and latency measurements for inferencing
-   popular AI models.
-
-   .. important::
-
-      The performance data presented in
-      `Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
-      only reflects the :doc:`latest version of this inference benchmarking environment <../vllm>`.
-      The listed measurements should not be interpreted as the peak performance achievable by AMD Instinct MI325X and MI300X accelerators or ROCm software.
-
-   Advanced features and known issues
-   ==================================
-
-   For information on experimental features and known issues related to ROCm optimization efforts on vLLM,
-   see the developer's guide at `<https://github.com/ROCm/vllm/tree/16d2b92ebcf90fe55cf73fa0b9329a6c9d3dede8/docs/dev-docker>`__.
-
-   System validation
-   =================
-
-   Before running AI workloads, it's important to validate that your AMD hardware is configured
-   correctly and performing optimally.
-
-   To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
-   might hang until the periodic balancing is finalized. For more information,
-   see the :ref:`system validation steps <rocm-for-ai-system-optimization>`.
-
-   .. code-block:: shell
-
-      # disable automatic NUMA balancing
-      sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
-      # check if NUMA balancing is disabled (returns 0 if disabled)
-      cat /proc/sys/kernel/numa_balancing
-      0
-
-   To test for optimal performance, consult the recommended :ref:`System health benchmarks
-   <rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
-   system's configuration.
-
-   Pull the Docker image
-   =====================
-
-   Download the `ROCm vLLM Docker image <{{ unified_docker.docker_hub_url }}>`_.
-   Use the following command to pull the Docker image from Docker Hub.
-
-   .. code-block:: shell
-
-      docker pull {{ unified_docker.pull_tag }}
-
-   Benchmarking
-   ============
-
-   Once the setup is complete, choose between two options to reproduce the
-   benchmark results:
-
-   .. _vllm-benchmark-mad:
-
-   {% for model_group in model_groups %}
-      {% for model in model_group.models %}
-
-   .. container:: model-doc {{model.mad_tag}}
-
-      .. tab-set::
-
-         .. tab-item:: MAD-integrated benchmarking
-
-            Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
-            directory and install the required packages on the host machine.
-
-            .. code-block:: shell
-
-               git clone https://github.com/ROCm/MAD
-               cd MAD
-               pip install -r requirements.txt
-
-            Use this command to run the performance benchmark test on the `{{model.model}} <{{ model.url }}>`_ model
-            using one GPU with the ``{{model.precision}}`` data type on the host machine.
-
-            .. code-block:: shell
-
-               export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
-               python3 tools/run_models.py --tags {{model.mad_tag}} --keep-model-dir --live-output --timeout 28800
-
-            MAD launches a Docker container with the name
-            ``container_ci-{{model.mad_tag}}``. The latency and throughput reports of the
-            model are collected in the following path: ``~/MAD/reports_{{model.precision}}/``.
-
-            Although the :ref:`available models <vllm-benchmark-available-models>` are preconfigured
-            to collect latency and throughput performance data, you can also change the benchmarking
-            parameters. See the standalone benchmarking tab for more information.
-
-            {% if model.tunableop %}
-
-            .. note::
-
-               For improved performance, consider enabling :ref:`PyTorch TunableOp <mi300x-tunableop>`.
-               TunableOp automatically explores different implementations and configurations of certain PyTorch
-               operators to find the fastest one for your hardware.
-
-               By default, ``{{model.mad_tag}}`` runs with TunableOp disabled
-               (see
-               `<https://github.com/ROCm/MAD/blob/develop/models.json>`__). To
-               enable it, edit the default run behavior in the ``models.json``
-               configuration before running inference -- update the model's run
-               ``args`` by changing ``--tunableop off`` to ``--tunableop on``.
-
-               Enabling TunableOp triggers a two-pass run -- a warm-up followed by the performance-collection run.
-
-            {% endif %}
-
-         .. tab-item:: Standalone benchmarking
-
-            Run the vLLM benchmark tool independently by starting the
-            `Docker container <{{ unified_docker.docker_hub_url }}>`_
-            as shown in the following snippet.
-
-            .. code-block::
-
-               docker pull {{ unified_docker.pull_tag }}
-               docker run -it --device=/dev/kfd --device=/dev/dri --group-add video --shm-size 16G --security-opt seccomp=unconfined --security-opt apparmor=unconfined --cap-add=SYS_PTRACE -v $(pwd):/workspace --env HUGGINGFACE_HUB_CACHE=/workspace --name test {{ unified_docker.pull_tag }}
-
-            In the Docker container, clone the ROCm MAD repository and navigate to the
-            benchmark scripts directory at ``~/MAD/scripts/vllm``.
-
-            .. code-block::
-
-               git clone https://github.com/ROCm/MAD
-               cd MAD/scripts/vllm
-
-            To start the benchmark, use the following command with the appropriate options.
-
-            .. code-block::
-
-               ./vllm_benchmark_report.sh -s $test_option -m {{model.model_repo}} -g $num_gpu -d {{model.precision}}
-
-            .. list-table::
-               :header-rows: 1
-               :align: center
-
-               * - Name
-                 - Options
-                 - Description
-
-               * - ``$test_option``
-                 - latency
-                 - Measure decoding token latency
-
-               * -
-                 - throughput
-                 - Measure token generation throughput
-
-               * -
-                 - all
-                 - Measure both throughput and latency
-
-               * - ``$num_gpu``
-                 - 1 or 8
-                 - Number of GPUs
-
-               * - ``$datatype``
-                 - ``float16`` or ``float8``
-                 - Data type
-
-            .. note::
-
-               The input sequence length, output sequence length, and tensor parallel (TP) are
-               already configured. You don't need to specify them with this script.
-
-            .. note::
-
-               If you encounter the following error, set the ``HF_TOKEN`` environment variable
-               to your access-authorized Hugging Face token to access the gated models.
-
-               .. code-block::
-
-                  OSError: You are trying to access a gated repo.
-
-                  # pass your HF_TOKEN
-                  export HF_TOKEN=$your_personal_hf_token
-
-            Here are some examples of running the benchmark with various options.
-
-            * Latency benchmark
-
-              Use this command to benchmark the latency of the {{model.model}} model on eight GPUs with ``{{model.precision}}`` precision.
-
-              .. code-block::
-
-                 ./vllm_benchmark_report.sh -s latency -m {{model.model_repo}} -g 8 -d {{model.precision}}
-
-              Find the latency report at ``./reports_{{model.precision}}_vllm_rocm{{unified_docker.rocm_version}}/summary/{{model.model_repo.split('/', 1)[1] if '/' in model.model_repo else model.model_repo}}_latency_report.csv``.
-
-            * Throughput benchmark
-
-              Use this command to benchmark the throughput of the {{model.model}} model on eight GPUs with ``{{model.precision}}`` precision.
-
-              .. code-block:: shell
-
-                 ./vllm_benchmark_report.sh -s throughput -m {{model.model_repo}} -g 8 -d {{model.precision}}
-
-              Find the throughput report at ``./reports_{{model.precision}}_vllm_rocm{{unified_docker.rocm_version}}/summary/{{model.model_repo.split('/', 1)[1] if '/' in model.model_repo else model.model_repo}}_throughput_report.csv``.
-
-            .. raw:: html
-
-               <style>
-               mjx-container[jax="CHTML"][display="true"] {
-                  text-align: left;
-                  margin: 0;
-               }
-               </style>
-
-            .. note::
-
-               Throughput is calculated as:
-
-               - .. math:: throughput\_tot = requests \times (\mathsf{\text{input lengths}} + \mathsf{\text{output lengths}}) / elapsed\_time
-
-               - .. math:: throughput\_gen = requests \times \mathsf{\text{output lengths}} / elapsed\_time
-      {% endfor %}
-   {% endfor %}
-
-Further reading
-===============
-
- To learn more about the options for latency and throughput benchmark scripts,
-  see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.
-
- To learn more about system settings and management practices to configure your system for
-  MI300X accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
-
- For application performance optimization strategies for HPC and AI workloads,
-  including inference with vLLM, see :doc:`../../../inference-optimization/workload`.
-
- To learn how to run LLM models from Hugging Face or your own model, see
-  :doc:`Running models from Hugging Face <../../hugging-face-models>`.
-
- To learn how to optimize inference on LLMs, see
-  :doc:`Inference optimization <../../../inference-optimization/index>`.
-
- To learn how to fine-tune LLMs, see
-  :doc:`Fine-tuning LLMs <../../../fine-tuning/index>`.
-
-Previous versions
-=================
-
-See :doc:`vllm-history` to find documentation for previous releases
-of the ``ROCm/vllm`` Docker image.
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.5-20250521.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.5-20250521.rst
@@ -1,354 +0,0 @@
-:orphan:
-
-.. meta::
-   :description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the
-                 ROCm vLLM Docker image.
-   :keywords: model, MAD, automation, dashboarding, validate
-
-**********************************
-vLLM inference performance testing
-**********************************
-
-.. caution::
-
-   This documentation does not reflect the latest version of ROCm vLLM
-   inference performance documentation. See :doc:`../vllm` for the latest version.
-
-.. _vllm-benchmark-unified-docker:
-
-.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.8.5_20250521-benchmark-models.yaml
-
-   {% set unified_docker = data.vllm_benchmark.unified_docker.latest %}
-   {% set model_groups = data.vllm_benchmark.model_groups %}
-
-   The `ROCm vLLM Docker <{{ unified_docker.docker_hub_url }}>`_ image offers
-   a prebuilt, optimized environment for validating large language model (LLM)
-   inference performance on AMD Instinct™ MI300X series accelerators. This ROCm vLLM
-   Docker image integrates vLLM and PyTorch tailored specifically for MI300X series
-   accelerators and includes the following components:
-
-   * `ROCm {{ unified_docker.rocm_version }} <https://github.com/ROCm/ROCm>`_
-
-   * `vLLM {{ unified_docker.vllm_version }} <https://docs.vllm.ai/en/latest>`_
-
-   * `PyTorch {{ unified_docker.pytorch_version }} <https://github.com/ROCm/pytorch.git>`_
-
-   * `hipBLASLt {{ unified_docker.hipblaslt_version }} <https://github.com/ROCm/hipBLASLt>`_
-
-   With this Docker image, you can quickly test the :ref:`expected
-   inference performance numbers <vllm-benchmark-performance-measurements>` for
-   MI300X series accelerators.
-
-   .. _vllm-benchmark-available-models:
-
-   Supported models
-   ================
-
-   The following models are supported for inference performance benchmarking
-   with vLLM and ROCm. Some instructions, commands, and recommendations in this
-   documentation might vary by model -- select one to get started.
-
-   .. raw:: html
-
-      <div id="vllm-benchmark-ud-params-picker" class="container-fluid">
-        <div class="row">
-          <div class="col-2 me-2 model-param-head">Model group</div>
-          <div class="row col-10">
-   {% for model_group in model_groups %}
-            <div class="col-3 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
-   {% endfor %}
-          </div>
-        </div>
-
-        <div class="row mt-1">
-          <div class="col-2 me-2 model-param-head">Model</div>
-          <div class="row col-10">
-   {% for model_group in model_groups %}
-      {% set models = model_group.models %}
-      {% for model in models %}
-         {% if models|length % 3 == 0 %}
-            <div class="col-4 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
-         {% else %}
-            <div class="col-6 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
-         {% endif %}
-      {% endfor %}
-   {% endfor %}
-          </div>
-        </div>
-      </div>
-
-   .. _vllm-benchmark-vllm:
-
-   {% for model_group in model_groups %}
-      {% for model in model_group.models %}
-
-   .. container:: model-doc {{model.mad_tag}}
-
-      .. note::
-
-         See the `{{ model.model }} model card on Hugging Face <{{ model.url }}>`_ to learn more about your selected model.
-         Some models require access authorization prior to use via an external license agreement through a third party.
-
-      {% endfor %}
-   {% endfor %}
-
-   .. note::
-
-      vLLM is a toolkit and library for LLM inference and serving. AMD implements
-      high-performance custom kernels and modules in vLLM to enhance performance.
-      See :ref:`fine-tuning-llms-vllm` and :ref:`mi300x-vllm-optimization` for
-      more information.
-
-   .. _vllm-benchmark-performance-measurements:
-
-   Performance measurements
-   ========================
-
-   To evaluate performance, the
-   `Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
-   page provides reference throughput and latency measurements for inferencing
-   popular AI models.
-
-   .. note::
-
-      The performance data presented in
-      `Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
-      should not be interpreted as the peak performance achievable by AMD
-      Instinct MI325X and MI300X accelerators or ROCm software.
-
-   Advanced features and known issues
-   ==================================
-
-   For information on experimental features and known issues related to ROCm optimization efforts on vLLM,
-   see the developer's guide at `<https://github.com/ROCm/vllm/blob/main/docs/dev-docker/README.md>`__.
-
-   System validation
-   =================
-
-   Before running AI workloads, it's important to validate that your AMD hardware is configured
-   correctly and performing optimally.
-
-   To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
-   might hang until the periodic balancing is finalized. For more information,
-   see the :ref:`system validation steps <rocm-for-ai-system-optimization>`.
-
-   .. code-block:: shell
-
-      # disable automatic NUMA balancing
-      sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
-      # check if NUMA balancing is disabled (returns 0 if disabled)
-      cat /proc/sys/kernel/numa_balancing
-      0
-
-   To test for optimal performance, consult the recommended :ref:`System health benchmarks
-   <rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
-   system's configuration.
-
-   Pull the Docker image
-   =====================
-
-   Download the `ROCm vLLM Docker image <{{ unified_docker.docker_hub_url }}>`_.
-   Use the following command to pull the Docker image from Docker Hub.
-
-   .. code-block:: shell
-
-      docker pull {{ unified_docker.pull_tag }}
-
-   Benchmarking
-   ============
-
-   Once the setup is complete, choose between two options to reproduce the
-   benchmark results:
-
-   .. _vllm-benchmark-mad:
-
-   {% for model_group in model_groups %}
-      {% for model in model_group.models %}
-
-   .. container:: model-doc {{model.mad_tag}}
-
-      .. tab-set::
-
-         .. tab-item:: MAD-integrated benchmarking
-
-            Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
-            directory and install the required packages on the host machine.
-
-            .. code-block:: shell
-
-               git clone https://github.com/ROCm/MAD
-               cd MAD
-               pip install -r requirements.txt
-
-            Use this command to run the performance benchmark test on the `{{model.model}} <{{ model.url }}>`_ model
-            using one GPU with the ``{{model.precision}}`` data type on the host machine.
-
-            .. code-block:: shell
-
-               export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
-               python3 tools/run_models.py --tags {{model.mad_tag}} --keep-model-dir --live-output --timeout 28800
-
-            MAD launches a Docker container with the name
-            ``container_ci-{{model.mad_tag}}``. The latency and throughput reports of the
-            model are collected in the following path: ``~/MAD/reports_{{model.precision}}/``.
-
-            Although the :ref:`available models <vllm-benchmark-available-models>` are preconfigured
-            to collect latency and throughput performance data, you can also change the benchmarking
-            parameters. See the standalone benchmarking tab for more information.
-
-            {% if model.tunableop %}
-
-            .. note::
-
-               For improved performance, consider enabling :ref:`PyTorch TunableOp <mi300x-tunableop>`.
-               TunableOp automatically explores different implementations and configurations of certain PyTorch
-               operators to find the fastest one for your hardware.
-
-               By default, ``{{model.mad_tag}}`` runs with TunableOp disabled
-               (see
-               `<https://github.com/ROCm/MAD/blob/develop/models.json>`__). To
-               enable it, edit the default run behavior in the ``models.json``
-               configuration before running inference -- update the model's run
-               ``args`` by changing ``--tunableop off`` to ``--tunableop on``.
-
-               Enabling TunableOp triggers a two-pass run -- a warm-up followed by the performance-collection run.
-
-            {% endif %}
-
-         .. tab-item:: Standalone benchmarking
-
-            Run the vLLM benchmark tool independently by starting the
-            `Docker container <{{ unified_docker.docker_hub_url }}>`_
-            as shown in the following snippet.
-
-            .. code-block::
-
-               docker pull {{ unified_docker.pull_tag }}
-               docker run -it --device=/dev/kfd --device=/dev/dri --group-add video --shm-size 16G --security-opt seccomp=unconfined --security-opt apparmor=unconfined --cap-add=SYS_PTRACE -v $(pwd):/workspace --env HUGGINGFACE_HUB_CACHE=/workspace --name test {{ unified_docker.pull_tag }}
-
-            In the Docker container, clone the ROCm MAD repository and navigate to the
-            benchmark scripts directory at ``~/MAD/scripts/vllm``.
-
-            .. code-block::
-
-               git clone https://github.com/ROCm/MAD
-               cd MAD/scripts/vllm
-
-            To start the benchmark, use the following command with the appropriate options.
-
-            .. code-block::
-
-               ./vllm_benchmark_report.sh -s $test_option -m {{model.model_repo}} -g $num_gpu -d {{model.precision}}
-
-            .. list-table::
-               :header-rows: 1
-               :align: center
-
-               * - Name
-                 - Options
-                 - Description
-
-               * - ``$test_option``
-                 - latency
-                 - Measure decoding token latency
-
-               * -
-                 - throughput
-                 - Measure token generation throughput
-
-               * -
-                 - all
-                 - Measure both throughput and latency
-
-               * - ``$num_gpu``
-                 - 1 or 8
-                 - Number of GPUs
-
-               * - ``$datatype``
-                 - ``float16`` or ``float8``
-                 - Data type
-
-            .. note::
-
-               The input sequence length, output sequence length, and tensor parallelism (TP) are
-               already configured. You don't need to specify them with this script.
-
-            .. note::
-
-               If you encounter the following error, export a Hugging Face token that is
-               authorized to access the gated model as ``HF_TOKEN``.
-
-               .. code-block::
-
-                  OSError: You are trying to access a gated repo.
-
-                  # pass your HF_TOKEN
-                  export HF_TOKEN=$your_personal_hf_token
-
-            Here are some examples of running the benchmark with various options.
-
-            * Latency benchmark
-
-              Use this command to benchmark the latency of the {{model.model}} model on eight GPUs with ``{{model.precision}}`` precision.
-
-              .. code-block::
-
-                 ./vllm_benchmark_report.sh -s latency -m {{model.model_repo}} -g 8 -d {{model.precision}}
-
-              Find the latency report at ``./reports_{{model.precision}}_vllm_rocm{{unified_docker.rocm_version}}/summary/{{model.model_repo.split('/', 1)[1] if '/' in model.model_repo else model.model_repo}}_latency_report.csv``.
-
-            * Throughput benchmark
-
-              Use this command to benchmark the throughput of the {{model.model}} model on eight GPUs with ``{{model.precision}}`` precision.
-
-              .. code-block:: shell
-
-                 ./vllm_benchmark_report.sh -s throughput -m {{model.model_repo}} -g 8 -d {{model.precision}}
-
-              Find the throughput report at ``./reports_{{model.precision}}_vllm_rocm{{unified_docker.rocm_version}}/summary/{{model.model_repo.split('/', 1)[1] if '/' in model.model_repo else model.model_repo}}_throughput_report.csv``.
-
-            .. raw:: html
-
-               <style>
-               mjx-container[jax="CHTML"][display="true"] {
-                  text-align: left;
-                  margin: 0;
-               }
-               </style>
-
-            .. note::
-
-               Throughput is calculated as:
-
-               - .. math:: throughput\_tot = requests \times (\mathsf{\text{input lengths}} + \mathsf{\text{output lengths}}) / elapsed\_time
-
-               - .. math:: throughput\_gen = requests \times \mathsf{\text{output lengths}} / elapsed\_time
-      {% endfor %}
-   {% endfor %}
-
-Further reading
-===============
-
- To learn more about the options for latency and throughput benchmark scripts,
-  see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.
-
- To learn more about system settings and management practices to configure your system for
-  MI300X accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
-
- For application performance optimization strategies for HPC and AI workloads,
-  including inference with vLLM, see :doc:`../../inference-optimization/workload`.
-
- To learn how to run LLM models from Hugging Face or your own model, see
-  :doc:`Running models from Hugging Face <../hugging-face-models>`.
-
- To learn how to optimize inference on LLMs, see
-  :doc:`Inference optimization <../../inference-optimization/index>`.
-
- To learn how to fine-tune LLMs, see
-  :doc:`Fine-tuning LLMs <../../fine-tuning/index>`.
-
-Previous versions
-=================
-
-See :doc:`vllm-history` to find documentation for previous releases
-of the ``ROCm/vllm`` Docker image.
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-history.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-history.rst
@@ -1,75 +0,0 @@
-:orphan:
-
-**************************************************
-vLLM inference performance testing version history
-**************************************************
-
-This table lists previous versions of the ROCm vLLM Docker image for
-inference performance testing. For detailed information about available models
-for benchmarking, see the version-specific documentation. You can find tagged
-previous releases of the ``ROCm/vllm`` Docker image on `Docker Hub <https://hub.docker.com/r/rocm/vllm/tags>`_.
-
-.. list-table::
-   :header-rows: 1
-   :stub-columns: 1
-
-   * - ROCm version
-     - vLLM version
-     - PyTorch version
-     - Resources
-
-   * - 6.4.0
-     - 0.9.0.1
-     - 2.7.0
-     - 
-       * :doc:`Documentation <../vllm>`
-       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_vllm_0.8.5_20250521/images/sha256-38410c51af7208897cd8b737c9bdfc126e9bc8952d4aa6b88c85482f03092a11>`_
-
-   * - 6.3.1
-     - 0.8.5 (0.8.6.dev)
-     - 2.7.0
-     - 
-       * :doc:`Documentation <vllm-0.8.5-20250521>`
-       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_vllm_0.8.5_20250521/images/sha256-38410c51af7208897cd8b737c9bdfc126e9bc8952d4aa6b88c85482f03092a11>`_
-
-   * - 6.3.1
-     - 0.8.5
-     - 2.7.0
-     - 
-       * :doc:`Documentation <vllm-0.8.5-20250513>`
-       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_vllm_0.8.5_20250513/images/sha256-5c8b4436dd0464119d9df2b44c745fadf81512f18ffb2f4b5dc235c71ebe26b4>`_
-
-   * - 6.3.1
-     - 0.8.3
-     - 2.7.0
-     - 
-       * :doc:`Documentation <vllm-0.8.3-20250415>`
-       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.8.3_20250415/images/sha256-ad9062dea3483d59dedb17c67f7c49f30eebd6eb37c3fac0a171fb19696cc845>`_
-
-   * - 6.3.1
-     - 0.7.3
-     - 2.7.0
-     - 
-       * :doc:`Documentation <vllm-0.7.3-20250325>`
-       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.7.3_20250325/images/sha256-25245924f61750b19be6dcd8e787e46088a496c1fe17ee9b9e397f3d84d35640>`_
-
-   * - 6.3.1
-     - 0.6.6
-     - 2.7.0
-     - 
-       * :doc:`Documentation <vllm-0.6.6>`
-       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_mi300_ubuntu22.04_py3.12_vllm_0.6.6/images/sha256-9a12ef62bbbeb5a4c30a01f702c8e025061f575aa129f291a49fbd02d6b4d6c9>`_
-
-   * - 6.2.1
-     - 0.6.4
-     - 2.5.0
-     - 
-       * :doc:`Documentation <vllm-0.6.4>`
-       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4/images/sha256-ccbb74cc9e7adecb8f7bdab9555f7ac6fc73adb580836c2a35ca96ff471890d8>`_
-
-   * - 6.2.0
-     - 0.4.3
-     - 2.4.0
-     - 
-       * :doc:`Documentation <vllm-0.4.3>`
-       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.2_mi300_ubuntu22.04_py3.9_vllm_7c5fd50/images/sha256-9e4dd4788a794c3d346d7d0ba452ae5e92d39b8dfac438b2af8efdc7f15d22c0>`_
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst
@@ -1,346 +0,0 @@
-.. meta::
-   :description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the
-                 ROCm vLLM Docker image.
-   :keywords: model, MAD, automation, dashboarding, validate
-
-**********************************
-vLLM inference performance testing
-**********************************
-
-.. _vllm-benchmark-unified-docker:
-
-.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml
-
-   {% set unified_docker = data.vllm_benchmark.unified_docker.latest %}
-   {% set model_groups = data.vllm_benchmark.model_groups %}
-
-   The `ROCm vLLM Docker <{{ unified_docker.docker_hub_url }}>`_ image offers
-   a prebuilt, optimized environment for validating large language model (LLM)
-   inference performance on AMD Instinct™ MI300X series accelerators. This ROCm vLLM
-   Docker image integrates vLLM and PyTorch tailored specifically for MI300X series
-   accelerators and includes the following components:
-
-   * `ROCm {{ unified_docker.rocm_version }} <https://github.com/ROCm/ROCm>`_
-
-   * `vLLM {{ unified_docker.vllm_version }} <https://docs.vllm.ai/en/latest>`_
-
-   * `PyTorch {{ unified_docker.pytorch_version }} <https://github.com/ROCm/pytorch.git>`_
-
-   * `hipBLASLt {{ unified_docker.hipblaslt_version }} <https://github.com/ROCm/hipBLASLt>`_
-
-   With this Docker image, you can quickly test the :ref:`expected
-   inference performance numbers <vllm-benchmark-performance-measurements>` for
-   MI300X series accelerators.
-
-   .. _vllm-benchmark-available-models:
-
-   Supported models
-   ================
-
-   The following models are supported for inference performance benchmarking
-   with vLLM and ROCm. Some instructions, commands, and recommendations in this
-   documentation might vary by model -- select one to get started.
-
-   .. raw:: html
-
-      <div id="vllm-benchmark-ud-params-picker" class="container-fluid">
-        <div class="row">
-          <div class="col-2 me-2 model-param-head">Model group</div>
-          <div class="row col-10">
-   {% for model_group in model_groups %}
-            <div class="col-3 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
-   {% endfor %}
-          </div>
-        </div>
-
-        <div class="row mt-1">
-          <div class="col-2 me-2 model-param-head">Model</div>
-          <div class="row col-10">
-   {% for model_group in model_groups %}
-      {% set models = model_group.models %}
-      {% for model in models %}
-         {% if models|length % 3 == 0 %}
-            <div class="col-4 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
-         {% else %}
-            <div class="col-6 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
-         {% endif %}
-      {% endfor %}
-   {% endfor %}
-          </div>
-        </div>
-      </div>
-
-   .. _vllm-benchmark-vllm:
-
-   {% for model_group in model_groups %}
-      {% for model in model_group.models %}
-
-   .. container:: model-doc {{model.mad_tag}}
-
-      .. note::
-
-         See the `{{ model.model }} model card on Hugging Face <{{ model.url }}>`_ to learn more about your selected model.
-         Some models require access authorization prior to use via an external license agreement through a third party.
-
-      {% endfor %}
-   {% endfor %}
-
-   .. note::
-
-      vLLM is a toolkit and library for LLM inference and serving. AMD implements
-      high-performance custom kernels and modules in vLLM to enhance performance.
-      See :ref:`fine-tuning-llms-vllm` and :ref:`mi300x-vllm-optimization` for
-      more information.
-
-   .. _vllm-benchmark-performance-measurements:
-
-   Performance measurements
-   ========================
-
-   To evaluate performance, the
-   `Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
-   page provides reference throughput and latency measurements for inferencing popular AI models.
-
-   .. important::
-
-      The performance data presented in
-      `Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
-      only reflects the latest version of this inference benchmarking environment.
-      The listed measurements should not be interpreted as the peak performance achievable by AMD Instinct MI325X and MI300X accelerators or ROCm software.
-
-   Advanced features and known issues
-   ==================================
-
-   For information on experimental features and known issues related to ROCm optimization efforts on vLLM,
-   see the developer's guide at `<https://github.com/ROCm/vllm/tree/7bb0618b1fe725b7d4fad9e525aa44da12c94a8b/docs/dev-docker>`__.
-
-   System validation
-   =================
-
-   Before running AI workloads, it's important to validate that your AMD hardware is configured
-   correctly and performing optimally.
-
-   To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
-   might hang until the periodic balancing is finalized. For more information,
-   see the :ref:`system validation steps <rocm-for-ai-system-optimization>`.
-
-   .. code-block:: shell
-
-      # disable automatic NUMA balancing
-      sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
-      # check if NUMA balancing is disabled (returns 0 if disabled)
-      cat /proc/sys/kernel/numa_balancing
-      0
-
-   To test for optimal performance, consult the recommended :ref:`System health benchmarks
-   <rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
-   system's configuration.
-
-   Pull the Docker image
-   =====================
-
-   Download the `ROCm vLLM Docker image <{{ unified_docker.docker_hub_url }}>`_.
-   Use the following command to pull the Docker image from Docker Hub.
-
-   .. code-block:: shell
-
-      docker pull {{ unified_docker.pull_tag }}
-
-   Benchmarking
-   ============
-
-   Once the setup is complete, choose between two options to reproduce the
-   benchmark results:
-
-   .. _vllm-benchmark-mad:
-
-   {% for model_group in model_groups %}
-      {% for model in model_group.models %}
-
-   .. container:: model-doc {{model.mad_tag}}
-
-      .. tab-set::
-
-         .. tab-item:: MAD-integrated benchmarking
-
-            Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
-            directory and install the required packages on the host machine.
-
-            .. code-block:: shell
-
-               git clone https://github.com/ROCm/MAD
-               cd MAD
-               pip install -r requirements.txt
-
-            Use this command to run the performance benchmark test on the `{{model.model}} <{{ model.url }}>`_ model
-            using one GPU with the ``{{model.precision}}`` data type on the host machine.
-
-            .. code-block:: shell
-
-               export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
-               python3 tools/run_models.py --tags {{model.mad_tag}} --keep-model-dir --live-output --timeout 28800
-
-            MAD launches a Docker container with the name
-            ``container_ci-{{model.mad_tag}}``. The latency and throughput reports of the
-            model are collected in the following path: ``~/MAD/reports_{{model.precision}}/``.
-
-            Although the :ref:`available models <vllm-benchmark-available-models>` are preconfigured
-            to collect latency and throughput performance data, you can also change the benchmarking
-            parameters. See the standalone benchmarking tab for more information.
-
-            {% if model.tunableop %}
-
-            .. note::
-
-               For improved performance, consider enabling :ref:`PyTorch TunableOp <mi300x-tunableop>`.
-               TunableOp automatically explores different implementations and configurations of certain PyTorch
-               operators to find the fastest one for your hardware.
-
-               By default, ``{{model.mad_tag}}`` runs with TunableOp disabled
-               (see
-               `<https://github.com/ROCm/MAD/blob/develop/models.json>`__). To
-               enable it, edit the default run behavior in the ``models.json``
-               configuration before running inference -- update the model's run
-               ``args`` by changing ``--tunableop off`` to ``--tunableop on``.
-
-               Enabling TunableOp triggers a two-pass run -- a warm-up followed by the performance-collection run.
-
-            {% endif %}
-
-         .. tab-item:: Standalone benchmarking
-
-            Run the vLLM benchmark tool independently by starting the
-            `Docker container <{{ unified_docker.docker_hub_url }}>`_
-            as shown in the following snippet.
-
-            .. code-block::
-
-               docker pull {{ unified_docker.pull_tag }}
-               docker run -it --device=/dev/kfd --device=/dev/dri --group-add video --shm-size 16G --security-opt seccomp=unconfined --security-opt apparmor=unconfined --cap-add=SYS_PTRACE -v $(pwd):/workspace --env HUGGINGFACE_HUB_CACHE=/workspace --name test {{ unified_docker.pull_tag }}
-
-            In the Docker container, clone the ROCm MAD repository and navigate to the
-            benchmark scripts directory at ``~/MAD/scripts/vllm``.
-
-            .. code-block::
-
-               git clone https://github.com/ROCm/MAD
-               cd MAD/scripts/vllm
-
-            To start the benchmark, use the following command with the appropriate options.
-
-            .. code-block::
-
-               ./vllm_benchmark_report.sh -s $test_option -m {{model.model_repo}} -g $num_gpu -d {{model.precision}}
-
-            .. list-table::
-               :header-rows: 1
-               :align: center
-
-               * - Name
-                 - Options
-                 - Description
-
-               * - ``$test_option``
-                 - latency
-                 - Measure decoding token latency
-
-               * -
-                 - throughput
-                 - Measure token generation throughput
-
-               * -
-                 - all
-                 - Measure both throughput and latency
-
-               * - ``$num_gpu``
-                 - 1 or 8
-                 - Number of GPUs
-
-               * - ``$datatype``
-                 - ``float16`` or ``float8``
-                 - Data type
-
-            .. note::
-
-               The input sequence length, output sequence length, and tensor parallelism (TP) are
-               already configured. You don't need to specify them with this script.
-
-            .. note::
-
-               If you encounter the following error, export a Hugging Face token that is
-               authorized to access the gated model as ``HF_TOKEN``.
-
-               .. code-block::
-
-                  OSError: You are trying to access a gated repo.
-
-                  # pass your HF_TOKEN
-                  export HF_TOKEN=$your_personal_hf_token
-
-            Here are some examples of running the benchmark with various options.
-
-            * Latency benchmark
-
-              Use this command to benchmark the latency of the {{model.model}} model on eight GPUs with ``{{model.precision}}`` precision.
-
-              .. code-block::
-
-                 ./vllm_benchmark_report.sh -s latency -m {{model.model_repo}} -g 8 -d {{model.precision}}
-
-              Find the latency report at ``./reports_{{model.precision}}_vllm_rocm{{unified_docker.rocm_version}}/summary/{{model.model_repo.split('/', 1)[1] if '/' in model.model_repo else model.model_repo}}_latency_report.csv``.
-
-            * Throughput benchmark
-
-              Use this command to benchmark the throughput of the {{model.model}} model on eight GPUs with ``{{model.precision}}`` precision.
-
-              .. code-block:: shell
-
-                 ./vllm_benchmark_report.sh -s throughput -m {{model.model_repo}} -g 8 -d {{model.precision}}
-
-              Find the throughput report at ``./reports_{{model.precision}}_vllm_rocm{{unified_docker.rocm_version}}/summary/{{model.model_repo.split('/', 1)[1] if '/' in model.model_repo else model.model_repo}}_throughput_report.csv``.
-
-            .. raw:: html
-
-               <style>
-               mjx-container[jax="CHTML"][display="true"] {
-                  text-align: left;
-                  margin: 0;
-               }
-               </style>
-
-            .. note::
-
-               Throughput is calculated as:
-
-               - .. math:: throughput\_tot = requests \times (\mathsf{\text{input lengths}} + \mathsf{\text{output lengths}}) / elapsed\_time
-
-               - .. math:: throughput\_gen = requests \times \mathsf{\text{output lengths}} / elapsed\_time
-      {% endfor %}
-   {% endfor %}
-
-Further reading
-===============
-
- To learn more about the options for latency and throughput benchmark scripts,
-  see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.
-
- To learn more about system settings and management practices to configure your system for
-  MI300X accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
-
- For application performance optimization strategies for HPC and AI workloads,
-  including inference with vLLM, see :doc:`../../inference-optimization/workload`.
-
- To learn how to run LLM models from Hugging Face or your own model, see
-  :doc:`Running models from Hugging Face <../hugging-face-models>`.
-
- To learn how to optimize inference on LLMs, see
-  :doc:`Inference optimization <../../inference-optimization/index>`.
-
- To learn how to fine-tune LLMs, see
-  :doc:`Fine-tuning LLMs <../../fine-tuning/index>`.
-
-Previous versions
-=================
-
-See :doc:`previous-versions/vllm-history` to find documentation for previous releases
-of the ``ROCm/vllm`` Docker image.
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Istvan Kiss	8a13947e8f	Update docs/compatibility/ml-compatibility/pytorch-compatibility.rst Co-authored-by: Jeff Daily <jeff.daily@amd.com>	2025-04-25 20:45:27 +02:00
Istvan Kiss	b82258bf51	WIP	2025-04-25 14:43:24 +02:00
Istvan Kiss	2beb93c33c	Update PyTorch compatibility page	2025-04-25 14:43:24 +02:00