WIP

2026-01-10 23:28:03 -05:00 · 2024-07-01 13:36:11 +02:00 · 2024-07-01 13:20:21 +02:00 · 2024-07-01 13:14:17 +02:00 · 2024-07-01 13:09:34 +02:00 · 2024-07-01 13:09:23 +02:00
154 changed files with 2483 additions and 1022 deletions
--- a/.azuredevops/ci-builds/aomp.yml
+++ b/.azuredevops/ci-builds/aomp.yml
@@ -17,11 +17,7 @@ resources:
  pipelines:
  - pipeline: rocr-runtime_pipeline
    source: \ROCR-Runtime
-    trigger:
-      branches:
-        include:
-        - master
-
+    trigger: true
 # this job will only be triggered after successful build sequence of llvm-project and ROCR-Runtime

 trigger: none
--- a/.azuredevops/components/AMDMIGraphX.yml
+++ b/.azuredevops/components/AMDMIGraphX.yml
@@ -84,8 +84,8 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
      extraBuildFlags: >-
-        -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-        -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
+        -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
+        -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
        -DCMAKE_BUILD_TYPE=Release
        -DAMDGPU_TARGETS=gfx1030;gfx1100
        -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
--- a/.azuredevops/components/MIOpen.yml
+++ b/.azuredevops/components/MIOpen.yml
@@ -16,6 +16,7 @@ parameters:
    - libbz2-dev
    - nlohmann-json3-dev
    - libgtest-dev
+    - libdrm-dev
 - name: rocmDependencies
  type: object
  default:
@@ -30,6 +31,7 @@ parameters:
    - rocprofiler-register
    - clr
    - rocminfo
+    - roctracer

 jobs:
 - job: MIOpen
--- a/.azuredevops/components/composable_kernel.yml
+++ b/.azuredevops/components/composable_kernel.yml
@@ -12,6 +12,7 @@ parameters:
    - ninja-build
    - git
    - python3-pip
+    - libdrm-dev
 - name: rocmDependencies
  type: object
  default:
@@ -24,10 +25,11 @@ parameters:

 jobs:
 - job: composable_kernel
+  timeoutInMinutes: 210
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
-  pool: ${{ variables.MEDIUM_BUILD_POOL }}
+  pool: ${{ variables.ULTRA_BUILD_POOL }}
  workspace:
    clean: all
  steps:
@@ -57,6 +59,6 @@ jobs:
        -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
        -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
        -DCMAKE_BUILD_TYPE=Release
-        -DGPU_TARGETS=gfx1030;gfx1100
+        -DINSTANCES_ONLY=ON
        -GNinja
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
--- a/.azuredevops/components/hipSPARSE.yml
+++ b/.azuredevops/components/hipSPARSE.yml
@@ -65,3 +65,13 @@ jobs:
        -DBUILD_CLIENTS_SAMPLES=OFF
        -GNinja
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+    parameters:
+      artifactName: hipSPARSE
+      publish: false
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
+    parameters:
+      sourceDir: $(Build.SourcesDirectory)/build/clients
+      contentsString: matrices/**
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+    parameters:
+      artifactName: testMatrices
--- a/.azuredevops/components/hipTensor.yml
+++ b/.azuredevops/components/hipTensor.yml
@@ -55,9 +55,9 @@ jobs:
      extraBuildFlags: >-
        -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/rocm/llvm
        -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-        -DROCM_PATH="$(Agent.BuildDirectory)/rocm"
+        -DROCM_PATH=$(Agent.BuildDirectory)/rocm
        -DCMAKE_BUILD_TYPE=Release
        -DHIPTENSOR_BUILD_TESTS=ON
-        -DAMDGPU_TARGETS=gfx1030;gfx1100
-        -GNinja
+        -DAMDGPU_TARGETS=gfx90a
+      multithreadFlag: -- -j32
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
--- a/.azuredevops/components/rccl.yml
+++ b/.azuredevops/components/rccl.yml
@@ -61,6 +61,7 @@ jobs:
      parameters:
        dependencyList: ${{ parameters.rocmDependencies }}
        dependencySource: tag-builds
+  - script: chmod +x $(Agent.BuildDirectory)/rocm/bin/hipify-perl
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
      extraBuildFlags: >-
--- a/.azuredevops/components/rocAL.yml
+++ b/.azuredevops/components/rocAL.yml
@@ -0,0 +1,138 @@
+parameters:
+- name: checkoutRepo
+  type: string
+  default: 'self'
+- name: checkoutRef
+  type: string
+  default: ''
+- name: aptPackages
+  type: object
+  default:
+    - python3-pip
+    - python3-protobuf
+    - cmake
+    - ninja-build
+    - libprotobuf-dev
+    - libprotoc-dev
+    - protobuf-compiler
+    - liblmdb-dev
+    - pkg-config
+    - ffmpeg
+    - libavcodec-dev
+    - libavformat-dev
+    - libavutil-dev
+    - libswscale-dev
+    - libturbojpeg-dev
+    - libjpeg-turbo-official=3.0.2-20240124
+    - libopencv-dev
+- name: pipModules
+  type: object
+  default:
+    - numpy
+    - opencv-python
+    - torch
+    - pillow
+- name: rocmDependencies
+  type: object
+  default:
+    - rocm-cmake
+    - llvm-project
+    - ROCR-Runtime
+    - clr
+    - rocDecode
+    - half
+    - rpp
+    - MIVisionX
+    - aomp
+
+jobs:
+- job: rocAL
+  variables:
+  - group: common
+  - template: /.azuredevops/variables-global.yml
+  pool:
+    vmImage: ${{ variables.BASE_BUILD_POOL }}
+  workspace:
+    clean: all
+  steps:
+  - task: Bash@3
+    displayName: 'Register libjpeg-turbo packages'
+    inputs:
+      targetType: inline
+      script: |
+        sudo mkdir --parents --mode=0755 /etc/apt/keyrings
+        wget -q -O- https://packagecloud.io/dcommander/libjpeg-turbo/gpgkey | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/libjpeg-turbo.gpg > /dev/null
+        echo "deb [signed-by=/etc/apt/trusted.gpg.d/libjpeg-turbo.gpg] https://packagecloud.io/dcommander/libjpeg-turbo/any/ any main" | sudo tee /etc/apt/sources.list.d/libjpeg-turbo.list
+        sudo apt update
+        apt-cache show libjpeg-turbo-official | grep Version
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+      pipModules: ${{ parameters.pipModules }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
+    parameters:
+      checkoutRepo: ${{ parameters.checkoutRepo }}
+  - task: Bash@3
+    displayName: 'Clone PyBind11'
+    inputs:
+      targetType: inline
+      script: git clone --depth 1 -b v2.11.1 https://github.com/pybind/pybind11
+      workingDirectory: '$(Build.SourcesDirectory)'
+  - task: Bash@3
+    displayName: 'Clone RapidJSON'
+    inputs:
+      targetType: inline
+      script: git clone --depth 1 https://github.com/Tencent/rapidjson.git
+      workingDirectory: '$(Build.SourcesDirectory)'
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      componentName: PyBind11
+      cmakeBuildDir: '$(Build.SourcesDirectory)/pybind11/build'
+      customInstallPath: false
+      installEnabled: false
+      extraBuildFlags: >-
+        -DDOWNLOAD_CATCH=ON
+        -DDOWNLOAD_EIGEN=ON
+        -GNinja
+  - task: Bash@3
+    displayName: 'Install PyBind11'
+    inputs:
+      targetType: inline
+      script: sudo cmake --build . --target install
+      workingDirectory: '$(Build.SourcesDirectory)/pybind11/build'
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      componentName: RapidJSON
+      cmakeBuildDir: '$(Build.SourcesDirectory)/rapidjson/build'
+      customInstallPath: false
+      installEnabled: false
+      extraBuildFlags: >-
+        -GNinja
+  - task: Bash@3
+    displayName: 'Install RapidJSON'
+    inputs:
+      targetType: inline
+      script: sudo cmake --build . --target install
+      workingDirectory: '$(Build.SourcesDirectory)/rapidjson/build'
+  # CI case: download latest default branch build
+  - ${{ if eq(parameters.checkoutRef, '') }}:
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        dependencyList: ${{ parameters.rocmDependencies }}
+        dependencySource: staging
+  # manual build case: triggered by ROCm/ROCm repo
+  - ${{ if ne(parameters.checkoutRef, '') }}:
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        dependencyList: ${{ parameters.rocmDependencies }}
+        dependencySource: tag-builds
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      extraBuildFlags: >-
+        -DROCM_PATH=$(Agent.BuildDirectory)/rocm
+        -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;/opt/libjpeg-turbo
+        -DCMAKE_INSTALL_PREFIX_PYTHON=$Python3_STDARCH
+        -DCMAKE_BUILD_TYPE=Release
+        -GNinja
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
--- a/.azuredevops/components/rocMLIR.yml
+++ b/.azuredevops/components/rocMLIR.yml
@@ -10,6 +10,13 @@ parameters:
  default:
    - cmake
    - ninja-build
+    - git
+    - python3-pip
+- name: rocmDependencies
+  type: object
+  default:
+    - llvm-project
+    - rocm-cmake

 jobs:
 - job: rocMLIR
@@ -17,8 +24,6 @@ jobs:
  - group: common
  - template: /.azuredevops/variables-global.yml
  pool: ${{ variables.MEDIUM_BUILD_POOL }}
-  container:
-    image: ${{ variables.DOCKER_IMAGE_NAME }}:${{ variables.LATEST_DOCKER_VERSION }}
  workspace:
    clean: all
  steps:
@@ -29,13 +34,25 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
    parameters:
      checkoutRepo: ${{ parameters.checkoutRepo }}
+# CI case: download latest default branch build
+  - ${{ if eq(parameters.checkoutRef, '') }}:
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        dependencyList: ${{ parameters.rocmDependencies }}
+        dependencySource: staging
+# manual build case: triggered by ROCm/ROCm repo
+  - ${{ if ne(parameters.checkoutRef, '') }}:
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        dependencyList: ${{ parameters.rocmDependencies }}
+        dependencySource: tag-builds
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
    parameters:
      extraBuildFlags: >-
        -DCMAKE_BUILD_TYPE=Release
-        -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/amdclang++
-        -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/amdclang
-        -DCMAKE_PREFIX_PATH=/opt/rocm
+        -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
+        -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
+        -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
        -DBUILD_FAT_LIBROCKCOMPILER=1
        -GNinja
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
--- a/.azuredevops/components/rocSPARSE.yml
+++ b/.azuredevops/components/rocSPARSE.yml
@@ -75,3 +75,13 @@ jobs:
        -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip;$(Agent.BuildDirectory)/rocm/hip/cmake
        -GNinja
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+    parameters:
+      artifactName: rocSPARSE
+      publish: false
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
+    parameters:
+      sourceDir: $(Build.SourcesDirectory)/build/clients
+      contentsString: matrices/**
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
+    parameters:
+      artifactName: testMatrices
--- a/.azuredevops/components/rocm-examples.yml
+++ b/.azuredevops/components/rocm-examples.yml
@@ -5,6 +5,30 @@ parameters:
 - name: checkoutRef
  type: string
  default: ''
+- name: aptPackages
+  type: object
+  default:
+    - libglfw3-dev
+- name: rocmDependencies
+  type: object
+  default:
+    - AMDMIGraphX
+    - clr
+    - hipBLAS
+    - hipCUB
+    - HIPIFY
+    - hipRAND
+    - hipSOLVER
+    - hipSPARSE
+    - llvm-project
+    - rocBLAS
+    - rocPRIM
+    - rocprofiler-register
+    - ROCR-Runtime
+    - rocRAND
+    - rocSOLVER
+    - rocSPARSE
+    - rocThrust

 jobs:
 - job: rocm_examples
@@ -20,5 +44,28 @@ jobs:
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
    parameters:
      checkoutRepo: ${{ parameters.checkoutRepo }}
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+    parameters:
+      aptPackages: ${{ parameters.aptPackages }}
+  # CI case: download latest default branch build
+  - ${{ if eq(parameters.checkoutRef, '') }}:
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        dependencyList: ${{ parameters.rocmDependencies }}
+        dependencySource: staging
+  # manual build case: triggered by ROCm/ROCm repo
+  - ${{ if ne(parameters.checkoutRef, '') }}:
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        dependencyList: ${{ parameters.rocmDependencies }}
+        dependencySource: tag-builds
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      # https://github.com/ROCm/HIP/issues/2203
+      extraBuildFlags: >-
+        -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
+        -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
+        -DROCM_ROOT=$(Agent.BuildDirectory)/rocm
+        -DCMAKE_HIP_ARCHITECTURES=gfx1030;gfx1100
+        -DCMAKE_EXE_LINKER_FLAGS=-fgpu-rdc
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
--- a/.azuredevops/components/rocprofiler-register.yml
+++ b/.azuredevops/components/rocprofiler-register.yml
@@ -21,4 +21,17 @@ jobs:
    parameters:
      checkoutRepo: ${{ parameters.checkoutRepo }}
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      componentName: rocprofiler-register
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
+    parameters:
+      componentName: rocprofiler-register-tests
+      extraBuildFlags: >-
+        -DCMAKE_PREFIX_PATH=$(Build.BinariesDirectory)
+      cmakeBuildDir: 'tests/build'
+      installEnabled: false
+  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+    parameters:
+      componentName: rocprofiler-register
+      testDir: 'tests/build'
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
--- a/.azuredevops/components/rocprofiler.yml
+++ b/.azuredevops/components/rocprofiler.yml
@@ -47,7 +47,7 @@ jobs:
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
-  - name: HIP_ROCCLR_HOME 
+  - name: HIP_ROCCLR_HOME
    value: $(Agent.BuildDirectory)/rocm
  - name: ROCM_PATH
    value: $(Agent.BuildDirectory)/rocm
@@ -68,7 +68,7 @@ jobs:
    displayName: 'Download aqlprofile'
    inputs:
      targetType: inline
-      script: wget -nv https://repo.radeon.com/rocm/apt/6.1/pool/main/h/hsa-amd-aqlprofile/hsa-amd-aqlprofile_1.0.0.60100.60100-82~22.04_amd64.deb
+      script: wget -nv https://repo.radeon.com/rocm/misc/aqlprofile/ubuntu-22.04/hsa-amd-aqlprofile_1.0.0.60200.60200-crdnnh.14213~22.04_amd64.deb
      workingDirectory: '$(Pipeline.Workspace)'
  - task: Bash@3
    displayName: 'Extract aqlprofile'
@@ -76,7 +76,7 @@ jobs:
      targetType: inline
      script: |
        mkdir hsa-amd-aqlprofile
-        dpkg-deb -R hsa-amd-aqlprofile_1.0.0.60100.60100-82~22.04_amd64.deb hsa-amd-aqlprofile
+        dpkg-deb -R hsa-amd-aqlprofile_1.0.0.60200.60200-crdnnh.14213~22.04_amd64.deb hsa-amd-aqlprofile
      workingDirectory: '$(Pipeline.Workspace)'
  - task: Bash@3
    displayName: 'Move aqlprofile'
@@ -84,7 +84,7 @@ jobs:
      targetType: inline
      script: |
        mkdir -p $(Agent.BuildDirectory)/rocm
-        cp -R hsa-amd-aqlprofile/opt/rocm-6.1.0/* $(Agent.BuildDirectory)/rocm
+        cp -R hsa-amd-aqlprofile/opt/rocm-6.2.0-14213/* $(Agent.BuildDirectory)/rocm
      workingDirectory: '$(Pipeline.Workspace)'
 # CI case: download latest default branch build
  - ${{ if eq(parameters.checkoutRef, '') }}:
--- a/.azuredevops/components/rocr_debug_agent.yml
+++ b/.azuredevops/components/rocr_debug_agent.yml
@@ -15,11 +15,13 @@ parameters:
 - name: rocmDependencies
  type: object
  default:
+    - rocm-cmake
    - clr
    - llvm-project
    - ROCdbgapi
    - rocminfo
    - ROCR-Runtime
+    - rocprofiler-register

 jobs:
 - job: rocr_debug_agent
@@ -56,5 +58,6 @@ jobs:
        -DCMAKE_BUILD_TYPE=Release
        -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake;$(Agent.BuildDirectory)/rocm/lib/cmake/hip
        -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
+        -DROCM_PATH=$(Agent.BuildDirectory)/rocm
        -GNinja
  - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
--- a/.azuredevops/tag-builds/rocAL.yml
+++ b/.azuredevops/tag-builds/rocAL.yml
@@ -0,0 +1,29 @@
+variables:
+- group: common
+- template: /.azuredevops/variables-global.yml
+
+parameters:
+- name: checkoutRef
+  type: string
+  default: refs/tags/$(LATEST_RELEASE_TAG)
+
+resources:
+  repositories:
+  - repository: pipelines_repo
+    type: github
+    endpoint: ROCm
+    name: ROCm/ROCm
+  - repository: release_repo
+    type: github
+    endpoint: ROCm
+    name: ROCm/rocAL
+    ref: ${{ parameters.checkoutRef }}
+
+trigger: none
+pr: none
+
+jobs:
+  - template: ${{ variables.CI_COMPONENT_PATH }}/rocAL.yml
+    parameters:
+      checkoutRepo: release_repo
+      checkoutRef: ${{ parameters.checkoutRef }}
--- a/.azuredevops/tag-builds/rocm-examples.yml
+++ b/.azuredevops/tag-builds/rocm-examples.yml
@@ -0,0 +1,29 @@
+variables:
+- group: common
+- template: /.azuredevops/variables-global.yml
+
+parameters:
+- name: checkoutRef
+  type: string
+  default: refs/tags/$(LATEST_RELEASE_TAG)
+
+resources:
+  repositories:
+  - repository: pipelines_repo
+    type: github
+    endpoint: ROCm
+    name: ROCm/ROCm
+  - repository: release_repo
+    type: github
+    endpoint: ROCm
+    name: ROCm/rocm-examples
+    ref: ${{ parameters.checkoutRef }}
+
+trigger: none
+pr: none
+
+jobs:
+  - template: ${{ variables.CI_COMPONENT_PATH }}/rocm-examples.yml
+    parameters:
+      checkoutRepo: release_repo
+      checkoutRef: ${{ parameters.checkoutRef }}
--- a/.azuredevops/templates/steps/artifact-download.yml
+++ b/.azuredevops/templates/steps/artifact-download.yml
@@ -21,10 +21,14 @@ parameters:
    half: master
    HIP: develop
    hipBLAS: develop
+    hipCUB: develop
    hipRAND: develop
+    hipSOLVER: develop
    hipSPARSE: develop
    llvm-project: amd-staging
    MIOpen: develop
+    MIVisionX: develop
+    rdc: develop
    rocBLAS: develop
    ROCdbgapi : amd-master
    rocDecode: develop
@@ -40,6 +44,7 @@ parameters:
    rocSOLVER: develop
    rocSPARSE: develop
    ROCT-Thunk-Interface: master
+    rocThrust: develop
    roctracer: amd-master
    rpp: master
 - name: componentsFailureOkay
--- a/.azuredevops/templates/steps/build-cmake.yml
+++ b/.azuredevops/templates/steps/build-cmake.yml
@@ -5,6 +5,9 @@ parameters:
 - name: extraBuildFlags
  type: string
  default: ''
+- name: multithreadFlag
+  type: string
+  default: ''
 - name: cmakeBuildDir
  type: string
  default: 'build'
@@ -17,6 +20,12 @@ parameters:
 - name: installDir
  type: string
  default: '$(Build.BinariesDirectory)'
+- name: customInstallPath
+  type: boolean
+  default: true
+- name: installEnabled
+  type: boolean
+  default: true

 steps:
 # create workingDirectory if it does not exist and change into it
@@ -25,19 +34,23 @@ steps:
  displayName: '${{parameters.componentName }} CMake Flags'
  inputs:
    workingDirectory: ${{ parameters.cmakeBuildDir }}
-    cmakeArgs: -DCMAKE_INSTALL_PREFIX=${{ parameters.installDir }} ${{ parameters.extraBuildFlags }} ..
+    ${{ if eq(parameters.customInstallPath, true) }}:
+      cmakeArgs: -DCMAKE_INSTALL_PREFIX=${{ parameters.installDir }} ${{ parameters.extraBuildFlags }} ..
+    ${{ else }}:
+      cmakeArgs: ${{ parameters.extraBuildFlags }} ..
 # equivalent to running make $cmakeTargetDir from $cmakeBuildDir
 # i.e., cd $cmakeBuildDir; make $cmakeTargetDir
 - task: CMake@1
  displayName: '${{parameters.componentName }} Build'
  inputs:
    workingDirectory: ${{ parameters.cmakeBuildDir }}
-    cmakeArgs: '--build ${{ parameters.cmakeTargetDir }}'
+    cmakeArgs: '--build ${{ parameters.cmakeTargetDir }} ${{ parameters.multithreadFlag }}'
    retryCountOnTaskFailure: 10
 # equivalent to running make $cmakeTarget from $cmakeBuildDir
 # e.g., make install
- task: CMake@1
-  displayName: '${{parameters.componentName }} ${{ parameters.cmakeTarget }}'
-  inputs:
-    workingDirectory: ${{ parameters.cmakeBuildDir }}
-    cmakeArgs: '--build ${{ parameters.cmakeTargetDir }} --target ${{ parameters.cmakeTarget }}'
+- ${{ if eq(parameters.installEnabled, true) }}:
+  - task: CMake@1
+    displayName: '${{parameters.componentName }} ${{ parameters.cmakeTarget }}'
+    inputs:
+      workingDirectory: ${{ parameters.cmakeBuildDir }}
+      cmakeArgs: '--build ${{ parameters.cmakeTargetDir }} --target ${{ parameters.cmakeTarget }}'
--- a/.azuredevops/templates/steps/dependencies-other.yml
+++ b/.azuredevops/templates/steps/dependencies-other.yml
@@ -12,23 +12,31 @@ steps:
  displayName: 'sudo apt-get update'
  inputs:
    targetType: inline
-    script: sudo apt-get update
+    script: sudo apt-get --yes update
+  env:
+    DEBIAN_FRONTEND: noninteractive
 - task: Bash@3
  displayName: 'sudo apt-get upgrade'
  inputs:
    targetType: inline
-    script: sudo apt-get update
+    script: sudo apt-get --yes upgrade
+  env:
+    DEBIAN_FRONTEND: noninteractive
 - task: Bash@3
  displayName: 'sudo apt-get fix'
  inputs:
    targetType: inline
    script: sudo apt --yes --fix-broken install
+  env:
+    DEBIAN_FRONTEND: noninteractive
 - ${{ if gt(length(parameters.aptPackages), 0) }}:
  - task: Bash@3
    displayName: 'sudo apt-get install ...'
    inputs:
      targetType: inline
      script: sudo apt-get --yes install ${{ join(' ', parameters.aptPackages) }}
+    env:
+      DEBIAN_FRONTEND: noninteractive
 - ${{ if gt(length(parameters.pipModules), 0) }}:
  - task: Bash@3
    displayName: 'pip install  ...'
--- a/.azuredevops/templates/steps/dependencies-rocm.yml
+++ b/.azuredevops/templates/steps/dependencies-rocm.yml
@@ -31,11 +31,15 @@ parameters:
    composable_kernel: $(composable-kernel-pipeline-id)
    half: $(half-pipeline-id)
    hipBLAS: $(hipblas-pipeline-id)
+    hipCUB: $(hipcub-pipeline-id)
    HIPIFY: $(hipify-pipeline-id)
    hipRAND: $(hiprand-pipeline-id)
+    hipSOLVER: $(hipsolver-pipeline-id)
    hipSPARSE: $(hipsparse-pipeline-id)
    llvm-project: $(llvm-project-pipeline-id)
    MIOpen: $(miopen-pipeline-id)
+    MIVisionX: $(mivisionx-pipeline-id)
+    rdc: $(rdc-pipeline-id)
    rocBLAS: $(rocblas-pipeline-id)
    ROCdbgapi : $(rocdbgapi-pipeline-id)
    rocDecode: $(rocdecode-pipeline-id)
@@ -52,6 +56,7 @@ parameters:
    rocSOLVER: $(rocsolver-pipeline-id)
    rocSPARSE: $(rocsparse-pipeline-id)
    ROCT-Thunk-Interface: $(roct-thunk-interface-pipeline-id)
+    rocThrust: $(rocthrust-pipeline-id)
    roctracer: $(roctracer-pipeline-id)
    rpp: $(rpp-pipeline-id)
 - name: taggedPipelineIdentifiers
@@ -65,11 +70,15 @@ parameters:
    composable_kernel: $(composable-kernel-tagged-pipeline-id)
    half: $(half-tagged-pipeline-id)
    hipBLAS: $(hipblas-tagged-pipeline-id)
+    hipCUB: $(hipcub-tagged-pipeline-id)
    HIPIFY: $(hipify-tagged-pipeline-id)
    hipRAND: $(hiprand-tagged-pipeline-id)
+    hipSOLVER: $(hipsolver-tagged-pipeline-id)
    hipSPARSE: $(hipsparse-tagged-pipeline-id)
    llvm-project: $(llvm-project-tagged-pipeline-id)
    MIOpen: $(miopen-tagged-pipeline-id)
+    MIVisionX: $(mivisionx-tagged-pipeline-id)
+    rdc: $(rdc-tagged-pipeline-id)
    rocBLAS: $(rocblas-tagged-pipeline-id)
    ROCdbgapi : $(rocdbgapi-tagged-pipeline-id)
    rocDecode: $(rocdecode-tagged-pipeline-id)
@@ -86,6 +95,7 @@ parameters:
    rocSOLVER: $(rocsolver-tagged-pipeline-id)
    rocSPARSE: $(rocsparse-tagged-pipeline-id)
    ROCT-Thunk-Interface: $(roct-thunk-interface-tagged-pipeline-id)
+    rocThrust: $(rocthrust-tagged-pipeline-id)
    roctracer: $(roctracer-tagged-pipeline-id)
    rpp: $(rpp-tagged-pipeline-id)
 # set to true if you're calling this template file multiple files in same pipeline
--- a/.wordlist.txt
+++ b/.wordlist.txt
@@ -15,6 +15,7 @@ AOMP
 APIC
 APIs
 APU
+AQL
 ASIC
 ASICs
 ASan
@@ -62,6 +63,7 @@ CommonMark
 Concretized
 Conda
 ConnectX
+DENORM
 DGEMM
 DKMS
 DL
@@ -70,6 +72,7 @@ DNN
 DNNL
 DPM
 DRI
+DRM
 DW
 DWORD
 Dask
@@ -85,6 +88,7 @@ ELMo
 ENDPGM
 EPYC
 ESXi
+FFFFFFF
 FFT
 FFTs
 FFmpeg
@@ -122,6 +126,7 @@ GenAI
 GenZ
 GitHub
 Gitpod
+HBCC
 HBM
 HCA
 HIPCC
@@ -132,6 +137,7 @@ HPCG
 HPE
 HPL
 HSA
+HW
 HWE
 Haswell
 Higgs
@@ -157,11 +163,16 @@ Intra
 Ioffe
 JSON
 Jupyter
+KBytes
+KERNARG
 KFD
 KiB
+KMD
 KVM
 Keras
+Kernarg
 Khronos
+Ki
 LAPACK
 LCLK
 LDS
@@ -180,6 +191,7 @@ MiB
 MIGraphX
 MIOpen
 MIOpenGEMM
+MIPMAP
 MIVisionX
 MLM
 MMA
@@ -222,6 +234,7 @@ NousResearch's
 NumPy
 OAM
 OAMs
+OBJFILE
 OCP
 OEM
 OFED
@@ -237,6 +250,7 @@ OpenCV
 OpenFabrics
 OpenGL
 OpenMP
+OpenMPI
 OpenSSL
 OpenVX
 PCI
@@ -261,6 +275,7 @@ RCCL
 RDC
 RDMA
 RDNA
+RGP
 RHEL
 ROC
 ROCProfiler
@@ -274,6 +289,7 @@ ROCmCC
 ROCmSoftwarePlatform
 ROCmValidationSuite
 ROCr
+RPATH
 RST
 RW
 Radeon
@@ -302,10 +318,12 @@ SMEM
 SMI
 SMT
 SPI
+SQTT
 SQs
 SRAM
 SRAMECC
 SVD
+SVM
 SWE
 SerDes
 Shlens
@@ -343,6 +361,8 @@ UIF
 USM
 UTCL
 UTIL
+UNBUNDLER
+USWC
 Uncached
 Unhandled
 VALU
@@ -357,6 +377,8 @@ VSIX
 VSkipped
 Vanhoucke
 Vulkan
+WERROR
+WG
 WGP
 WGPs
 WX
@@ -388,6 +410,8 @@ allocator
 allocators
 amdgpu
 api
+arg
+args
 atmi
 atomics
 autogenerated
@@ -400,6 +424,7 @@ bfloat
 bilinear
 bitsandbytes
 blit
+bool
 boson
 bosons
 buildable
@@ -411,6 +436,10 @@ centos
 centric
 changelog
 chiplet
+clBuildProgram
+clCompileProgram
+clLinkProgram
+clr
 cmake
 cmd
 coalescable
@@ -426,6 +455,7 @@ convolutional
 convolves
 cpp
 csn
+cstring
 cuBLAS
 cuFFT
 cuLIB
@@ -443,6 +473,7 @@ deallocation
 denoise
 denoised
 denoises
+denorm
 denormalize
 deserializers
 detections
@@ -457,6 +488,7 @@ embeddings
 enablement
 endpgm
 encodings
+enqueue
 env
 epilog
 etcetera
@@ -480,7 +512,9 @@ heterogenous
 hipBLAS
 hipBLASLt
 hipCUB
+hipConfig
 hipFFT
+hipHostMalloc
 hipLIB
 hipRAND
 hipSOLVER
@@ -489,12 +523,15 @@ hipSPARSELt
 hipTensor
 hipamd
 hipblas
+hipcc
 hipcub
 hipfft
 hipfort
 hipify
+hiprtc
 hipsolver
 hipsparse
+hpc
 hpp
 hsa
 hsakmt
@@ -509,6 +546,7 @@ initializer
 inlining
 installable
 interprocedural
+interprocess
 intra
 invariants
 invocating
@@ -526,9 +564,12 @@ localscratch
 logits
 lossy
 macOS
+malloc
 matchers
+mem
 microarchitecture
 migraphx
+mipmap
 miopen
 miopengemm
 mivisionx
@@ -539,6 +580,8 @@ mvffr
 namespace
 namespaces
 numref
+nvcc
+nvidia
 ocl
 opencl
 opencv
@@ -559,6 +602,7 @@ prebuilt
 precompiled
 prefetch
 prefetchable
+prepinned
 preprocess
 preprocessed
 preprocessing
@@ -589,6 +633,7 @@ rocFFT
 rocLIB
 rocMLIR
 rocPRIM
+rocProfiler
 rocRAND
 rocSOLVER
 rocSPARSE
@@ -609,11 +654,13 @@ rocsolver
 rocsparse
 rocthrust
 roctracer
+rpath
 runtime
 runtimes
 sL
 scalability
 scalable
+sdma
 sendmsg
 serializers
 shader
@@ -624,13 +671,17 @@ smi
 softmax
 spack
 src
+stderr
 stochastically
 strided
+stubing
+suballocaitons
 subdirectory
 subexpression
 subfolder
 subfolders
 supercomputing
+td
 tensorfloat
 th
 tokenization
@@ -647,6 +698,8 @@ tqdm
 tracebacks
 txt
 uarch
+uint
+unbundler
 uncached
 uncorrectable
 uninstallation
@@ -677,12 +730,14 @@ wavefronts
 whitespaces
 workgroup
 workgroups
+workitems
 writeback
 writebacks
 wrreq
 wzo
 xargs
+xf
 xz
 yaml
 ysvmadyb
-zypper
+zypper
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -164,7 +164,9 @@ ROCm™ 6.1.1 introduces minor fixes and improvements to some tools and librarie

 ### OS support

-ROCm 6.1.1 has been tested against a pre-release version of Ubuntu 22.04.5 (kernel: 5.15 [GA], 6.8 [HWE]).
+* ROCm 6.1.1 now supports Oracle Linux. It has been tested against version 8.9 (kernel 5.15.0-205) with AMD Instinct MI300X accelerators.
+
+* ROCm 6.1.1 has been tested against a pre-release version of Ubuntu 22.04.5 (kernel: 5.15 [GA], 6.8 [HWE]).

 ### AMD SMI

@@ -1455,7 +1457,7 @@ Note: These complex operations are equivalent to corresponding types/functions o
      * `HIP_ROCclr`
    * NVIDIA platform
      * `HIP_PLATFORM_NVCC`
-* The [hcc_detail](https://github.com/ROCm/clr/tree/1949b1621a802ffb1492616adbae6154bfbe64ef/hipamd/include/hip/hcc_detail) and [nvcc_detail](https://github.com/ROCm/clr/tree/1949b1621a802ffb1492616adbae6154bfbe64ef/hipamd/include/hips/nvcc_detail) directories in the clr repository are removed.
+* The `hcc_detail` and `nvcc_detail` directories in the clr repository are removed.
 * Deprecated gcnArch is removed from hip device struct `hipDeviceProp_t`.
 * Deprecated `enum hipMemoryType memoryType;` is removed from HIP struct `hipPointerAttribute_t` union.

--- a/2
+++ b/2
@@ -1,6 +1,6 @@
 MIT License

-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/README.md
+++ b/README.md
@@ -76,8 +76,8 @@ The Build time will reduce significantly if we limit the GPU Architecture/s agai

 mkdir -p ~/WORKSPACE/      # Or any folder name other than WORKSPACE
 cd ~/WORKSPACE/
-export ROCM_VERSION=6.1.0   # or 6.1.1
-~/bin/repo init -u http://github.com/ROCm/ROCm.git -b roc-6.1.x -m rocm-build/rocm-${ROCM_VERSION}.xml
+export ROCM_VERSION=6.1.0   # or 6.1.1 or 6.1.2
+~/bin/repo init -u http://github.com/ROCm/ROCm.git -b roc-6.1.x -m tools/rocm-build/rocm-${ROCM_VERSION}.xml
 ~/bin/repo sync

 # --------------------------------------
@@ -86,9 +86,9 @@ export ROCM_VERSION=6.1.0   # or 6.1.1

 # Option 1: Start a docker container
 # Pulling required base docker images:
-# Ubuntu20.04 built from ROCm/rocm-build/docker/ubuntu20/Dockerfile
+# Ubuntu20.04 built from ROCm/tools/rocm-build/docker/ubuntu20/Dockerfile
 docker pull rocm/rocm-build-ubuntu-20.04:6.1
-# Ubuntu22.04 built from ROCm/rocm-build/docker/ubuntu22/Dockerfile
+# Ubuntu22.04 built from ROCm/tools/rocm-build/docker/ubuntu22/Dockerfile
 docker pull rocm/rocm-build-ubuntu-22.04:6.1

 # Start docker container and mount the source code folder:
@@ -107,10 +107,10 @@ docker run -ti \

 # Option 2: Install required packages into the host machine
 # For ubuntu20.04 system
-cd ROCm/rocm-build/docker/ubuntu20
+cd ROCm/tools/rocm-build/docker/ubuntu20
 bash install-prerequisites.sh
 # For ubuntu22.04 system
-cd ROCm/rocm-build/docker/ubuntu22
+cd ROCm/tools/rocm-build/docker/ubuntu22
 bash install-prerequisities.sh

 # --------------------------------------
@@ -126,13 +126,13 @@ export GPU_ARCHS="gfx940;gfx941;gfx942" # Example

 # Pick and run build commands in the docker container:
 # Build rocm-dev packages
-make -f ROCm/rocm-build/ROCm.mk -j ${NPROC:-$(nproc)} rocm-dev
+make -f ROCm/tools/rocm-build/ROCm.mk -j ${NPROC:-$(nproc)} rocm-dev
 # Build all ROCm packages
-make -f ROCm/rocm-build/ROCm.mk -j ${NPROC:-$(nproc)} all
+make -f ROCm/tools/rocm-build/ROCm.mk -j ${NPROC:-$(nproc)} all
 # list all ROCm components to find required components
-make -f ROCm/rocm-build/ROCm.mk list_components
+make -f ROCm/tools/rocm-build/ROCm.mk list_components
 # Build a single ROCm packages
-make -f ROCm/rocm-build/ROCm.mk T_rocblas
+make -f ROCm/tools/rocm-build/ROCm.mk T_rocblas

 # Find built packages in ubuntu20.04:
 out/ubuntu-20.04/20.04/deb/
@@ -151,7 +151,7 @@ out/ubuntu-22.04/22.04/logs/rocblas.inprogress  # Example
 out/ubuntu-22.04/22.04/logs/rocblas             # Example
 ```

-Note: [Overview for ROCm.mk](rocm-build/README.md)
+Note: [Overview for ROCm.mk](tools/rocm-build/README.md)

 ## ROCm documentation

--- a/docs/about/compatibility/openmp.md
+++ b/docs/about/compatibility/openmp.md
@@ -77,8 +77,7 @@ Obtain the value of `gpu-arch` by running the following command:

 [//]: # (dated link below, needs updating)

-See the complete list of compiler command-line references
-[here](https://github.com/ROCm/llvm-project/blob/amd-stg-open/clang/docs/CommandGuide/clang.rst).
+See the complete list of [compiler command-line references](https://github.com/ROCm/llvm-project/blob/amd-staging/openmp/docs/CommandLineArgumentReference.rst).

 ### Using `rocprof` with OpenMP

--- a/docs/about/license.md
+++ b/docs/about/license.md
@@ -17,7 +17,7 @@ following section.

 ## ROCm component licenses

-ROCm is released by Advanced Micro Devices, Inc. and is licensed per component separately.
+ROCm is released by Advanced Micro Devices, Inc. (AMD) and is licensed per component separately.
 The following table is a list of ROCm components with links to their respective license
 terms. These components may include third party components subject to
 additional licenses. Please review individual repositories for more information.
@@ -25,66 +25,71 @@ additional licenses. Please review individual repositories for more information.
 <!-- spellcheck-disable -->
 | Component | License |
 |:---------------------|:-------------------------|
-| [AMDMIGraphX](https://github.com/ROCm/AMDMIGraphX/) | [MIT](https://github.com/ROCm/AMDMIGraphX/blob/develop/LICENSE) |
-| [HIPCC](https://github.com/ROCm/HIPCC/blob/develop/LICENSE.txt) | [MIT](https://github.com/ROCm/HIPCC/blob/develop/LICENSE.txt) |
-| [HIPIFY](https://github.com/ROCm/HIPIFY/) | [MIT](https://github.com/ROCm/HIPIFY/blob/amd-staging/LICENSE.txt) |
 | [HIP](https://github.com/ROCm/HIP/) | [MIT](https://github.com/ROCm/HIP/blob/develop/LICENSE.txt) |
-| [MIOpenGEMM](https://github.com/ROCm/MIOpenGEMM/) | [MIT](https://github.com/ROCm/MIOpenGEMM/blob/master/LICENSE.txt) |
-| [MIOpen](https://github.com/ROCm/MIOpen/) | [MIT](https://github.com/ROCm/MIOpen/blob/master/LICENSE.txt) |
-| [MIVisionX](https://github.com/ROCm/MIVisionX/) | [MIT](https://github.com/ROCm/MIVisionX/blob/master/LICENSE.txt) |
-| [RCP](https://github.com/GPUOpen-Tools/radeon_compute_profiler/) | [MIT](https://github.com/GPUOpen-Tools/radeon_compute_profiler/blob/master/LICENSE) |
-| [ROCK-Kernel-Driver](https://github.com/ROCm/ROCK-Kernel-Driver/) | [GPL 2.0 WITH Linux-syscall-note](https://github.com/ROCm/ROCK-Kernel-Driver/blob/master/COPYING) |
-| [ROCR-Runtime](https://github.com/ROCm/ROCR-Runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCR-Runtime/blob/master/LICENSE.txt) |
-| [ROCT-Thunk-Interface](https://github.com/ROCm/ROCT-Thunk-Interface/) | [MIT](https://github.com/ROCm/ROCT-Thunk-Interface/blob/master/LICENSE.md) |
-| [ROCclr](https://github.com/ROCm/ROCclr/) | [MIT](https://github.com/ROCm/ROCclr/blob/develop/LICENSE.txt) |
-| [ROCdbgapi](https://github.com/ROCm/ROCdbgapi/) | [MIT](https://github.com/ROCm/ROCdbgapi/blob/amd-master/LICENSE.txt) |
-| [ROCgdb](https://github.com/ROCm/ROCgdb/) | [GNU General Public License v2.0](https://github.com/ROCm/ROCgdb/blob/amd-master/COPYING) |
-| [ROCm-CompilerSupport](https://github.com/ROCm/ROCm-CompilerSupport/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCm-CompilerSupport/blob/amd-stg-open/LICENSE.txt) |
-| [ROCm-Device-Libs](https://github.com/ROCm/ROCm-Device-Libs/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCm-Device-Libs/blob/amd-stg-open/LICENSE.TXT) |
-| [ROCm-OpenCL-Runtime/api/opencl/khronos/icd](https://github.com/KhronosGroup/OpenCL-ICD-Loader/) | [Apache 2.0](https://github.com/KhronosGroup/OpenCL-ICD-Loader/blob/main/LICENSE) |
-| [ROCm-OpenCL-Runtime](https://github.com/ROCm/ROCm-OpenCL-Runtime/) | [MIT](https://github.com/ROCm/ROCm-OpenCL-Runtime/blob/develop/LICENSE.txt) |
-| [ROCmValidationSuite](https://github.com/ROCm/ROCmValidationSuite/) | [MIT](https://github.com/ROCm/ROCmValidationSuite/blob/master/LICENSE) |
+| [HIPCC](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/hipcc) | [MIT](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/hipcc/LICENSE.txt) |
+| [HIPIFY](https://github.com/ROCm/HIPIFY/) | [MIT](https://github.com/ROCm/HIPIFY/blob/amd-staging/LICENSE.txt) |
+| [AMDMIGraphX](https://github.com/ROCm/AMDMIGraphX/) | [MIT](https://github.com/ROCm/AMDMIGraphX/blob/develop/LICENSE) |
+| [MIOpen](https://github.com/ROCm/MIOpen/) | [MIT](https://github.com/ROCm/MIOpen/blob/develop/LICENSE.txt) |
+| [MIVisionX](https://github.com/ROCm/MIVisionX/) | [MIT](https://github.com/ROCm/MIVisionX/blob/develop/LICENSE.txt) |
+| [AMD Common Language Runtime (CLR)](https://github.com/ROCm/clr) | [MIT](https://github.com/ROCm/clr/blob/develop/LICENCE) |
+| [ROCm-Core](https://github.com/ROCm/rocm-core) | [MIT](https://github.com/ROCm/rocm-core/blob/master/copyright) |
+| [hipamd](https://github.com/ROCm/clr/tree/develop/hipamd) | [MIT](https://github.com/ROCm/clr/blob/develop/hipamd/LICENSE.txt) |
+| [ROCm-OpenCL-Runtime](https://github.com/ROCm/clr/tree/develop/opencl) | [MIT](https://github.com/ROCm/clr/blob/develop/opencl/LICENSE.txt) |
 | [Tensile](https://github.com/ROCm/Tensile/) | [MIT](https://github.com/ROCm/Tensile/blob/develop/LICENSE.md) |
-| [aomp-extras](https://github.com/ROCm/aomp-extras/) | [MIT](https://github.com/ROCm/aomp-extras/blob/aomp-dev/LICENSE) |
 | [aomp](https://github.com/ROCm/aomp/) | [Apache 2.0](https://github.com/ROCm/aomp/blob/aomp-dev/LICENSE) |
-| [atmi](https://github.com/ROCm/atmi/) | [MIT](https://github.com/ROCm/atmi/blob/master/LICENSE.txt) |
+| [aomp-extras](https://github.com/ROCm/aomp-extras/) | [MIT](https://github.com/ROCm/aomp-extras/blob/aomp-dev/LICENSE) |
+| [llvm-project](https://github.com/ROCm/llvm-project/) | [Apache](https://github.com/ROCm/llvm-project/blob/amd-staging/LICENSE.TXT) |
+| [llvm-project/flang](https://github.com/ROCm/llvm-project/tree/amd-staging/flang) | [Apache 2.0](https://github.com/ROCm/llvm-project/blob/amd-staging/flang/LICENSE.TXT) |
+| [Code Object Manager (Comgr)](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/comgr) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/comgr/LICENSE.txt) |
+| [ROCm-Device-Libs](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/device-libs) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/device-libs/LICENSE.TXT) |
 | [clang-ocl](https://github.com/ROCm/clang-ocl/) | [MIT](https://github.com/ROCm/clang-ocl/blob/master/LICENSE) |
-| [flang](https://github.com/ROCm/flang/) | [Apache 2.0](https://github.com/ROCm/flang/blob/master/LICENSE.txt) |
-| [half](https://github.com/ROCm/half/) | [MIT](https://github.com/ROCm/half/blob/master/LICENSE.txt) |
+| [ROCK-Kernel-Driver](https://github.com/ROCm/ROCK-Kernel-Driver/) | [GPL 2.0 WITH Linux-syscall-note](https://github.com/ROCm/ROCK-Kernel-Driver/blob/master/COPYING) |
+| [ROCT-Thunk-Interface](https://github.com/ROCm/ROCT-Thunk-Interface/) | [MIT](https://github.com/ROCm/ROCT-Thunk-Interface/blob/master/LICENSE.md) |
+| [ROCR-Runtime](https://github.com/ROCm/ROCR-Runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCR-Runtime/blob/master/LICENSE.txt) |
+| [ROCR Debug Agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/amd-staging/LICENSE.txt) |
+| [Composable Kernel](https://github.com/ROCm/composable_kernel) | [MIT](https://github.com/ROCm/composable_kernel/blob/develop/LICENSE) |
+| [half](https://github.com/ROCm/half/) | [MIT](https://github.com/ROCm/half/blob/rocm/LICENSE.txt) |
 | [hipBLAS](https://github.com/ROCm/hipBLAS/) | [MIT](https://github.com/ROCm/hipBLAS/blob/develop/LICENSE.md) |
+| [hipBLASLt](https://github.com/ROCm/hipBLASLt/) | [MIT](https://github.com/ROCm/hipBLASLt/blob/develop/LICENSE.md) |
 | [hipCUB](https://github.com/ROCm/hipCUB/) | [Custom](https://github.com/ROCm/hipCUB/blob/develop/LICENSE.txt) |
 | [hipFFT](https://github.com/ROCm/hipFFT/) | [MIT](https://github.com/ROCm/hipFFT/blob/develop/LICENSE.md) |
+| [hipFORT](https://github.com/ROCm/hipfort/) | [MIT](https://github.com/ROCm/hipfort/blob/develop/LICENSE) |
+| [hipRAND](https://github.com/ROCm/hipRAND/) | [MIT](https://github.com/ROCm/hipRAND/blob/develop/LICENSE.txt) |
 | [hipSOLVER](https://github.com/ROCm/hipSOLVER/) | [MIT](https://github.com/ROCm/hipSOLVER/blob/develop/LICENSE.md) |
-| [hipSPARSELt](https://github.com/ROCm/hipSPARSELt/) | [MIT](https://github.com/ROCm/hipSPARSELt/blob/develop/LICENSE.md) |
 | [hipSPARSE](https://github.com/ROCm/hipSPARSE/) | [MIT](https://github.com/ROCm/hipSPARSE/blob/develop/LICENSE.md) |
+| [hipSPARSELt](https://github.com/ROCm/hipSPARSELt/) | [MIT](https://github.com/ROCm/hipSPARSELt/blob/develop/LICENSE.md) |
 | [hipTensor](https://github.com/ROCm/hipTensor) | [MIT](https://github.com/ROCm/hipTensor/blob/develop/LICENSE) |
-| [hipamd](https://github.com/ROCm/hipamd/) | [MIT](https://github.com/ROCm/hipamd/blob/develop/LICENSE.txt) |
-| [hipfort](https://github.com/ROCm/hipfort/) | [MIT](https://github.com/ROCm/hipfort/blob/master/LICENSE) |
-| [llvm-project](https://github.com/ROCm/llvm-project/) | [Apache](https://github.com/ROCm/llvm-project/blob/main/LICENSE.TXT) |
-| [rccl](https://github.com/ROCm/rccl/) | [Custom](https://github.com/ROCm/rccl/blob/develop/LICENSE.txt) |
-| [rdc](https://github.com/ROCm/rdc/) | [MIT](https://github.com/ROCm/rdc/blob/master/LICENSE) |
+| [rocAL](https://github.com/ROCm/rocAL) | [MIT](https://github.com/ROCm/rocAL/blob/develop/LICENSE.txt) |
 | [rocALUTION](https://github.com/ROCm/rocALUTION/) | [MIT](https://github.com/ROCm/rocALUTION/blob/develop/LICENSE.md) |
 | [rocBLAS](https://github.com/ROCm/rocBLAS/) | [MIT](https://github.com/ROCm/rocBLAS/blob/develop/LICENSE.md) |
+| [rocDecode](https://github.com/ROCm/rocDecode) | [MIT](https://github.com/ROCm/rocDecode/blob/develop/LICENSE) |
 | [rocFFT](https://github.com/ROCm/rocFFT/) | [MIT](https://github.com/ROCm/rocFFT/blob/develop/LICENSE.md) |
 | [rocPRIM](https://github.com/ROCm/rocPRIM/) | [MIT](https://github.com/ROCm/rocPRIM/blob/develop/LICENSE.txt) |
+| [ROCm Performance Primitives (RPP)](https://github.com/ROCm/rpp) | [MIT](https://github.com/ROCm/rpp/blob/develop/LICENSE) |
 | [rocRAND](https://github.com/ROCm/rocRAND/) | [MIT](https://github.com/ROCm/rocRAND/blob/develop/LICENSE.txt) |
 | [rocSOLVER](https://github.com/ROCm/rocSOLVER/) | [BSD-2-Clause](https://github.com/ROCm/rocSOLVER/blob/develop/LICENSE.md) |
 | [rocSPARSE](https://github.com/ROCm/rocSPARSE/) | [MIT](https://github.com/ROCm/rocSPARSE/blob/develop/LICENSE.md) |
 | [rocThrust](https://github.com/ROCm/rocThrust/) | [Apache 2.0](https://github.com/ROCm/rocThrust/blob/develop/LICENSE) |
 | [rocWMMA](https://github.com/ROCm/rocWMMA/) | [MIT](https://github.com/ROCm/rocWMMA/blob/develop/LICENSE.md) |
-| [rocm-cmake](https://github.com/ROCm/rocm-cmake/) | [MIT](https://github.com/ROCm/rocm-cmake/blob/develop/LICENSE) |
-| [rocm_bandwidth_test](https://github.com/ROCm/rocm_bandwidth_test/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocm_bandwidth_test/blob/master/LICENSE.txt) |
-| [rocm_smi_lib](https://github.com/ROCm/rocm_smi_lib/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocm_smi_lib/blob/master/License.txt) |
-| [rocminfo](https://github.com/ROCm/rocminfo/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocminfo/blob/master/License.txt) |
-| [rocprofiler](https://github.com/ROCm/rocprofiler/) | [MIT](https://github.com/ROCm/rocprofiler/blob/amd-master/LICENSE) |
-| [rocr_debug_agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/master/LICENSE.txt) |
-| [roctracer](https://github.com/ROCm/roctracer/) | [MIT](https://github.com/ROCm/roctracer/blob/amd-master/LICENSE) |
-| rocm-llvm-alt | [AMD Proprietary License](https://www.amd.com/en/support/amd-software-eula)
+| [ROCm Communication Collectives Library (RCCL)](https://github.com/ROCm/rccl/) | [Custom](https://github.com/ROCm/rccl/blob/develop/LICENSE.txt) |
+| [ROCm Data Center (RDC)](https://github.com/ROCm/rdc/) | [MIT](https://github.com/ROCm/rdc/blob/develop/LICENSE) |
+| [ROCm CMake](https://github.com/ROCm/rocm-cmake/) | [MIT](https://github.com/ROCm/rocm-cmake/blob/develop/LICENSE) |
+| [ROCdbgapi](https://github.com/ROCm/ROCdbgapi/) | [MIT](https://github.com/ROCm/ROCdbgapi/blob/amd-staging/LICENSE.txt) |
+| [ROCgdb](https://github.com/ROCm/ROCgdb/) | [GNU General Public License v2.0](https://github.com/ROCm/ROCgdb/blob/amd-master/COPYING) |
+| [ROCm SMI Lib](https://github.com/ROCm/rocm_smi_lib/) | [MIT](https://github.com/ROCm/rocm_smi_lib/blob/develop/License.txt) |
+| [AMD SMI](https://github.com/ROCm/amdsmi) | [MIT](https://github.com/ROCm/amdsmi/blob/develop/LICENSE) |
+| [rocminfo](https://github.com/ROCm/rocminfo/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocminfo/blob/amd-staging/License.txt) |
+| [ROCProfiler](https://github.com/ROCm/rocprofiler/) | [MIT](https://github.com/ROCm/rocprofiler/blob/amd-master/LICENSE) |
+| [ROCTracer](https://github.com/ROCm/roctracer/) | [MIT](https://github.com/ROCm/roctracer/blob/amd-master/LICENSE) |
+| [ROCm Bandwidth Test](https://github.com/ROCm/rocm_bandwidth_test/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocm_bandwidth_test/blob/master/LICENSE.txt) |
+| [TransferBench](https://github.com/ROCm/TransferBench) | [MIT](https://github.com/ROCm/TransferBench/blob/develop/LICENSE.md) |
+| [ROCmValidationSuite](https://github.com/ROCm/ROCmValidationSuite/) | [MIT](https://github.com/ROCm/ROCmValidationSuite/blob/master/LICENSE) |
+| hsa-amd-aqlprofile | [AMD Software EULA](https://www.amd.com/en/legal/eula/amd-software-eula.html) |

 Open sourced ROCm components are released via public GitHub
-repositories, packages on https://repo.radeon.com and other distribution channels.
-Proprietary products are only available on https://repo.radeon.com. Currently, only
-one component of ROCm, rocm-llvm-alt is governed by a proprietary license.
+repositories, packages on [https://repo.radeon.com](https://repo.radeon.com) and other distribution channels.
+Proprietary products are only available on [https://repo.radeon.com](https://repo.radeon.com). Currently, only
+one component of ROCm, `rocm-llvm-alt`, is governed by a proprietary license.
 Proprietary components are organized in a proprietary subdirectory in the package
 repositories to distinguish from open sourced packages.

@@ -92,7 +97,7 @@ repositories to distinguish from open sourced packages.
 The following additional terms and conditions apply to your use of ROCm technical documentation.
 ```

-©2023 Advanced Micro Devices, Inc. All rights reserved.
+©2023 - 2024 Advanced Micro Devices, Inc. All rights reserved.

 The information presented in this document is for informational purposes only
 and may contain technical inaccuracies, omissions, and typographical errors. The
@@ -125,8 +130,8 @@ companies.

 :::{attention}
 AQL Profiler and AOCC CPU optimization are both provided in binary form, each
-subject to the license agreement enclosed in the directory for the binary and is
-available here: `/opt/rocm/share/doc/rocm-llvm-alt/EULA`. By using, installing,
+subject to the license agreement enclosed in the directory for the binary available
+in `/opt/rocm/share/doc/hsa-amd-aqlprofile/EULA`. By using, installing,
 copying or distributing AQL Profiler and/or AOCC CPU Optimizations, you agree to
 the terms and conditions of this license agreement. If you do not agree to the
 terms of this agreement, do not install, copy or use the AQL Profiler and/or the
@@ -134,9 +139,8 @@ AOCC CPU Optimizations.
 :::

 For the rest of the ROCm packages, you can find the licensing information at the
-following location: `/opt/rocm/share/doc/<component-name>/`
+following location: `/opt/rocm/share/doc/<component-name>/` or in the locations
+specified in the preceding table.

-For example, you can fetch the licensing information of the `_amd_comgr_`
-component (Code Object Manager) from the `amd_comgr` folder. A file named
-`LICENSE.txt` contains the license details at:
-`/opt/rocm-5.4.3/share/doc/amd_comgr/LICENSE.txt`
+For example, you can fetch the licensing information of the `amd_comgr`
+component (Code Object Manager) from the `/opt/rocm/share/doc/amd_comgr/LICENSE.txt` file.
--- a/docs/compatibility/compatibility-matrix.rst
+++ b/docs/compatibility/compatibility-matrix.rst
@@ -17,10 +17,11 @@ Use this matrix to view the ROCm compatibility across successive major and minor

      :doc:`Operating Systems <rocm-install-on-linux:reference/system-requirements>`, "Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3"
      ,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
-      ,"RHEL 9.3, 9.2","RHEL 9.3, 9.2"
+      ,"RHEL 9.4 [#red-hat94]_, 9.3, 9.2","RHEL 9.3, 9.2"
      ,"RHEL 8.9, 8.8","RHEL 8.9, 8.8"
      ,"SLES 15 SP5, SP4","SLES 15 SP5, SP4"
      ,CentOS 7.9,CentOS 7.9
+      ,"Oracle Linux 8.9 [#oracle89]_"
      ,,
      :doc:`GFX Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3
      ,CDNA2,CDNA2
@@ -36,9 +37,9 @@ Use this matrix to view the ROCm compatibility across successive major and minor
      ,,
      ECOSYSTEM SUPPORT:,,
      :doc:`PyTorch <rocm-install-on-linux:how-to/3rd-party/pytorch-install>`,"2.1, 2.0, 1.13","2.1, 2.0, 1.13"
-      :doc:`Tensorflow <rocm-install-on-linux:how-to/3rd-party/tensorflow-install>`,"2.15, 2.14, 2.13","2.14, 2.13, 2.12"
+      :doc:`TensorFlow <rocm-install-on-linux:how-to/3rd-party/tensorflow-install>`,"2.15, 2.14, 2.13","2.14, 2.13, 2.12"
      :doc:`JAX <rocm-install-on-linux:how-to/3rd-party/jax-install>`,0.4.26,0.4.26
-      `ONNX-RT <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.17.3,1.14.1
+      `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.17.3,1.14.1
      ,,
      3RD PARTY COMMUNICATION LIBS:,,
      `UCC <https://github.com/ROCm/ucc>`_,>=1.2.0,>=1.2.0
@@ -52,12 +53,12 @@ Use this matrix to view the ROCm compatibility across successive major and minor
      :doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0
      :doc:`MIGraphX <amdmigraphx:index>`,2.9.0,2.8.0
      :doc:`MIOpen <miopen:index>`,3.1.0,3.0.0
-      :doc:`MIVisionX <mivisionx:doxygen/html/index>`,2.5.0,2.5.0
+      :doc:`MIVisionX <mivisionx:index>`,2.5.0,2.5.0
      :doc:`rocDecode <rocdecode:index>`,0.5.0,N/A
-      :doc:`RPP <rpp:index>`,1.5.0,1.4.0
+      :doc:`ROCm Performance Primitives (RPP) <rpp:index>`,1.5.0,1.4.0
      ,,
      COMMUNICATION:,,
-      :doc:`rccl <rccl:index>`,2.18.6,2.18.3
+      :doc:`RCCL <rccl:index>`,2.18.6,2.18.3
      ,,
      MATH LIBS:,,
      `half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0
@@ -86,7 +87,7 @@ Use this matrix to view the ROCm compatibility across successive major and minor
      ,,
      SUPPORT LIBS:,,
      `hipother <https://github.com/ROCm/hipother>`_,6.1.40091,6.0.32830
-      `rocm-cmake <https://github.com/ROCm/rocm-cmake>`_,0.12.0,0.11.0
+      :doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.12.0,0.11.0
      `rocm-core <https://github.com/ROCm/rocm-core>`_,6.1.0,6.0.0
      `ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,20240125.3.30,20231016.2.245
      ,,
@@ -94,20 +95,19 @@ Use this matrix to view the ROCm compatibility across successive major and minor
      :doc:`AMD SMI <amdsmi:index>`,24.4.1,23.4.2
      :doc:`HIPIFY <hipify:index>`,17.0.0,17.0.0
      :doc:`ROCdbgapi <rocdbgapi:index>`,0.71.0,0.71.0
-      `ROCdebug-Agent <https://github.com/ROCm/rocr_debug_agent>`_,2.0.3,2.0.3
-      :doc:`rocGDB <rocgdb:index>`,14.1.0,13.2.0
-      :doc:`rocProfiler <rocprofiler:profiler_home_page>`,2.0.60100,2.0.0
+      :doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0
+      :doc:`ROCProfiler <rocprofiler:index>`,2.0.60100,2.0.0
      `rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.3.0,N/A
-      :doc:`rocTracer <roctracer:index>`,4.1.60100,4.1.0
-      `rocm_bandwidth_test <https://github.com/ROCm/rocm_bandwidth_test>`_,1.4.0,1.4.0
+      :doc:`ROCTracer <roctracer:index>`,4.1.60100,4.1.0
+      :doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0
      :doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0
-      `rocminfo <https://github.com/ROCm/rocminfo>`_,1.0.0,1.0.0
-      :doc:`ROCm SMI Lib <rocm_smi_lib:index>`,7.0.0,6.0.0
+      :doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,14.1.0,13.2.0
+      :doc:`ROCm SMI <rocm_smi_lib:index>`,7.0.0,6.0.0
      :doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,rocm-6.1.0,rocm-6.0.0
+      :doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.3,2.0.3
      :doc:`TransferBench <transferbench:index>`,1.48,1.46
      ,,
      COMPILERS:,,
-      `AOMP <https://github.com/ROCm/aomp>`_,17.60.0,17.60.0
      `clang-ocl <https://github.com/ROCm/clang-ocl>`_,0.5.0,0.5.0
      `Flang <https://github.com/ROCm/flang>`_,17.0.0.24103,17.0.0.23483
      `llvm-project <https://github.com/ROCm/llvm-project>`_,17.0.0.24103,17.0.0.23483
@@ -116,11 +116,13 @@ Use this matrix to view the ROCm compatibility across successive major and minor
      RUNTIMES:,,
      :doc:`HIP <hip:index>`,6.1.40091,6.0.32830
      `OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0
-      `ROCR Runtime <https://github.com/ROCm/ROCR-Runtime>`_,1.13.0,1.12.0
+      :doc:`ROCR-Runtime <rocr-runtime:index>`,1.13.0,1.12.0


 .. rubric:: Footnotes
-.. [#] **For ROCm 6.1** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.3 & 8.9 and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
+
+.. [#red-hat94] **For ROCm 6.1** - RHEL 9.4 is supported only on AMD Instinct MI300A.
+.. [#oracle89] **For ROCm 6.1.1** - Oracle Linux is supported only on AMD Instinct MI300X.
+.. [#] **For ROCm 6.1** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
 .. [#] **For ROCm 6.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9 and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.

-
--- a/docs/conceptual/setting-cus.rst
+++ b/docs/conceptual/setting-cus.rst
@@ -1,47 +0,0 @@
-.. meta::
-    :description: Setting the number of CUs
-    :keywords: AMD, ROCm, cu, number of cus
-
-.. _env-variables-reference:
-
-*************************************************************
-Setting the number of CUs
-*************************************************************
-
-When using GPUs to accelerate compute workloads, it sometimes becomes necessary
-to configure the hardware's usage of Compute Units (CU). This is a more advanced
-option, so please read this page before experimentation.
-
-The GPU driver provides two environment variables to set the number of CUs used. The
-first one is ``HSA_CU_MASK`` and the second one is ``ROC_GLOBAL_CU_MASK``. The main
-difference is that ``ROC_GLOBAL_CU_MASK`` sets the CU mask on queues created by the HIP
-or the OpenCL runtimes. While ``HSA_CU_MASK`` sets the mask on a lower level of queue
-creation in the driver, this mask will also be set for queues being profiled.
-
-The environment variables have the following syntax:
-
-::
-
-    ID = [0-9][0-9]*                         ex. base 10 numbers
-    ID_list = (ID | ID-ID)[, (ID | ID-ID)]*  ex. 0,2-4,7
-    GPU_list = ID_list                       ex. 0,2-4,7
-    CU_list = 0x[0-F]* | ID_list             ex. 0x337F OR 0,2-4,7
-    CU_Set = GPU_list : CU_list              ex. 0,2-4,7:0-15,32-47 OR 0,2-4,7:0x337F
-    HSA_CU_MASK = CU_Set [; CU_Set]*         ex. 0,2-4,7:0-15,32-47; 3-9:0x337F
-
-The GPU indices are taken post ``ROCR_VISIBLE_DEVICES`` reordering. For GPUs listed,
-the listed or masked CUs will be enabled, the rest disabled. Unlisted GPUs will not
-be affected, their CUs will all be enabled.
-
-The parsing of the variable is stopped when a syntax error occurs. The erroneous set
-and the ones following will be ignored. Repeating GPU or CU IDs are a syntax error.
-Specifying a mask with no usable CUs (CU_list is 0x0) is a syntax error. For excluding
-GPU devices use ``ROCR_VISIBLE_DEVICES``.
-
-These environment variables only affect ROCm software, not graphics applications.
-
-It's important to know that not all CU configurations are valid on all devices. For
-instance, on devices where two CUs can be combined into a WGP (for kernels running in
-WGP mode), it is not valid to disable only a single CU in a WGP. `This paper
-<https://www.cs.unc.edu/~otternes/papers/rtsj2022.pdf>`_ can provide more information
-about what to expect, when disabling CUs.
--- a/docs/conceptual/using-gpu-sanitizer.md
+++ b/docs/conceptual/using-gpu-sanitizer.md
@@ -13,7 +13,9 @@ This document provides documentation on using ROCm ASan.

 For information about LLVM ASan, see the [LLVM documentation](https://clang.llvm.org/docs/AddressSanitizer.html).

-**Note:** The beta release of LLVM ASan for ROCm is currently tested and validated on Ubuntu 20.04.
+:::{note}
+The beta release of LLVM ASan for ROCm is currently tested and validated on Ubuntu 20.04.
+:::

 ## Compiling for ASan

@@ -34,9 +36,13 @@ Recommendations for doing this are:

 Other architectures are allowed, but their device code will not be instrumented and a warning will be emitted.

-**Note:** It is not an error to compile some files without ASan instrumentation, but doing so reduces the ability of the process to detect addressing errors. However, if the main program "`a.out`" does not directly depend on the ASan runtime (`libclang_rt.asan-x86_64.so`) after the build completes (check by running `ldd` (List Dynamic Dependencies) or `readelf`), the application will immediately report an error at runtime as described in the next section.
+:::{tip}
+It is not an error to compile some files without ASan instrumentation, but doing so reduces the ability of the process to detect addressing errors. However, if the main program "`a.out`" does not directly depend on the ASan runtime (`libclang_rt.asan-x86_64.so`) after the build completes (check by running `ldd` (List Dynamic Dependencies) or `readelf`), the application will immediately report an error at runtime as described in the next section.
+:::

-**Note:** When compiling OpenMP programs with ASan instrumentation, it is currently necessary to set the environment variable `LIBRARY_PATH` to `/opt/rocm-<version>/lib/llvm/lib/asan:/opt/rocm-<version>/lib/asan`. At runtime, it may be necessary to add `/opt/rocm-<version>/lib/llvm/lib/asan` to `LD_LIBRARY_PATH`.
+:::{note}
+When compiling OpenMP programs with ASan instrumentation, it is currently necessary to set the environment variable `LIBRARY_PATH` to `/opt/rocm-<version>/lib/llvm/lib/asan:/opt/rocm-<version>/lib/asan`. At runtime, it may be necessary to add `/opt/rocm-<version>/lib/llvm/lib/asan` to `LD_LIBRARY_PATH`.
+:::

 ### About compilation time

@@ -92,15 +98,23 @@ If it does not appear, when executed the application will quickly output an ASan

 There is an environment variable, `ASAN_OPTIONS`, that can be used to adjust the runtime behavior of the ASan runtime itself. There are more than a hundred "flags" that can be adjusted (see an old list at [flags](https://github.com/google/sanitizers/wiki/AddressSanitizerFlags)) but the default settings are correct and should be used in most cases. It must be noted that these options only affect the host ASan runtime. The device runtime only currently supports the default settings for the few relevant options.

-There are two `ASAN_OPTION` flags of particular note.
+There are three `ASAN_OPTIONS` flags of note.

 * `halt_on_error=0/1 default 1`.

-This tells the ASan runtime to halt the application immediately after detecting and reporting an addressing error. The default makes sense because the application has entered the realm of undefined behavior. If the developer wishes to have the application continue anyway, this option can be set to zero. However, the application and libraries should then be compiled with the additional option `-fsanitize-recover=address`. Note that the ROCm optional ASan instrumented libraries are not compiled with this option and if an error is detected within one of them, but halt_on_error is set to 0, more undefined behavior will occur.
+  This tells the ASan runtime to halt the application immediately after detecting and reporting an addressing error. The default makes sense because the application has entered the realm of undefined behavior. If the developer wishes to have the application continue anyway, this option can be set to zero. However, the application and libraries should then be compiled with the additional option `-fsanitize-recover=address`. Note that the ROCm optional ASan instrumented libraries are not compiled with this option, so if an error is detected within one of them while `halt_on_error` is set to 0, more undefined behavior will occur.

 * `detect_leaks=0/1 default 1`.

-This option directs the ASan runtime to enable the [Leak Sanitizer](https://clang.llvm.org/docs/LeakSanitizer.html) (LSan). Unfortunately, for heterogeneous applications, this default will result in significant output from the leak sanitizer when the application exits due to allocations made by the language runtime which are not considered to be leaks. This output can be avoided by adding `detect_leaks=0` to the `ASAN_OPTIONS`, or alternatively by producing an LSan suppression file (syntax described [here](https://github.com/google/sanitizers/wiki/AddressSanitizerLeakSanitizer)) and activating it with environment variable `LSAN_OPTIONS=suppressions=/path/to/suppression/file`. When using a suppression file, a suppression report is printed by default. The suppression report can be disabled by using the `LSAN_OPTIONS` flag `print_suppressions=0`.
+  This option directs the ASan runtime to enable the [Leak Sanitizer](https://clang.llvm.org/docs/LeakSanitizer.html) (LSan). For heterogeneous applications, this default results in significant output from the leak sanitizer when the application exits due to allocations made by the language runtime which are not considered to be leaks. This output can be avoided by adding `detect_leaks=0` to the `ASAN_OPTIONS`, or alternatively by producing an LSan suppression file (syntax described [here](https://github.com/google/sanitizers/wiki/AddressSanitizerLeakSanitizer)) and activating it with environment variable `LSAN_OPTIONS=suppressions=/path/to/suppression/file`. When using a suppression file, a suppression report is printed by default. The suppression report can be disabled by using the `LSAN_OPTIONS` flag `print_suppressions=0`.
+
+* `quarantine_size_mb=N default 256`
+
+  This option defines the number of megabytes (MB) `N` of memory that the ASan runtime will hold after it is `freed` to detect use-after-free situations. This memory is unavailable for other purposes. The default of 256 MB may be too small to detect some use-after-free situations, especially given that the large size of many GPU memory allocations may push `freed` allocations out of quarantine before the attempted use.
+
+  :::{note}
+  Setting the value of `quarantine_size_mb` larger may enable more problematic uses to be detected, but at the cost of reducing memory available for other purposes.
+  :::

 ## Runtime overhead

@@ -186,7 +200,7 @@ or

 currently may include one or two surprising CPU-side tracebacks mentioning "`hostcall`". This is due to how `malloc` and `free` are implemented for GPU code, and these call stacks can be ignored.

-### Running with `rocgdb`
+## Running ASan with `rocgdb`

 `rocgdb` can be used to further investigate ASan detected errors, with some preparation.

@@ -238,7 +252,7 @@ $ rocgdb <path to application>
 (gdb) c
 ```

-### Using ASan with a short HIP application
+## Using ASan with a short HIP application

 Consider the following simple and short demo of using the Address Sanitizer with a HIP application:

@@ -402,7 +416,7 @@ Shadow byte legend (one shadow byte represents 8 application bytes):
 ==2817==ABORTING
 ```

-### Known issues with using GPU sanitizer
+## Known issues with using GPU sanitizer

 * Red zones must have limited size. It is possible for an invalid access to completely miss a red zone and not be detected.

--- a/docs/conf.py
+++ b/docs/conf.py
@@ -57,34 +57,10 @@ article_pages = [
        "date":"2024-06-04"
    },

-    {"file":"install/windows/install-quick", "os":["windows"]},
-    {"file":"install/linux/install-quick", "os":["linux"]},
-
-    {"file":"install/linux/install", "os":["linux"]},
-    {"file":"install/linux/install-options", "os":["linux"]},
-    {"file":"install/linux/prerequisites", "os":["linux"]},
-
-    {"file":"install/docker", "os":["linux"]},
-    {"file":"install/magma-install", "os":["linux"]},
-    {"file":"install/pytorch-install", "os":["linux"]},
-    {"file":"install/tensorflow-install", "os":["linux"]},
-
-    {"file":"install/windows/install", "os":["windows"]},
-    {"file":"install/windows/prerequisites", "os":["windows"]},
-    {"file":"install/windows/cli/index", "os":["windows"]},
-    {"file":"install/windows/gui/index", "os":["windows"]},
-
-    {"file":"about/compatibility/docker-image-support-matrix", "os":["linux"]},
-    {"file":"about/compatibility/user-kernel-space-compat-matrix", "os":["linux"]},
-
-    {"file":"reference/library-index", "os":["linux"]},
-
    {"file":"how-to/deep-learning-rocm", "os":["linux"]},
    {"file":"how-to/gpu-enabled-mpi", "os":["linux"]},
    {"file":"how-to/system-debugging", "os":["linux"]},
    {"file":"how-to/tuning-guides", "os":["linux", "windows"]},
-
-    {"file":"rocm-a-z", "os":["linux", "windows"]},
 ]

 exclude_patterns = ['temp']
@@ -108,5 +84,5 @@ html_theme_options = {
 }

 redirects = {
-     "reference/openmp/openmp": "../../about/compatibility/openmp.html"
+    "reference/openmp/openmp": "../../about/compatibility/openmp.html"
 }
--- a/docs/data/how-to/framework_install_2024_05_23.png
+++ b/docs/data/how-to/framework_install_2024_05_23.png
--- a/docs/data/how-to/llm-fine-tuning-optimization/attention-module.png
+++ b/docs/data/how-to/llm-fine-tuning-optimization/attention-module.png
--- a/docs/data/how-to/llm-fine-tuning-optimization/ck-comparisons.jpg
+++ b/docs/data/how-to/llm-fine-tuning-optimization/ck-comparisons.jpg
--- a/docs/data/how-to/llm-fine-tuning-optimization/ck-compilation.jpg
+++ b/docs/data/how-to/llm-fine-tuning-optimization/ck-compilation.jpg
--- a/docs/data/how-to/llm-fine-tuning-optimization/ck-inference_flow.jpg
+++ b/docs/data/how-to/llm-fine-tuning-optimization/ck-inference_flow.jpg
--- a/docs/data/how-to/llm-fine-tuning-optimization/ck-kernel_launch.jpg
+++ b/docs/data/how-to/llm-fine-tuning-optimization/ck-kernel_launch.jpg
--- a/docs/data/how-to/llm-fine-tuning-optimization/ck-operation_flow.jpg
+++ b/docs/data/how-to/llm-fine-tuning-optimization/ck-operation_flow.jpg
--- a/docs/data/how-to/llm-fine-tuning-optimization/ck-root_instance.jpg
+++ b/docs/data/how-to/llm-fine-tuning-optimization/ck-root_instance.jpg
--- a/docs/data/how-to/llm-fine-tuning-optimization/ck-template_parameters.jpg
+++ b/docs/data/how-to/llm-fine-tuning-optimization/ck-template_parameters.jpg
--- a/docs/data/how-to/llm-fine-tuning-optimization/compute-unit.png
+++ b/docs/data/how-to/llm-fine-tuning-optimization/compute-unit.png
--- a/docs/data/how-to/llm-fine-tuning-optimization/occupancy-vgpr.png
+++ b/docs/data/how-to/llm-fine-tuning-optimization/occupancy-vgpr.png
--- a/docs/data/how-to/llm-fine-tuning-optimization/omniperf-analysis.png
+++ b/docs/data/how-to/llm-fine-tuning-optimization/omniperf-analysis.png
--- a/docs/data/how-to/llm-fine-tuning-optimization/omnitrace-timeline.png
+++ b/docs/data/how-to/llm-fine-tuning-optimization/omnitrace-timeline.png
--- a/docs/data/how-to/llm-fine-tuning-optimization/perfetto-trace.svg
+++ b/docs/data/how-to/llm-fine-tuning-optimization/perfetto-trace.svg
--- a/docs/data/how-to/llm-fine-tuning-optimization/profiling-perfetto-ui.png
+++ b/docs/data/how-to/llm-fine-tuning-optimization/profiling-perfetto-ui.png
--- a/docs/data/how-to/llm-fine-tuning-optimization/tunableop.png
+++ b/docs/data/how-to/llm-fine-tuning-optimization/tunableop.png
--- a/docs/data/how-to/llm-fine-tuning-optimization/vllm-single-gpu-log.png
+++ b/docs/data/how-to/llm-fine-tuning-optimization/vllm-single-gpu-log.png
--- a/docs/data/how-to/llm-fine-tuning-optimization/weight-update.png
+++ b/docs/data/how-to/llm-fine-tuning-optimization/weight-update.png
--- a/docs/data/how-to/rocm-for-hpc/hpc-stack-2024_6_20.png
+++ b/docs/data/how-to/rocm-for-hpc/hpc-stack-2024_6_20.png
--- a/docs/how-to/deep-learning-rocm.rst
+++ b/docs/how-to/deep-learning-rocm.rst
@@ -8,44 +8,14 @@ Installing deep learning frameworks for ROCm

 ROCm provides a comprehensive ecosystem for deep learning development, including
 :ref:`libraries <artificial-intelligence-apis>` for optimized deep learning operations and ROCm-aware versions of popular
-deep learning frameworks and libraries such as PyTorch, TensorFlow, JAX, and MAGMA. ROCm works closely with these
+deep learning frameworks and libraries such as PyTorch, TensorFlow, and JAX. ROCm works closely with these
 frameworks to ensure that framework-specific optimizations take advantage of AMD accelerator and GPU architectures.

 The following guides cover installation processes for ROCm-aware deep learning frameworks.

-.. grid::
-
-   .. grid-item::
-      :columns: 3
-
-      :doc:`PyTorch for ROCm <rocm-install-on-linux:how-to/3rd-party/pytorch-install>`
-
-   .. grid-item::
-      :columns: 3
-
-      :doc:`TensorFlow for ROCm <rocm-install-on-linux:how-to/3rd-party/tensorflow-install>`
-
-   .. grid-item::
-      :columns: 3
-
-   .. grid-item::
-      :columns: 3
-
-   .. grid-item::
-      :columns: 3
-
-      :doc:`JAX for ROCm <rocm-install-on-linux:how-to/3rd-party/jax-install>`
-
-   .. grid-item::
-      :columns: 3
-
-      :doc:`MAGMA for ROCm <rocm-install-on-linux:how-to/3rd-party/magma-install>`
-
-   .. grid-item::
-      :columns: 3
-
-   .. grid-item::
-      :columns: 3
+* :doc:`PyTorch for ROCm <rocm-install-on-linux:how-to/3rd-party/pytorch-install>`
+* :doc:`TensorFlow for ROCm <rocm-install-on-linux:how-to/3rd-party/tensorflow-install>`
+* :doc:`JAX for ROCm <rocm-install-on-linux:how-to/3rd-party/jax-install>`

 The following chart steps through typical installation workflows for installing deep learning frameworks for ROCm.

@@ -65,4 +35,4 @@ through the following guides.

 * :doc:`rocm-for-ai/index`

-* :doc:`fine-tuning-llms/index`
+* :doc:`llm-fine-tuning-optimization/index`
--- a/docs/how-to/llm-fine-tuning-optimization/fine-tuning-and-inference.rst
+++ b/docs/how-to/llm-fine-tuning-optimization/fine-tuning-and-inference.rst
--- a/docs/how-to/llm-fine-tuning-optimization/index.rst
+++ b/docs/how-to/llm-fine-tuning-optimization/index.rst
--- a/docs/how-to/llm-fine-tuning-optimization/llm-inference-frameworks.rst
+++ b/docs/how-to/llm-fine-tuning-optimization/llm-inference-frameworks.rst
@@ -28,18 +28,9 @@ graphs, tensor parallel multi-GPU, GPTQ, AWQ, and token speculation.
 Installing vLLM
 ---------------

-1. To install vLLM, run the following commands.
-
-   .. code-block:: shell
-
-      # Install from source
-      git clone https://github.com/ROCm/vllm.git    
-      cd vllm
-      PYTORCH_ROCM_ARCH=gfx942 python setup.py install #MI300 series
-
 .. _fine-tuning-llms-vllm-rocm-docker-image:

-2. Run the following commands to build a Docker image ``vllm-rocm``.
+1. Run the following commands to build a Docker image ``vllm-rocm``.

   .. code-block:: shell

@@ -52,7 +43,7 @@ Installing vLLM
   .. tab-item:: vLLM on a single-accelerator system
      :sync: single

-      3. To use vLLM as an API server to serve reference requests, first start a container using the :ref:`vllm-rocm
+      2. To use vLLM as an API server to serve reference requests, first start a container using the :ref:`vllm-rocm
         Docker image <fine-tuning-llms-vllm-rocm-docker-image>`.

         .. code-block:: shell
@@ -69,7 +60,7 @@ Installing vLLM
               vllm-rocm \
               bash

-      4. Inside the container, start the API server to run on a single accelerator on port 8000 using the following command.
+      3. Inside the container, start the API server to run on a single accelerator on port 8000 using the following command.

         .. code-block:: shell

@@ -77,11 +68,11 @@ Installing vLLM

         The following log message displayed in your command line indicates that the server is listening for requests.

-         .. image:: ../../data/how-to/fine-tuning-llms/vllm-single-gpu-log.png
+         .. image:: ../../data/how-to/llm-fine-tuning-optimization/vllm-single-gpu-log.png
            :alt: vLLM API server log message
            :align: center

-      5. To test, send it a curl request containing a prompt.
+      4. To test, send it a curl request containing a prompt.

         .. code-block:: shell

@@ -92,11 +83,11 @@ Installing vLLM
         .. code-block:: text

            {"text":["What is AMD Instinct?\nAmd Instinct is a brand new line of high-performance computing (HPC) processors from Advanced Micro Devices (AMD). These processors are designed to deliver unparalleled performance for HPC workloads, including scientific simulations, data analytics, and machine learning.\nThe Instinct lineup includes a range of processors, from the entry-level Inst"]}
-            
+
   .. tab-item:: vLLM on a multi-accelerator system
      :sync: multi

-      3. To use vLLM as an API server to serve reference requests, first start a container using the :ref:`vllm-rocm
+      2. To use vLLM as an API server to serve reference requests, first start a container using the :ref:`vllm-rocm
         Docker image <fine-tuning-llms-vllm-rocm-docker-image>`.

         .. code-block:: shell
@@ -114,14 +105,14 @@ Installing vLLM
               bash


-      4. To run API server on multiple GPUs, use the ``-tp``  or ``--tensor-parallel-size``  parameter. For example, to use two
+      3. To run API server on multiple GPUs, use the ``-tp``  or ``--tensor-parallel-size``  parameter. For example, to use two
         GPUs, start the API server using the following command.

         .. code-block:: shell

            python -m vllm.entrypoints.api_server --model /app/model --dtype float16 -tp 2 --port 8000 &

-      5. To run multiple instances of API Servers, specify different ports for each server, and use ``ROCR_VISIBLE_DEVICES`` to
+      4. To run multiple instances of API Servers, specify different ports for each server, and use ``ROCR_VISIBLE_DEVICES`` to
         isolate each instance to a different accelerator.

         For example, to run two API servers, one on port 8000 using GPU 0 and 1, one on port 8001 using GPU 2 and 3, use a
@@ -132,7 +123,7 @@ Installing vLLM
            ROCR_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.api_server --model /data/llama-2-7b-chat-hf --dtype float16 -tp 2 --port 8000 &
            ROCR_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.api_server --model /data/llama-2-7b-chat-hf --dtype float16 -tp 2 --port 8001 &

-      6. To test, send it a curl request containing a prompt.
+      5. To test, send it a curl request containing a prompt.

         .. code-block:: shell

@@ -163,27 +154,29 @@ speculation.
 Install TGI
 -----------

-1. To install the TGI Docker image, run the following commands.
+1. Launch the TGI Docker container on the host machine.

   .. code-block:: shell

-      # Install from Dockerfile
-      git clone https://github.com/huggingface/text-generation-inference.git -b mi300-compat    
-      cd text-generation-inference
-      docker build . -f Dockerfile.rocm
+      docker run --name tgi --rm -it --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
+      --device=/dev/kfd --device=/dev/dri --group-add video --ipc=host --shm-size 256g \
+      --net host -v $PWD:/data \
+      --entrypoint "/bin/bash" \
+      --env HUGGINGFACE_HUB_CACHE=/data \
+      ghcr.io/huggingface/text-generation-inference:latest-rocm

 .. tab-set::

   .. tab-item:: TGI on a single-accelerator system
      :sync: single

-      2. Launch a model using TGI server on a single accelerator.
+      2. Inside the container, launch a model using TGI server on a single accelerator.

         .. code-block:: shell

            export ROCM_USE_FLASH_ATTN_V2_TRITON=True
            text-generation-launcher --model-id NousResearch/Meta-Llama-3-70B --dtype float16 --port 8000 &
-      
+
      3. To test, send it a curl request containing a prompt.

         .. code-block:: shell
@@ -191,26 +184,26 @@ Install TGI
            curl http://localhost:8000/generate_stream -X POST -d '{"inputs":"What is AMD Instinct?","parameters":{"max_new_tokens":20}}' -H 'Content-Type: application/json'

         You should receive a response like the following.
-      
+
         .. code-block:: shell

            data:{"index":20,"token":{"id":304,"text":" in","logprob":-1.2822266,"special":false},"generated_text":" AMD Instinct is a new family of data center GPUs designed to accelerate the most demanding workloads in","details":null}

   .. tab-item:: TGI on a multi-accelerator system

-      2. Launch a model using TGI server on multiple accelerators (4 in this case).
+      2. Inside the container, launch a model using TGI server on multiple accelerators (4 in this case).

         .. code-block:: shell

            export ROCM_USE_FLASH_ATTN_V2_TRITON=True
            text-generation-launcher --model-id NousResearch/Meta-Llama-3-8B --dtype float16 --port 8000 --num-shard 4 &
-      
+
      3. To test, send it a curl request containing a prompt.

         .. code-block:: shell

            curl http://localhost:8000/generate_stream -X POST -d '{"inputs":"What is AMD Instinct?","parameters":{"max_new_tokens":20}}' -H 'Content-Type: application/json'
-      
+
         You should receive a response like the following.

         .. code-block:: shell
--- a/docs/how-to/llm-fine-tuning-optimization/model-acceleration-libraries.rst
+++ b/docs/how-to/llm-fine-tuning-optimization/model-acceleration-libraries.rst
@@ -18,7 +18,7 @@ Attention (GQA), and Multi-Query Attention (MQA). This reduction in memory movem
 time-to-first-token (TTFT) latency for large batch sizes and long prompt sequences, thereby enhancing overall
 performance.

-.. image:: ../../data/how-to/fine-tuning-llms/attention-module.png
+.. image:: ../../data/how-to/llm-fine-tuning-optimization/attention-module.png
   :alt: Attention module of a large language module utilizing tiling
   :align: center

@@ -243,7 +243,7 @@ page describes the options.
   Validator,ROCBLAS_VERSION,4.1.0-cefa4a9b-dirty
   GemmTunableOp_float_TN,tn_200_100_20,Gemm_Rocblas_32323,0.00669595

-.. image:: ../../data/how-to/fine-tuning-llms/tunableop.png
+.. image:: ../../data/how-to/llm-fine-tuning-optimization/tunableop.png
   :alt: GEMM and TunableOp
   :align: center

--- a/docs/how-to/llm-fine-tuning-optimization/model-quantization.rst
+++ b/docs/how-to/llm-fine-tuning-optimization/model-quantization.rst
@@ -154,13 +154,13 @@ kernels by configuring the ``exllama_config`` parameter as the following.
 .. code-block:: python

   from transformers import AutoModelForCausalLM, GPTQConfig
-   pretrained_model_dir = "meta-llama/Llama-2-7b"
-   gptq_config = GPTQConfig(bits=4, exllama_config={"version":2})
+   #pretrained_model_dir = "meta-llama/Llama-2-7b"
+   base_model_name = "NousResearch/Llama-2-7b-hf"
+   gptq_config = GPTQConfig(bits=4, dataset="c4", exllama_config={"version":2})
   quantized_model = AutoModelForCausalLM.from_pretrained(
-                           base_model_name, 
-                           device_map="auto", 
+                           base_model_name,
+                           device_map="auto",
                           quantization_config=gptq_config)
-
 bitsandbytes
 ============

--- a/docs/how-to/llm-fine-tuning-optimization/multi-gpu-fine-tuning-and-inference.rst
+++ b/docs/how-to/llm-fine-tuning-optimization/multi-gpu-fine-tuning-and-inference.rst
--- a/docs/how-to/llm-fine-tuning-optimization/optimizing-triton-kernel.rst
+++ b/docs/how-to/llm-fine-tuning-optimization/optimizing-triton-kernel.rst
@@ -31,7 +31,7 @@ Each accelerator or GPU has multiple Compute Units (CUs) and various CUs do comp
 can a compute kernel can allocate its task to? For the :doc:`AMD MI300X accelerator <../../reference/gpu-arch-specs>`, the
 grid should have at least 1024 thread blocks or workgroups.

-.. figure:: ../../data/how-to/fine-tuning-llms/compute-unit.png
+.. figure:: ../../data/how-to/llm-fine-tuning-optimization/compute-unit.png

   Schematic representation of a CU in the CDNA2 or CDNA3 architecture.

@@ -187,7 +187,7 @@ Kernel occupancy

 .. _fine-tuning-llms-occupancy-vgpr-table:

-.. figure:: ../../data/how-to/fine-tuning-llms/occupancy-vgpr.png
+.. figure:: ../../data/how-to/llm-fine-tuning-optimization/occupancy-vgpr.png
   :alt: Occupancy related to VGPR usage in an Instinct MI300X accelerator.
   :align: center

@@ -343,11 +343,6 @@ or :doc:`rocBLAS <rocblas:index>` is faster for a specific operation.
  then required to strip out the kernel and create kernel
  compilation and launch via Triton.

-* For advanced ``matmul`` or ``conv`` configuration tuning, the ``inductor-gemm-tuner`` can
-  help. This implements the Triton ``conv``/``mm`` implementations used upstream
-  and allows specification of inputs and configuration tuning search space if new
-  tunings are found that can be added to the auto-tune list.
-
 Other guidelines
 ================

--- a/docs/how-to/llm-fine-tuning-optimization/optimizing-with-composable-kernel.md
+++ b/docs/how-to/llm-fine-tuning-optimization/optimizing-with-composable-kernel.md
@@ -32,7 +32,7 @@ The template parameters of the instance are grouped into four parameter types:
 ================
 ### Figure 2
 ================ -->
-```{figure} ../../data/how-to/fine-tuning-llms/ck-template_parameters.jpg
+```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-template_parameters.jpg
 The template parameters of the selected GEMM kernel are classified into four groups. These template parameter groups should be defined properly before running the instance.
 ```

@@ -126,7 +126,7 @@ The row and column, and stride information of input matrices are also passed to
 ================
 ### Figure 3
 ================ -->
-```{figure} ../../data/how-to/fine-tuning-llms/ck-kernel_launch.jpg
+```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-kernel_launch.jpg
 Templated kernel launching consists of kernel instantiation, making arguments by passing in actual application parameters, creating an invoker, and running the instance through the invoker.
 ```

@@ -155,7 +155,7 @@ The first operation in the process is to perform the multiplication of input mat
 ================
 ### Figure 4
 ================ -->
-```{figure} ../../data/how-to/fine-tuning-llms/ck-operation_flow.jpg
+```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-operation_flow.jpg
 Operation flow.
 ```

@@ -171,7 +171,7 @@ Here, we use [DeviceBatchedGemmMultiD_Xdl](https://github.com/ROCm/composable_ke
 ================
 ### Figure 5
 ================ -->
-```{figure} ../../data/how-to/fine-tuning-llms/ck-root_instance.jpg
+```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-root_instance.jpg
 Use the ‘DeviceBatchedGemmMultiD_Xdl’ instance as a root.
 ```

@@ -421,7 +421,7 @@ Run `python setup.py install` to build and install the extension. It should look
 ================
 ### Figure 6
 ================ -->
-```{figure} ../../data/how-to/fine-tuning-llms/ck-compilation.jpg
+```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-compilation.jpg
 Compilation and installation of the INT8 kernels.
 ```

@@ -433,7 +433,7 @@ The implementation architecture of running SmoothQuant models on MI300X GPUs is
 ================
 ### Figure 7
 ================ -->
-```{figure} ../../data/how-to/fine-tuning-llms/ck-inference_flow.jpg
+```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-inference_flow.jpg
 The implementation architecture of running SmoothQuant models on AMD MI300X accelerators.
 ```

@@ -459,7 +459,7 @@ Figure 8 shows the performance comparisons between the original FP16 and the Smo
 ================
 ### Figure 8
 ================ -->
-```{figure} ../../data/how-to/fine-tuning-llms/ck-comparisons.jpg
+```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-comparisons.jpg
 Performance comparisons between the original FP16 and the SmoothQuant-quantized INT8 models on a single MI300X accelerator.
 ```

--- a/docs/how-to/llm-fine-tuning-optimization/overview.rst
+++ b/docs/how-to/llm-fine-tuning-optimization/overview.rst
@@ -41,7 +41,7 @@ The weight update is as follows: :math:`W_{updated} = W + ΔW`.
 If the weight matrix :math:`W` contains 7B parameters, then the weight update matrix :math:`ΔW` should also
 contain 7B parameters. Therefore, the :math:`ΔW` calculation is computationally and memory intensive.

-.. figure:: ../../data/how-to/fine-tuning-llms/weight-update.png
+.. figure:: ../../data/how-to/llm-fine-tuning-optimization/weight-update.png
   :alt: Weight update diagram

   (a) Weight update in regular fine-tuning. (b) Weight update in LoRA where the product of matrix A (:math:`M\times K`)
--- a/docs/how-to/llm-fine-tuning-optimization/profiling-and-debugging.rst
+++ b/docs/how-to/llm-fine-tuning-optimization/profiling-and-debugging.rst
@@ -38,7 +38,7 @@ You can then visualize and view these metrics using an open-source profile visua
   shows transactions denoting the CPU activities that launch GPU kernels while the lower section shows the actual GPU
   activities where it processes the ``resnet18`` inferences layer by layer. 

-   .. figure:: ../../data/how-to/fine-tuning-llms/perfetto-trace.svg
+   .. figure:: ../../data/how-to/llm-fine-tuning-optimization/perfetto-trace.svg
      
      Perfetto trace visualization example.

@@ -100,7 +100,7 @@ analyze bottlenecks and stressors for their computational workloads on AMD Insti
   Omniperf collects hardware counters in multiple passes, and will therefore re-run the application during each pass
   to collect different sets of metrics.

-.. figure:: ../../data/how-to/fine-tuning-llms/omniperf-analysis.png
+.. figure:: ../../data/how-to/llm-fine-tuning-optimization/omniperf-analysis.png

   Omniperf memory chart analysis panel.

@@ -130,7 +130,7 @@ hardware counters are also included.
   have the greatest impact on the end-to-end execution of the application and to discover what else is happening on the
   system during a performance bottleneck.

-.. figure:: ../../data/how-to/fine-tuning-llms/omnitrace-timeline.png
+.. figure:: ../../data/how-to/llm-fine-tuning-optimization/omnitrace-timeline.png

   Omnitrace timeline trace example.

@@ -138,10 +138,10 @@ For details usage and examples of using these tools, refer to the
 `Introduction to profiling tools for AMD hardware <https://rocm.blogs.amd.com/software-tools-optimization/profilers/README.html>`_
 developer blog.

-Debugging with ROCm Debug Agent
+Debugging with ROCr Debug Agent
 ===============================

-ROCm Debug Agent (:doc:`ROCdebug-agent <rocr_debug_agent:index>`) is a library that can be loaded by the ROCm platform
+:doc:`ROCr Debug Agent <rocr_debug_agent:index>` is a library that can be loaded by the ROCm platform
 runtime (:doc:`ROCr <rocr-runtime:index>`) to provide the following functionalities for all AMD accelerators and GPUs
 supported by the ROCm Debugger API (:doc:`ROCdbgapi <rocdbgapi:index>`).

@@ -155,9 +155,9 @@ Debugging memory access faults
 ------------------------------

 Identifying a faulting kernel is often enough to triage a memory access fault. To that end, the
-`ROCm Debug Agent <https://github.com/ROCm/rocr_debug_agent/>`_ can trap a memory access fault and provide a dump of all
+`ROCr Debug Agent <https://github.com/ROCm/rocr_debug_agent/>`_ can trap a memory access fault and provide a dump of all
 active wavefronts that caused the error as well as the name of the kernel. The
-`AMD ROCm Debug Agent Library README <https://github.com/ROCm/rocr_debug_agent/blob/master/README.md>`_ provides full
+`ROCr Debug Agent Library README <https://github.com/ROCm/rocr_debug_agent/blob/master/README.md>`_ provides full
 instructions, but in brief:

 *  Compiling with ``-ggdb -O0`` is recommended but not required.
--- a/docs/how-to/llm-fine-tuning-optimization/single-gpu-fine-tuning-and-inference.rst
+++ b/docs/how-to/llm-fine-tuning-optimization/single-gpu-fine-tuning-and-inference.rst
--- a/docs/how-to/rocm-for-ai/index.rst
+++ b/docs/how-to/rocm-for-ai/index.rst
@@ -21,3 +21,6 @@ In this guide, you'll learn about:
 - :doc:`Running models from Hugging Face <hugging-face-models>`

 - :doc:`Deploying your model <deploy-your-model>`
+
+To learn about ROCm for HPC applications and scientific computing, see
+:doc:`../rocm-for-hpc/index`.
--- a/docs/how-to/rocm-for-ai/train-a-model.rst
+++ b/docs/how-to/rocm-for-ai/train-a-model.rst
@@ -110,7 +110,7 @@ Fine-tuning your model
 ROCm supports multiple techniques for :ref:`optimizing fine-tuning <fine-tuning-llms-concept-optimizations>`, for
 example, LoRA, QLoRA, PEFT, and FSDP.

-Learn more about challenges and solutions for model fine-tuning in :doc:`../fine-tuning-llms/index`.
+Learn more about challenges and solutions for model fine-tuning in :doc:`../llm-fine-tuning-optimization/index`.

 The following developer blogs showcase examples of how to fine-tune a model on an AMD accelerator or GPU.

--- a/docs/how-to/rocm-for-hpc/index.rst
+++ b/docs/how-to/rocm-for-hpc/index.rst
@@ -0,0 +1,231 @@
+.. meta::
+   :description: How to use ROCm for HPC
+   :keywords: ROCm, AI, high performance computing, HPC
+
+******************
+Using ROCm for HPC
+******************
+
+The ROCm open-source software stack is optimized to extract high-performance
+computing (HPC) workload performance from AMD Instinct™ accelerators
+while maintaining compatibility with industry software frameworks.
+
+ROCm enhances support and access for developers by providing streamlined and
+improved tools that significantly increase productivity. Being open-source, ROCm
+fosters innovation, differentiation, and collaboration within the developer
+community, making it a powerful and accessible solution for leveraging the full
+potential of AMD accelerators' capabilities in diverse computational
+applications.
+
+* For more information, see :doc:`What is ROCm? <../../what-is-rocm>`.
+
+* For guidance on installing ROCm, see :doc:`rocm-install-on-linux:index`. See
+  the :doc:`../../compatibility/compatibility-matrix` for details on hardware
+  and operating system support.
+
+Some of the most popular HPC frameworks are part of the ROCm platform, including
+those to help parallelize operations across multiple accelerators and servers,
+handle memory hierarchies, and solve linear systems.
+
+.. image:: ../../data/how-to/rocm-for-hpc/hpc-stack-2024_6_20.png
+   :align: center
+   :alt: Software and hardware ecosystem surrounding ROCm and AMD Instinct for HPC
+
+The following catalog of GPU-accelerated solutions includes a vast set of
+platform-compatible HPC applications, including those for astrophysics, climate 
+and weather, computational chemistry, computational fluid dynamics, earth
+science, genomics, geophysics, molecular dynamics, and physics computing.
+
+Refer to the resources in the following table for instructions on building,
+running, and deploying these applications on ROCm-capable systems with AMD
+Instinct accelerators. Each build container provides parameters to specify
+different source code branches, release versions of ROCm, OpenMPI, UCX, and
+Ubuntu versions.
+
+.. _hpc-apps:
+
+..
+   Reduce font size of HPC app descriptions slightly.
+
+.. raw:: html
+
+   <style>
+     #hpc-apps-table tr td:last-child {
+       font-size: 0.9rem;
+     }
+   </style>
+
+.. container::
+   :name: hpc-apps-table
+
+   .. list-table::
+      :header-rows: 1
+      :stub-columns: 1
+      :widths: 2 2 5
+
+      * - Application domain
+        - HPC application
+        - Description
+
+      * - Physics
+        - `Chroma <https://github.com/amd/InfinityHub-CI/tree/main/chroma/>`_
+        - The Chroma package supports data-parallel programming constructs for lattice
+          field theory and in particular lattice QCD. It uses the SciDAC QDP++ data-parallel
+          programming (in C++) that presents a single high-level code image to the user,
+          but can generate highly optimized code for many architectural systems including
+          single node workstations, multi and many-core nodes, clusters of nodes via
+          QMP, and classic vector computers.
+
+      * -
+        - `Grid <https://github.com/amd/InfinityHub-CI/tree/main/grid/>`_
+        - Grid is a library for lattice QCD calculations that employs a high-level data parallel
+          approach while using a number of techniques to target multiple types of parallelism.
+          The library currently supports MPI, OpenMP and short vector parallelism. The SIMD
+          instruction sets covered include SSE, AVX, AVX2, FMA4, IMCI and AVX512. Recent
+          releases expanded this support to include GPU offloading.
+
+      * -
+        - `MILC <https://github.com/amd/InfinityHub-CI/tree/main/milc/>`_
+        - The MILC Code is a set of research codes developed by the MIMD Lattice Computation
+          (MILC) collaboration for doing simulations of four-dimensional SU(3) lattice gauge
+          theory on MIMD parallel machines scaling from single-processor workstations
+          to HPC systems. The MILC Code is publicly available for research purposes.
+          Publications of work done using this code or derivatives of this code should
+          acknowledge this use.
+
+      * -
+        - `PIConGPU <https://github.com/amd/InfinityHub-CI/tree/main/picongpu>`_
+        - PIConGPU (Particle-in-cell on Graphics Processing Units) is an Open Source
+          simulations framework for plasma and laser-plasma physics used to develop
+          advanced particle accelerators for radiation therapy of cancer, high energy
+          physics and photon science.
+
+      * - Astrophysics
+        - `Cholla <https://github.com/amd/InfinityHub-CI/tree/main/cholla/>`_
+        - An astrophysical simulation code developed for the extreme environments
+          encountered in astrophysical systems.
+
+      * - Geophysics
+        - `SPECFEM3D Cartesian <https://github.com/amd/InfinityHub-CI/tree/main/specfem3d>`_
+        - SPECFEM3D Cartesian simulates acoustic (fluid), elastic (solid), coupled
+          acoustic/elastic, poroelastic or seismic wave propagation in any type of
+          conforming mesh of hexahedra (structured or not). It can, for instance,
+          model seismic waves propagating in sedimentary basins or any other
+          regional geological model following earthquakes. It can also be used
+          for non-destructive testing or for ocean acoustics.
+
+      * - Molecular dynamics
+        - `GROMACS with HIP (AMD implementation) <https://github.com/amd/InfinityHub-CI/tree/main/gromacs>`_
+        - GROMACS is a versatile package to perform molecular dynamics, i.e.
+          simulate the Newtonian equations of motion for systems with hundreds
+          to millions of particles. This AMD container is based on a released
+          version of GROMACS modified by AMD. This container only supports up
+          to an 8-GPU configuration.
+
+      * -
+        - `LAMMPS <https://github.com/amd/InfinityHub-CI/tree/main/lammps>`_
+        - LAMMPS is a classical molecular dynamics code with a focus on materials
+          modeling. It's an acronym for Large-scale Atomic/Molecular Massively
+          Parallel Simulator.
+
+      * - Computational fluid dynamics
+        - `NEKO <https://github.com/amd/InfinityHub-CI/tree/main/neko>`_
+        - Neko is a portable framework for high-order spectral element flow simulations.
+          Written in modern Fortran, Neko adopts an object-oriented approach, allowing
+          multi-tier abstractions of the solver stack and facilitating various hardware
+          backends ranging from general-purpose processors, CUDA and HIP enabled
+          accelerators to SX-Aurora vector processors.
+
+      * -
+        - `nekRS <https://github.com/amd/InfinityHub-CI/tree/main/nekrs>`_
+        - nekRS is an open-source Navier Stokes solver based on the spectral element
+          method targeting classical processors and accelerators like GPUs. 
+
+      * - Computational chemistry
+        - `QUDA <https://github.com/amd/InfinityHub-CI/tree/main/quda>`_
+        - Library designed for efficient lattice QCD computations on
+          accelerators. It includes optimized Dirac operators and a variety of
+          fermion solvers and conjugate gradient (CG) implementations, enhancing
+          performance and accuracy in lattice QCD simulations.
+
+      * - Electronic structure
+        - `CP2K <https://github.com/amd/InfinityHub-CI/tree/main/cp2k>`_
+        - CP2K is a quantum chemistry and solid state physics software package that can
+          perform atomistic simulations of solid state, liquid, molecular, periodic, material,
+          crystal, and biological systems. This AMD container, based on a released version
+          of CP2K, is an AMD beta version with ongoing optimizations.
+
+      * - Quantum Monte Carlo Simulation
+        - `QMCPACK <https://github.com/amd/InfinityHub-CI/tree/main/qmcpack>`_
+        - QMCPACK is an open-source production-level many-body ab initio Quantum
+          Monte Carlo code for computing the electronic structure of atoms, molecules, 2D
+          nanomaterials and solids. The solid-state capabilities include metallic systems
+          as well as insulators. QMCPACK is expected to run well on workstations through
+          to the latest generation supercomputers. Besides high performance, particular
+          emphasis is placed on code quality and reproducibility.
+
+      * - Climate and weather
+        - `MPAS <https://github.com/amd/InfinityHub-CI/tree/main/mpas>`_
+        - The Model for Prediction Across Scales (MPAS) is a collaborative project for
+          developing atmosphere, ocean, and other earth-system simulation components
+          for use in climate, regional climate, and weather studies.
+
+      * - Benchmark
+        - `rocHPL <https://github.com/amd/InfinityHub-CI/tree/main/rochpl>`_
+        - HPL, or High-Performance Linpack, is a benchmark which solves a uniformly 
+          random system of linear equations and reports floating-point execution rate. 
+          This documentation supports the implementation of the HPL benchmark on 
+          top of AMD's ROCm platform.
+
+      * -
+        - `rocHPL-MxP <https://github.com/amd/InfinityHub-CI/tree/main/hpl-mxp>`_
+        - Benchmark that highlights the convergence of HPC and AI workloads by
+          solving a system of linear equations using novel, mixed-precision
+          algorithms.
+
+      * -
+        - `HPCG <https://github.com/amd/InfinityHub-CI/tree/main/hpcg>`_
+        - HPCG, or the High Performance Conjugate Gradient Benchmark, complements
+          the High Performance LINPACK (HPL) benchmark. The computational and data
+          access patterns of HPCG are designed to closely match a broad set of important
+          applications not represented by HPL, and to incentivize computer system
+          designers to invest in capabilities that will benefit the collective performance
+          of these applications.
+
+      * - Tools and libraries
+        - `ROCm with GPU-aware MPI container <https://github.com/amd/InfinityHub-CI/tree/main/base-gpu-mpi-rocm-docker>`_
+        - Base container for GPU-aware MPI with ROCm for HPC applications. This
+          project provides a boilerplate for building and running a Docker
+          container with ROCm supporting GPU-aware MPI implementations using
+          OpenMPI or UCX.
+
+      * -
+        - `Kokkos <https://github.com/amd/InfinityHub-CI/tree/main/kokkos>`_
+        - Kokkos is a programming model in C++ for writing performance portable
+          applications for use across HPC platforms. It provides abstractions for both
+          parallel execution of code and data management. Kokkos is designed to target
+          complex node architectures with N-level memory hierarchies and multiple types
+          of execution resources.
+
+      * -
+        - `PyFR <https://github.com/amd/InfinityHub-CI/tree/main/pyfr>`_
+        - PyFR is an open-source Python based framework for solving advection-diffusion
+          type problems on streaming architectures using the Flux Reconstruction approach of
+          Huynh. The framework is designed to solve a range of governing systems on mixed
+          unstructured grids containing various element types. It is also designed to target a
+          range of hardware platforms via use of an in-built domain specific language derived
+          from the Mako templating engine.
+
+      * -
+        - `RAJA <https://github.com/amd/InfinityHub-CI/tree/main/raja>`_
+        - RAJA is a library of C++ software abstractions, primarily developed at Lawrence
+          Livermore National Laboratory (LLNL), that enables architecture and programming
+          model portability for HPC applications.
+
+      * -
+        - `Trilinos <https://github.com/amd/InfinityHub-CI/tree/main/trilinos>`_
+        - The Trilinos Project is an effort to develop algorithms and enabling technologies
+          within an object-oriented software framework for the solution of large-scale,
+          complex multi-physics engineering and scientific problems.
+
+To learn about ROCm for AI applications, see :doc:`../rocm-for-ai/index`.
--- a/docs/how-to/setting-cus.rst
+++ b/docs/how-to/setting-cus.rst
@@ -0,0 +1,42 @@
+.. meta::
+    :description: Setting the number of CUs
+    :keywords: CU, CUs, number of CUs, compute units
+
+.. _settings-cus-reference:
+
+*************************************************************
+Setting the number of compute units
+*************************************************************
+
+The GPU driver provides two environment variables to set the number of CUs used:
+
+- ``HSA_CU_MASK``
+- ``ROC_GLOBAL_CU_MASK``
+
+The ``ROC_GLOBAL_CU_MASK`` variable sets the CU mask on queues created by HIP or OpenCL runtimes. The ``HSA_CU_MASK`` variable sets the mask on a lower level of queue creation in the driver. It also sets the mask on the queues being profiled.
+
+.. tip::
+
+    When using GPUs to accelerate compute workloads, it sometimes becomes necessary to configure the hardware's usage of compute units (CU). This is a more advanced option, so please read this page before experimentation.
+
+The environment variables have the following syntax:
+
+::
+
+    ID = [0-9][0-9]*                         ex. base 10 numbers
+    ID_list = (ID | ID-ID)[, (ID | ID-ID)]*  ex. 0,2-4,7
+    GPU_list = ID_list                       ex. 0,2-4,7
+    CU_list = 0x[0-F]* | ID_list             ex. 0x337F OR 0,2-4,7
+    CU_Set = GPU_list : CU_list              ex. 0,2-4,7:0-15,32-47 OR 0,2-4,7:0x337F
+    HSA_CU_MASK = CU_Set [; CU_Set]*         ex. 0,2-4,7:0-15,32-47; 3-9:0x337F
+
+The GPU indices are taken post ``ROCR_VISIBLE_DEVICES`` reordering. The listed or masked CUs are enabled for listed GPUs, and the others are disabled. Unlisted GPUs are not affected, and their CUs are enabled.
+
+The variable parsing stops when a syntax error occurs. The erroneous set and all sets following it are ignored. Repeating GPU or CU IDs results in a syntax error. Specifying a mask with no usable CUs (CU_list is 0x0) results in a syntax error. To exclude GPU devices, use ``ROCR_VISIBLE_DEVICES``.
+
+.. note::
+
+    These environment variables only affect ROCm software, not graphics applications.
+
+Not all CU configurations are valid on all devices. For example, on devices where two CUs can be combined into a WGP (for kernels running in WGP mode), it’s not valid to disable only a single CU in a WGP. For more information about what to expect when disabling CUs, see the `Exploring AMD GPU Scheduling Details by Experimenting With “Worst Practices” <https://www.cs.unc.edu/~otternes/papers/rtsj2022.pdf>`_ paper.
+
--- a/docs/index.md
+++ b/docs/index.md
@@ -25,7 +25,6 @@ Our documentation is organized into the following categories:
 :class-container: rocm-doc-grid

 :::{grid-item-card}
-:class-card: sd-text-black
 :img-top: ./data/banner-installation.jpg
 :img-alt: Install documentation
 :padding: 2
@@ -34,20 +33,18 @@ Our documentation is organized into the following categories:
  * {doc}`Quick start guide<rocm-install-on-linux:tutorial/quick-start>`
  * {doc}`Linux install guide<rocm-install-on-linux:how-to/native-install/index>`
  * {doc}`Package manager integration<rocm-install-on-linux:how-to/native-install/package-manager-integration>`
+  * {doc}`Install Docker containers<rocm-install-on-linux:how-to/docker>`
+  * {doc}`ROCm & Spack<rocm-install-on-linux:how-to/spack>`
 * Windows
  * {doc}`Windows install guide<rocm-install-on-windows:how-to/install>`
  * {doc}`Application deployment guidelines<rocm-install-on-windows:conceptual/deployment-guidelines>`
 * [Deep learning frameworks](./how-to/deep-learning-rocm.rst)
-  * {doc}`Install Docker containers<rocm-install-on-linux:how-to/docker>`
  * {doc}`PyTorch for ROCm<rocm-install-on-linux:how-to/3rd-party/pytorch-install>`
  * {doc}`TensorFlow for ROCm<rocm-install-on-linux:how-to/3rd-party/tensorflow-install>`
  * {doc}`JAX for ROCm<rocm-install-on-linux:how-to/3rd-party/jax-install>`
-  * {doc}`MAGMA for ROCm<rocm-install-on-linux:how-to/3rd-party/magma-install>`
-  * {doc}`ROCm & Spack<rocm-install-on-linux:how-to/spack>`
 :::

 :::{grid-item-card}
-:class-card: sd-text-black
 :img-top: ./data/banner-compatibility.jpg
 :img-alt: Compatibility information
 :padding: 2
@@ -65,7 +62,6 @@ Our documentation is organized into the following categories:

 <!-- markdownlint-disable MD051 -->
 :::{grid-item-card}
-:class-card: sd-text-black
 :img-top: ./data/banner-reference.jpg
 :img-alt: Reference documentation
 :padding: 2
@@ -81,18 +77,19 @@ Our documentation is organized into the following categories:
  * [Development](#development-tools)
  * [Performance analysis](#performance-analysis)
  * [System](#system-tools)
+* [Environment variables](./reference/env-variables.rst)
 * [Hardware specifications](./reference/gpu-arch-specs.rst)
 :::
 <!-- markdownlint-enable MD051 -->

 :::{grid-item-card}
-:class-card: sd-text-black
 :img-top: ./data/banner-howto.jpg
 :img-alt: How-to documentation
 :padding: 2

 * [Using ROCm for AI](./how-to/rocm-for-ai/index.rst)
-* [Fine-tuning LLMs and inference optimization](./how-to/fine-tuning-llms/index.rst)
+* [Using ROCm for HPC](./how-to/rocm-for-hpc/index.rst)
+* [Fine-tuning LLMs and inference optimization](./how-to/llm-fine-tuning-optimization/index.rst)
 * [System tuning for various architectures](./how-to/tuning-guides.md)
  * [MI100](./how-to/tuning-guides/mi100.md)
  * [MI200](./how-to/tuning-guides/mi200.md)
@@ -102,12 +99,12 @@ Our documentation is organized into the following categories:
  * [Using AddressSanitizer](./conceptual/using-gpu-sanitizer.md)
  * [Compiler disambiguation](./conceptual/compiler-disambiguation.md)
  * [OpenMP support in ROCm](./about/compatibility/openmp.md)
+* [Setting the number of CUs](./how-to/setting-cus)
 * [System level debugging](./how-to/system-debugging.md)
 * [GitHub examples](https://github.com/amd/rocm-examples)
 :::

 :::{grid-item-card}
-:class-card: sd-text-black
 :img-top: ./data/banner-conceptual.jpg
 :img-alt: Conceptual documentation
 :padding: 2
@@ -117,7 +114,6 @@ Our documentation is organized into the following categories:
  * [MI250](./conceptual/gpu-arch/mi250.md)
  * [MI300](./conceptual/gpu-arch/mi300.md)
 * [GPU memory](./conceptual/gpu-memory.md)
-* [Setting the number of CUs](./conceptual/setting-cus)
 * [File structure (Linux FHS)](./conceptual/file-reorg.md)
 * [GPU isolation techniques](./conceptual/gpu-isolation.md)
 * [Using CMake](./conceptual/cmake-packages.rst)
--- a/docs/reference/api-libraries.md
+++ b/docs/reference/api-libraries.md
@@ -22,7 +22,7 @@
 * {doc}`Composable Kernel <composable_kernel:index>`
 * {doc}`MIGraphX <amdmigraphx:index>`
 * {doc}`MIOpen <miopen:index>`
-* {doc}`MIVisionX <mivisionx:doxygen/html/index>`
+* {doc}`MIVisionX <mivisionx:index>`
 * {doc}`rocAL <rocal:index>`
 * {doc}`rocDecode <rocdecode:index>`
 * {doc}`ROCm Performance Primitives (RPP) <rpp:index>`
--- a/docs/reference/env-variables.rst
+++ b/docs/reference/env-variables.rst
@@ -0,0 +1,920 @@
+.. meta::
+    :description: Environment variables reference
+    :keywords: AMD, ROCm, environment variables, environment, reference
+
+.. role:: cpp(code)
+   :language: cpp
+
+.. _env-variables-reference:
+
+*************************************************************
+ROCm environment variables
+*************************************************************
+
+The following table lists the most commonly used environment variables in the ROCm software stack. These variables help to perform simple tasks such as building a ROCm library or running applications on AMD GPUs.
+
+.. list-table::
+    :header-rows: 1
+    :widths: 70,30
+
+    * - **Environment variable**
+      - **Value**
+
+    * - | ``HIP_PATH``
+        | The path of the HIP SDK on Microsoft Windows.
+      - Default: ``C:/hip``
+
+    * - | ``HIP_DIR``
+        | The path of the HIP SDK on Microsoft Windows. This variable is ignored if ``HIP_PATH`` is set.
+      - Default: ``C:/hip``
+
+    * - | ``ROCM_PATH``
+        | The path of the installed ROCm software stack on Linux.
+      - Default: ``/opt/rocm``
+
+    * - | ``HIP_PLATFORM``
+        | The platform targeted by HIP. If ``HIP_PLATFORM`` is not set, then HIPCC attempts to auto-detect the platform, if it can find NVCC.
+      - ``amd``, ``nvidia``
+
+CLR environment variables
+=========================
+
+AMD Common Language Runtime (:doc:`CLR <hip:understand/amd_clr>`) library contains source code for AMD's compute language runtimes:
+
+* ``hipamd``: Contains implementation of HIP language on the AMD platform.
+* ``opencl``: Contains implementation of `OpenCL™ <https://www.khronos.org/opencl/>`_ on AMD platform. It is hosted at `clr/opencl <https://github.com/ROCm/clr/tree/develop/opencl>`_.
+* ``rocclr``: Contains common runtime used in HIP and OpenCL. This is hosted at `clr/rocclr <https://github.com/ROCm/clr/tree/develop/rocclr>`_.
+
+The environment variables affecting the CLR library might affect HIP and OpenCL libraries or applications.
+
+The following table lists the environment variables that affect ``opencl`` and ``hipamd``:
+
+.. list-table::
+    :header-rows: 1
+    :widths: 70,30
+
+    * - **Environment variable**
+      - **Value**
+
+    * - | ``ROCM_LIBPATCH_VERSION``
+        | The ROCm version in the integer format. The format is
+        | :cpp:`MAJOR * 10000 + MINOR * 100 + PATCH`
+      - 50000, 60020...
+
+    * - | ``CPACK_DEBIAN_PACKAGE_RELEASE``
+        | This is the numbering of the Debian package itself, i.e., the version of the packaging and not the version of the content.
+      - 1, 2, 3...
+
+    * - | ``CPACK_RPM_PACKAGE_RELEASE``
+        | This is the numbering of the RPM package itself, i.e., the version of the packaging and not the version of the content.
+      - 1, 2, 3...
+
+The following table lists the environment variables that affect ``hipamd``:
+
+.. list-table::
+    :header-rows: 1
+    :widths: 70,30
+
+    * - **Environment variable**
+      - **Value**
+
+    * - | ``HIP_FORCE_QUEUE_PROFILING``
+        | Simulates the application to run in rocprof by forcing command queue profiling to ``on`` by default.
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``HSA_OVERRIDE_GFX_VERSION``
+        | Overrides the target version; used to enable HIP usage on unsupported hardware.
+      - 11.0.0, 10.3.0
+
+    * - | ``HSA_DISABLE_CACHE``
+        | Disables the L2 cache.
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``HSAKMT_DEBUG_LEVEL``
+        | When set to the highest level, the system prints memory allocation information.
+      - 1, 2, ... 7
+
+The following table lists the environment variables that affect ``rocclr``:
+
+.. https://github.com/ROCm/clr/blob/develop/rocclr/utils/flags.hpp
+
+.. list-table::
+    :header-rows: 1
+    :widths: 35,14,51
+
+    * - **Environment variable**
+      - **Default value**
+      - **Value**
+
+    * - | ``AMD_CPU_AFFINITY``
+        | Resets CPU affinity of any runtime threads
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``AMD_DIRECT_DISPATCH``
+        | Enables direct kernel dispatch. Currently available on Linux; under development for Windows.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``AMD_GPU_FORCE_SINGLE_FP_DENORM``
+        | Forces denormalization for single precision.
+      - ``-1``
+      - | -1: Don't force 
+        | 0: Disable
+        | 1: Enable
+
+    * - | ``AMD_LOG_LEVEL``
+        | Enables HIP logging at various levels.
+      - ``0``
+      - | 0: Disable log.
+        | 1: Enables log on error level.
+        | 2: Enables log on warning and lower levels.
+        | 3: Enables log on information and lower levels.
+        | 4: Enables log on debug and lower levels.
+
+    * - | ``AMD_LOG_LEVEL_FILE``
+        | Sets output file for ``AMD_LOG_LEVEL``.
+      - stderr output
+      - 
+
+    * - | ``AMD_LOG_MASK``
+        | Specifies HIP log filters. Here is the `complete list of log masks <https://github.com/ROCm/clr/blob/develop/rocclr/utils/debug.hpp#L40>`_.
+      - ``0x7FFFFFFF``
+      - | 0x1: Log API calls.
+        | 0x2: Kernel and copy commands and barriers.
+        | 0x4: Synchronization and waiting for commands to finish.
+        | 0x8: Decode and display AQL packets.
+        | 0x10: Queue commands and queue contents.
+        | 0x20: Signal creation, allocation, pool.
+        | 0x40: Locks and thread-safety code.
+        | 0x80: Kernel creations and arguments, etc.
+        | 0x100: Copy debug.
+        | 0x200: Detailed copy debug.
+        | 0x400: Resource allocation, performance-impacting events.
+        | 0x800: Initialization and shutdown.
+        | 0x1000: Misc debug, not yet classified.
+        | 0x2000: Show raw bytes of AQL packet.
+        | 0x4000: Show code creation debug.
+        | 0x8000: More detailed command info, including barrier commands.
+        | 0x10000: Log message location.
+        | 0x20000: Memory allocation.
+        | 0x40000: Memory pool allocation, including memory in graphs.
+        | 0x80000: Timestamp details.
+        | 0xFFFFFFFF: Log always, even if the mask flag is zero.
+
+    * - | ``AMD_OCL_BUILD_OPTIONS``
+        | Sets the options for ``clBuildProgram`` and ``clCompileProgram``. This variable overrides the previously set options.
+      - None
+      - 
+
+    * - | ``AMD_OCL_BUILD_OPTIONS_APPEND``
+        | Appends the options for ``clBuildProgram`` and ``clCompileProgram``.
+      - None
+      - 
+
+    * - | ``AMD_OCL_LINK_OPTIONS``
+        | Sets the options for ``clLinkProgram``.
+      - None
+      - 
+
+    * - | ``AMD_OCL_LINK_OPTIONS_APPEND``
+        | Appends the options for ``clLinkProgram``.
+      - None
+      - 
+
+    * - | ``AMD_OCL_WAIT_COMMAND``
+        | Enforces a wait for every submitted command.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``OCL_SET_SVM_SIZE``
+        | Sets shared virtual memory (SVM) space size in bytes for discrete GPUs.
+      - ``65536``
+      -
+
+    * - | ``OCL_STUB_PROGRAMS``
+        | Enables OCL programs stubbing.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``OPENCL_VERSION``
+        | Forces GPU OpenCL version.
+      - ``200``
+      - 
+
+    * - | ``AMD_OPT_FLUSH``
+        | Sets kernel flush option.
+      - ``0x1``
+      - | ``0x0`` = Uses system-scope fence operations.
+        | ``0x1`` = Uses device-scope fence operations when possible.
+
+    * - | ``AMD_SERIALIZE_COPY``
+        | Controls serialization of copies
+      - ``0``
+      - | 0: Disable
+        | 1: Waits for completion before enqueue.
+        | 2: Waits for completion after enqueue.
+        | 3: Both
+
+    * - | ``AMD_SERIALIZE_KERNEL``
+        | Serializes kernel enqueue.
+      - ``0``
+      - | 0: Disable
+        | 1: Waits for completion before enqueue.
+        | 2: Waits for completion after enqueue.
+        | 3: Both
+
+    * - | ``AMD_THREAD_TRACE_ENABLE``
+        | Enables thread trace extension.
+      - ``1``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``CL_KHR_FP64``
+        | Controls support for double precision.
+      - ``1``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``CQ_THREAD_STACK_SIZE``
+        | The default command queue thread stack size in Bytes.
+      - ``262144``: 256 KB
+      -
+
+    * - | ``CUDA_VISIBLE_DEVICES``
+        | The visible devices to HIP (whose indices are present in the sequence)
+      - None
+      - ``0,1,2``: List of the device indices, depending on the number of devices in the system.
+
+    * - | ``DEBUG_CLR_GRAPH_PACKET_CAPTURE``
+        | Controls capturing of graph packets.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``DEBUG_CLR_LIMIT_BLIT_WG``
+        | Sets the limit for the number of workgroups in blit operations.
+      - ``16``
+      -
+
+    * - | ``DISABLE_DEFERRED_ALLOC``
+        | Controls deferred memory allocation on device.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_ADD_HBCC_SIZE``
+        | Adds HBCC size to the reported device memory.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_ANALYZE_HANG``
+        | Allows you to analyze GPU hang issue.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_BLIT_ENGINE_TYPE``
+        | Specifies blit engine type.
+      - ``0``
+      - | 0: Default
+        | 1: Host
+        | 2: CAL
+        | 3: Kernel
+
+    * - | ``GPU_CP_DMA_COPY_SIZE``
+        | Sets the maximum size of CP DMA copy in KB.
+      - ``1``
+      -
+
+    * - | ``GPU_DEBUG_ENABLE``
+        | Enables collection of extra information for debugger at the cost of performance.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_DEVICE_ORDINAL``
+        | Selects the device ordinal, which is a comma separated list of available devices.
+      - None
+      - A value of ``0,2`` exposes devices 1 and 3 in the system.
+
+    * - | ``GPU_DUMP_BLIT_KERNELS``
+        | Controls dumping of the kernels for blit manager.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_DUMP_CODE_OBJECT``
+        | Controls dumping of code object.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_ENABLE_COOP_GROUPS``
+        | Enables cooperative group launch.
+      - ``1``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_ENABLE_HW_P2P``
+        | Enables hardware peer to peer (P2P) path.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_ENABLE_LC``
+        | Enables LC path.
+      - ``1``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_ENABLE_PAL``
+        | Specifies platform abstraction library (PAL) backend.
+      - ``2``
+      - | 0: ROC
+        | 1: PAL
+        | 2: ROC or PAL
+
+    * - | ``GPU_ENABLE_WAVE32_MODE``
+        | Enables Wave32 compilation in hardware, if available.
+      - ``1``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_ENABLE_WGP_MODE``
+        | Enables WGP Mode in hardware, if available. Workgroups of waves are
+        | dispatched in one of the two modes: CU or WGP.
+      - ``1``
+      - | 0: CU mode. The waves of a workgroup are distributed across just two SIMD32s.
+        | 1: WGP mode. The waves of a workgroup are distributed across all 4 SIMD32s within a workgroup.
+
+    * - | ``GPU_FORCE_BLIT_COPY_SIZE``
+        | Specifies the threshold size in KB, under which blit is forced instead of system direct memory access (SDMA).
+      - ``0``
+      -
+
+    * - | ``GPU_FORCE_QUEUE_PROFILING``
+        | Forces command queue profiling.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_FLUSH_ON_EXECUTION``
+        | Submits commands to hardware on every operation.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_IMAGE_BUFFER_WAR``
+        | Enables image buffer workaround.
+      - ``1``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_IMAGE_DMA``
+        | Enables DRM DMA for image transfers.
+      - ``1``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_MAX_COMMAND_BUFFERS``
+        | Sets the maximum number of command buffers allocated per queue.
+      - ``8``
+      -
+
+    * - | ``GPU_MAX_HEAP_SIZE``
+        | Sets the maximum size of the GPU heap (in percentage) on the board memory.
+      - ``100``
+      -
+
+    * - | ``GPU_MAX_HW_QUEUES``
+        | Sets the maximum number of hardware queues to be allocated per device.
+      - ``4``
+      - This variable controls how many independent hardware queues HIP runtime can create per process, per device. If an application allocates more HIP streams than the specified value, then HIP runtime reuses the same hardware queues for the new streams in a round-robin manner. Note that this value doesn't apply to hardware queues that are created for CU-masked HIP streams or cooperative queues for HIP cooperative groups (single queue per device).
+
+    * - | ``GPU_MAX_REMOTE_MEM_SIZE``
+        | Sets the maximum size in KB for device memory substitution with the system.
+      - ``2``
+      -
+
+    * - | ``GPU_MAX_SUBALLOC_SIZE``
+        | Sets the maximum size for sub-allocations in KB.
+      - ``4096``
+      -
+
+    * - | ``GPU_MAX_USWC_ALLOC_SIZE``
+        | Sets the maximum uncacheable speculative write combining (USWC) allocation size in MB.
+      - ``2048``
+      - -1: No limit
+
+    * - | ``GPU_MAX_WORKGROUP_SIZE``
+        | Sets the maximum number of workitems in a workgroup for GPU.
+      - ``0``: Sets no limit on workitems.
+      -
+
+    * - | ``GPU_MIPMAP``
+        | Enables GPU mipmap extension.
+      - ``1``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_NUM_COMPUTE_RINGS``
+        | Sets the number of GPU compute rings.
+      - ``2``
+      - | 0: Disable
+        | Any other number corresponds to the number of compute rings.
+
+    * - | ``GPU_NUM_MEM_DEPENDENCY``
+        | Sets the number of memory objects for dependency tracking.
+      - ``256``
+      -
+
+    * - | ``GPU_PINNED_MIN_XFER_SIZE``
+        | Sets the minimum buffer size (in MB) for pinned read and write transfers.
+      - ``128``
+      -
+
+    * - | ``GPU_PINNED_XFER_SIZE``
+        | Sets the buffer size (in MB) for pinned read and write transfers.
+      - ``32``
+      -
+
+    * - | ``GPU_PRINT_CHILD_KERNEL``
+        | Specifies the number of child kernels to be printed.
+      - ``0``
+      -
+
+    * - | ``GPU_RESOURCE_CACHE_SIZE``
+        | Sets the resource cache size in MB.
+      - ``64``
+      -
+
+    * - | ``GPU_SINGLE_ALLOC_PERCENT``
+        | Sets the maximum size of a single allocation as a percentage of the total.
+      - ``85``
+      - 
+
+    * - | ``GPU_STAGING_BUFFER_SIZE``
+        | Sets the GPU staging buffer size in MB.
+      - ``4``
+      -
+
+    * - | ``GPU_STREAMOPS_CP_WAIT``
+        | Forces the stream memory operation to wait on command processor (CP).
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_USE_DEVICE_QUEUE``
+        | Controls use of dedicated device queue for the actual submissions.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``GPU_WAVES_PER_SIMD``
+        | Forces the number of waves per SIMD.
+      - ``0``
+      - 1-10
+
+    * - | ``GPU_XFER_BUFFER_SIZE``
+        | Sets the transfer buffer size for image copy optimization in KB.
+      - ``0``
+      -
+        
+    * - | ``HIP_FORCE_DEV_KERNARG``
+        | Forces device memory for kernel arguments.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``HIP_HIDDEN_FREE_MEM``
+        | Specifies the amount of memory to hide from the free memory reported by ``hipMemGetInfo``.
+      - ``0``: Disable
+      -
+
+    * - | ``HIP_HOST_COHERENT``
+        | Specifies if the memory is coherent between the host and GPU in ``hipHostMalloc``.
+      - ``0``
+      - | 0: Memory is not coherent.
+        | 1: Memory is coherent.
+        | The environment variable takes effect if the following conditions are satisfied:
+        | - One of the ``hipHostMallocDefault``, ``hipHostMallocPortable``, ``hipHostMallocWriteCombined`` or ``hipHostMallocNumaUser`` flags is set to 1.
+        | - ``hipHostMallocCoherent``, ``hipHostMallocNonCoherent`` and ``hipHostMallocMapped`` flags are set to 0.
+
+    * - | ``HIP_INITIAL_DM_SIZE``
+        | Sets the initial heap size for device malloc.
+      - ``8388608``: 8 MB
+      -
+
+    * - | ``HIP_LAUNCH_BLOCKING``
+        | Controls serialization of kernel execution.
+      - ``0``
+      - | 0: Disable. Kernel executes normally.
+        | 1: Enable. Serializes kernel execution; behaves similar to ``AMD_SERIALIZE_KERNEL``.
+
+    * - | ``HIP_MEM_POOL_SUPPORT``
+        | Enables memory pool support in HIP.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``HIP_MEM_POOL_USE_VM``
+        | Enables memory pool support on top of virtual memory management (VMM) in HIP.
+      - | ``0``: Default value on other OS.
+        | ``1``: Default value on Microsoft Windows.
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``HIP_USE_RUNTIME_UNBUNDLER``
+        | Controls use of runtime code object unbundler.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``HIP_VISIBLE_DEVICES``
+        | Specifies the indices of the devices allowed to be visible to HIP.
+      - None
+      - 0,1,2: Depending on the number of devices on the system.
+
+    * - | ``HIP_VMEM_MANAGE_SUPPORT``
+        | Enables virtual memory management support.
+      - ``1``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``HIPCC_VERBOSE``
+        | Controls the extra information to be displayed during the build such as compiler commands with flags, paths and arguments.
+      - ``0``
+      - | 0x1: Print detailed compiler commands.
+        | 0x2: Print HIP, ROCm and CUDA paths (``HIP_PATH``, ``ROCM_PATH``, ``HIP_CLANG_PATH``, ...). 
+        | 0x4: Print HIPCC arguments.
+
+    * - | ``HIPRTC_COMPILE_OPTIONS_APPEND``
+        | Sets compile options needed for ``hiprtc`` compilation.
+      - None
+      - ``--gpu-architecture=gfx906:sramecc+:xnack``, ``-fgpu-rdc``
+
+    * - | ``HIPRTC_LINK_OPTIONS_APPEND``
+        | Sets link options needed for ``hiprtc`` compilation.
+      - None
+      - 
+
+    * - | ``HIPRTC_USE_RUNTIME_UNBUNDLER``
+        | Forces runtime unbundler in hiprtc.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``HSA_KERNARG_POOL_SIZE``
+        | Sets the pool size for kernel arguments.
+      - ``1048576``: 1 MB
+      -
+
+    * - | ``HSA_LOCAL_MEMORY_ENABLE``
+        | Enables use of local memory on HSA device.
+      - ``1``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``PAL_DISABLE_SDMA``
+        | Disables SDMA for PAL.
+      - ``0``
+      - | 0: Enable SDMA for PAL.
+        | 1: Disable SDMA for PAL.
+
+    * - | ``PAL_MALL_POLICY``
+        | Controls the behaviour of allocations with respect to the MALL.
+      - ``0``
+      - | 0: MALL policy is decided by KMD.
+        | 1: Allocations are never put through the MALL.
+        | 2: Allocations will always be put through the MALL.
+
+    * - | ``PAL_ALWAYS_RESIDENT``
+        | Forces memory resources to become resident during allocation.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``PAL_EMBED_KERNEL_MD``
+        | Enables writing kernel metadata into command buffers.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``PAL_FORCE_ASIC_REVISION``
+        | Forces a specific ASIC revision on all devices.
+      - ``0``
+      -
+
+    * - | ``PAL_HIP_IPC_FLAG``
+        | Enables inter-process flag for device allocation in PAL HIP.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``PAL_PREPINNED_MEMORY_SIZE``
+        | Sets the size in KB of pre-pinned memory.
+      - ``64``
+      -
+
+    * - | ``PAL_RGP_DISP_COUNT``
+        | Sets the number of dispatches for RGP capture with SQTT.
+      - ``10000``
+      -
+
+    * - | ``REMOTE_ALLOC``
+        | Enables use of remote memory for the global heap allocation.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``ROC_ACTIVE_WAIT_TIMEOUT``
+        | Forces active wait of GPU interrupt for the specified timeout in microseconds.
+      - ``0``
+      -
+
+    * - | ``ROC_AQL_QUEUE_SIZE``
+        | Sets the AQL queue size in bytes in the AQL packets.
+      - ``16384``: 16 KB
+      -
+
+    * - | ``ROC_CPU_WAIT_FOR_SIGNAL``
+        | Enables CPU wait for dependent HSA signals.
+      - ``1``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``ROC_ENABLE_LARGE_BAR``
+        | Enables large BAR if supported by the device.
+      - ``1``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``ROC_GLOBAL_CU_MASK``
+        | Sets a global CU mask, entered as a hex value, for all queues. Each active bit represents one CU; for example, ``0xf`` enables 4 CUs.
+      - None
+      - 
+
+    * - | ``ROC_HMM_FLAGS``
+        | Sets ROCm HMM configuration flags.
+      - ``0``: Disabled
+      - 
+
+    * - | ``ROC_P2P_SDMA_SIZE``
+        | Sets the minimum size in KB for peer-to-peer (P2P) transfer with SDMA.
+      - ``1024``: 1 MB
+      -
+
+    * - | ``ROC_SIGNAL_POOL_SIZE``
+        | Sets the initial size for HSA signal pool.
+      - ``32``
+      - 
+
+    * - | ``ROC_SKIP_KERNEL_ARG_COPY``
+        | Allows the runtime to skip kernel argument copy.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``ROC_SYSTEM_SCOPE_SIGNAL``
+        | Enables system scope for signals (uses interrupts).
+      - ``1``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``ROC_USE_FGS_KERNARG``
+        | Enables use of fine grain kernel arguments segment for supported ASICs.
+      - ``1``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``ROCPROFILER_REGISTER_ROOT``
+        | Sets the path to ``rocProfiler``.
+      - None
+      - 
+
+The following table lists the debug environment variables that affect ``rocclr`` of the CLR project. These environment variables can only be set in debug builds.
+
+.. list-table::
+    :header-rows: 1
+    :widths: 35,14,51
+
+    * - **Environment variable**
+      - **Default value**
+      - **Value**
+
+    * - | ``AMD_OCL_SUBST_OBJFILE``
+        | Specifies binary substitution config file for OpenCL.
+      - None
+      - 
+
+    * - | ``CPU_MEMORY_ALIGNMENT_SIZE``
+        | Sets the size in bytes for the default alignment of guarded memory on CPU.
+      - ``256``
+      -
+
+    * - | ``CPU_MEMORY_GUARD_PAGE_SIZE``
+        | Size of the CPU memory guard page in KB.
+      - ``64``: 64 KB
+      -
+
+    * - | ``CPU_MEMORY_GUARD_PAGES``
+        | Enables using guard pages for CPU memory.
+      - ``0``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``MEMOBJ_BASE_ADDR_ALIGN``
+        | Alignment of the base address of any allocated memory object.
+      - ``4096``: 4 KB
+      -
+
+    * - | ``PARAMETERS_MIN_ALIGNMENT``
+        | Specifies the minimum alignment required for the abstract parameters stack.
+      - 64 at ``__AVX512F__``, 32 at ``__AVX__`` and 16 in other cases
+      -
+
+ROCR-Runtime environment variables
+==================================
+
+.. https://github.com/ROCm/ROCR-Runtime/blob/master/src/core/util/flag.h
+.. We need to extend the following list.
+
+The following table lists the ROCR-Runtime environment variables:
+
+.. list-table::
+    :header-rows: 1
+    :widths: 35,14,51
+
+    * - **Environment variable**
+      - **Default value**
+      - **Value**
+
+    * - | ``ROCR_VISIBLE_DEVICES``
+        | Specifies a list of device indices or UUIDs to be exposed to the applications.
+      - None
+      - ``0,GPU-DEADBEEFDEADBEEF``
+
+    * - | ``HSA_SCRATCH_MEM``
+        | Specifies the maximum amount of scratch memory that can be used per process per GPU.
+      -
+      -
+
+    * - | ``HSA_XNACK``
+        | Enables XNACK.
+      - None
+      - 1: Enable
+
+    * - | ``HSA_CU_MASK``
+        | Sets the mask on a lower level of queue creation in the driver. 
+        | This mask is also applied to the queues being profiled.
+      - None
+      - ``1:0-8``
+
+    * - | ``HSA_ENABLE_SDMA``
+        | Enables the use of direct memory access (DMA) engines in all copy directions (Host-to-Device, Device-to-Host, Device-to-Device), when using any of the following APIs:
+        | ``hsa_memory_copy``, 
+        | ``hsa_amd_memory_fill``, 
+        | ``hsa_amd_memory_async_copy``, 
+        | ``hsa_amd_memory_async_copy_on_engine``.
+      - ``1``
+      - | 0: Disable
+        | 1: Enable
+
+    * - | ``HSA_ENABLE_PEER_SDMA``
+        | Enables the use of DMA engines for Device-to-Device copies, when using any of the following APIs:
+        | ``hsa_memory_copy``,
+        | ``hsa_amd_memory_async_copy``,
+        | ``hsa_amd_memory_async_copy_on_engine``.
+      - ``1``
+      - | 0: Disable
+        | 1: Enable
+
+Note that ``HSA_ENABLE_PEER_SDMA`` is ignored if ``HSA_ENABLE_SDMA`` is set to 0.
+
+rocPRIM environment variables
+=============================
+
+The following table lists the environment variables used in the rocPRIM library.
+
+.. list-table::
+    :header-rows: 1
+    :widths: 70,30
+
+    * - **Environment variable**
+      - **Default value**
+
+    * - | ``HIP_PATH``
+        | Specifies the path of the HIP SDK on Microsoft Windows.
+      - ``C:/hip``
+
+    * - | ``HIP_DIR``
+        | Specifies the path of the HIP SDK on Microsoft Windows. This variable is ignored if ``HIP_PATH`` is set.
+      - ``C:/hip``
+
+    * - | ``VCPKG_PATH``
+        | Specifies the path of the ``vcpkg`` package manager on Microsoft Windows. This environment variable has no effect on Linux.
+      - ``C:/github/vcpkg``
+
+    * - | ``ROCM_PATH``
+        | Specifies the path of the installed ROCm software stack on Linux.
+      - ``/opt/rocm``
+
+    * - | ``ROCM_CMAKE_PATH``
+        | Specifies the path of the installed ROCm ``cmake`` file on Microsoft Windows.
+      - ``C:/hipSDK``
+
+    * - | ``HIPCC_COMPILE_FLAGS_APPEND``
+        | Enables extra ``amdclang++`` compiler flags on Linux. This environment variable is ignored if ``CXX`` environment variable is set.
+      - None
+
+    * - | ``ROCPRIM_USE_HMM``
+        | Enables the test suite to use unified memory, when set to 1 during the tests.
+      - None
+
+    * - | ``CTEST_RESOURCE_GROUP_0``
+        | Enables grouping of the tests for different CI steps. This environment variable is used by CI and is of little use to most users.
+      - None
+
+hipCUB environment variables
+============================
+
+The following table lists the environment variables used in the hipCUB library.
+
+.. list-table::
+    :header-rows: 1
+    :widths: 70,30
+
+    * - **Environment variable**
+      - **Default value**
+
+    * - | ``HIP_PATH``
+        | Specifies the path of the HIP SDK on Microsoft Windows.
+      - ``C:/hip``
+
+    * - | ``HIP_DIR``
+        | Specifies the path of the HIP SDK on Microsoft Windows. This variable is ignored if ``HIP_PATH`` is set.
+      - ``C:/hip``
+
+    * - | ``VCPKG_PATH``
+        | Specifies the path of the ``vcpkg`` package manager on Microsoft Windows. This environment variable has no effect on Linux.
+      - ``C:/github/vcpkg``
+
+    * - | ``ROCM_PATH``
+        | Specifies the path of the installed ROCm software stack on Linux.
+      - ``/opt/rocm``
+
+    * - | ``HIPCC_COMPILE_FLAGS_APPEND``
+        | Enables extra ``amdclang`` or ``amdclang++`` compiler flags on Linux. This environment variable is ignored if ``CXX`` or ``CC`` environment variable is set.
+      - None
+
+    * - | ``HIPCUB_USE_HMM``
+        | Enables the test suite to use unified memory, when set to 1 during the tests.
+      - None
+
+    * - | ``CTEST_RESOURCE_GROUP_0``
+        | Enables grouping of the tests for different CI steps. This environment variable is used by CI and is of little use to most users.
+      - None
+
+rocThrust environment variables
+===============================
+
+The following table lists the environment variables used in the rocThrust library.
+
+.. list-table::
+    :header-rows: 1
+    :widths: 70,30
+
+    * - **Environment variable**
+      - **Default value**
+
+    * - | ``HIP_PATH``
+        | Specifies the path of the HIP SDK on Microsoft Windows.
+      - ``C:/hip``
+
+    * - | ``HIP_DIR``
+        | Specifies the path of the HIP SDK on Microsoft Windows. This variable is ignored if ``HIP_PATH`` is set.
+      - ``C:/hip``
+
+    * - | ``VCPKG_PATH``
+        | Specifies the path of the ``vcpkg`` package manager on Microsoft Windows. This environment variable has no effect on Linux.
+      - ``C:/github/vcpkg``
+
+    * - | ``ROCM_PATH``
+        | Specifies the path of the installed ROCm software stack on Linux.
+      - ``/opt/rocm``
+
+    * - | ``ROCTHRUST_USE_HMM``
+        | Enables the test suite to use unified memory, when set to 1 during the tests.
+      - None
+
+    * - | ``CTEST_RESOURCE_GROUP_0``
+        | Enables grouping of the tests for different CI steps. This environment variable is used by CI and is of little use to most users.
+      - None
--- a/docs/reference/gpu-arch-specs.rst
+++ b/docs/reference/gpu-arch-specs.rst
--- a/docs/reference/rocm-tools.md
+++ b/docs/reference/rocm-tools.md
@@ -22,8 +22,8 @@
 * {doc}`HIPIFY <hipify:index>`
 * {doc}`ROCdbgapi <rocdbgapi:index>`
 * [ROCmCC](./rocmcc.md)
-* [ROCm Debug Agent](https://github.com/ROCm/rocr_debug_agent)
-* {doc}`ROCm debugger (ROCgdb) <rocgdb:index>`
+* {doc}`ROCm Debugger (ROCgdb) <rocgdb:index>`
+* {doc}`ROCr Debug Agent <rocr_debug_agent:index>`
 :::

 (performance-tools)=
--- a/docs/release/versions.md
+++ b/docs/release/versions.md
@@ -8,6 +8,7 @@

 | Version | Release date |
 | ------- | ------------ |
+| [6.1.2](https://rocm.docs.amd.com/en/docs-6.1.2/) | June 4, 2024 |
 | [6.1.1](https://rocm.docs.amd.com/en/docs-6.1.1/) | May 8, 2024 |
 | [6.1.0](https://rocm.docs.amd.com/en/docs-6.1.0/) | Apr 16, 2024 |
 | [6.0.2](https://rocm.docs.amd.com/en/docs-6.0.2/) | Jan 31, 2024 |
--- a/docs/sphinx/_toc.yml.in
+++ b/docs/sphinx/_toc.yml.in
@@ -44,6 +44,8 @@ subtrees:
      title: API libraries
    - file: reference/rocm-tools.md
      title: Tools
+    - file: reference/env-variables
+      title: Environment variables
    - file: reference/gpu-arch-specs.rst
      title: Hardware specifications

@@ -58,27 +60,29 @@ subtrees:
      - file: how-to/rocm-for-ai/train-a-model.rst
      - file: how-to/rocm-for-ai/hugging-face-models.rst
      - file: how-to/rocm-for-ai/deploy-your-model.rst
-  - file: how-to/fine-tuning-llms/index.rst
+  - file: how-to/rocm-for-hpc/index.rst
+    title: Using ROCm for HPC
+  - file: how-to/llm-fine-tuning-optimization/index.rst
    title: Fine-tuning LLMs and inference optimization
    subtrees:
    - entries:
-      - file: how-to/fine-tuning-llms/overview.rst
+      - file: how-to/llm-fine-tuning-optimization/overview.rst
        title: Conceptual overview
-      - file: how-to/fine-tuning-llms/fine-tuning-and-inference.rst
+      - file: how-to/llm-fine-tuning-optimization/fine-tuning-and-inference.rst
        subtrees:
        - entries:
-          - file: how-to/fine-tuning-llms/single-gpu-fine-tuning-and-inference.rst
+          - file: how-to/llm-fine-tuning-optimization/single-gpu-fine-tuning-and-inference.rst
            title: Using a single accelerator
-          - file: how-to/fine-tuning-llms/multi-gpu-fine-tuning-and-inference.rst
+          - file: how-to/llm-fine-tuning-optimization/multi-gpu-fine-tuning-and-inference.rst
            title: Using multiple accelerators
-      - file: how-to/fine-tuning-llms/model-quantization.rst
-      - file: how-to/fine-tuning-llms/model-acceleration-libraries.rst
-      - file: how-to/fine-tuning-llms/llm-inference-frameworks.rst
-      - file: how-to/fine-tuning-llms/optimizing-with-composable-kernel.md
+      - file: how-to/llm-fine-tuning-optimization/model-quantization.rst
+      - file: how-to/llm-fine-tuning-optimization/model-acceleration-libraries.rst
+      - file: how-to/llm-fine-tuning-optimization/llm-inference-frameworks.rst
+      - file: how-to/llm-fine-tuning-optimization/optimizing-with-composable-kernel.md
        title: Optimizing with Composable Kernel
-      - file: how-to/fine-tuning-llms/optimizing-triton-kernel.rst
+      - file: how-to/llm-fine-tuning-optimization/optimizing-triton-kernel.rst
        title: Optimizing Triton kernels
-      - file: how-to/fine-tuning-llms/profiling-and-debugging.rst
+      - file: how-to/llm-fine-tuning-optimization/profiling-and-debugging.rst
  - file: how-to/tuning-guides.md
    title: System optimization
    subtrees:
@@ -101,6 +105,8 @@ subtrees:
        title: Compiler disambiguation
      - file: about/compatibility/openmp.md
        title: OpenMP support
+  - file: how-to/setting-cus
+    title: Setting the number of CUs  
  - file: how-to/system-debugging.md
    title: Debugging
  - url: https://github.com/amd/rocm-examples
@@ -140,8 +146,6 @@ subtrees:
            title: White paper
  - file: conceptual/gpu-memory.md
    title: GPU memory
-  - file: conceptual/setting-cus
-    title: Setting the number of CUs
  - file: conceptual/file-reorg.md
    title: File structure (Linux FHS)
  - file: conceptual/gpu-isolation.md
--- a/docs/sphinx/requirements.in
+++ b/docs/sphinx/requirements.in
@@ -1,2 +1,2 @@
-rocm-docs-core==1.2.0
-sphinx-reredirects
+rocm-docs-core==1.4.0
+sphinx-reredirects
--- a/docs/sphinx/requirements.txt
+++ b/docs/sphinx/requirements.txt
@@ -92,7 +92,7 @@ requests==2.32.3
    # via
    #   pygithub
    #   sphinx
-rocm-docs-core==1.2.0
+rocm-docs-core==1.4.0
    # via -r requirements.in
 smmap==5.0.1
    # via gitdb
@@ -122,7 +122,7 @@ sphinx-external-toc==1.0.1
    # via rocm-docs-core
 sphinx-notfound-page==1.0.2
    # via rocm-docs-core
-sphinx-reredirects==0.1.3
+sphinx-reredirects==0.1.4
    # via -r requirements.in
 sphinxcontrib-applehelp==1.0.8
    # via sphinx
@@ -142,7 +142,7 @@ typing-extensions==4.12.0
    # via
    #   pydata-sphinx-theme
    #   pygithub
-urllib3==2.2.1
+urllib3==2.2.2
    # via
    #   pygithub
    #   requests
--- a/docs/sphinx/static/css/rocm_custom.css
+++ b/docs/sphinx/static/css/rocm_custom.css
@@ -1,53 +1,6 @@

 /* Adds container for big tables, used for Compatibility Matrix */

-
-/* Header row to have opaque background colour when sticky */
-.format-big-table th {
-    background-color: var(--pst-color-background);
+.format-big-table {
    white-space: nowrap;
  }
-  
-  /* Turn on borders for whole table */
-  .format-big-table th,
-  .format-big-table td {
-    border-width: 1px;
-  }
-  
-  /* .format-big-table th.head { */
-  /*   background-color: var(--pst-color-on-surface); */
-  /* } */
-  
-  /* Sticky header for table excluding the stub*/
-  .format-big-table th.head:not(.stub) {
-    position: sticky;
-    top: 3rem;
-    z-index: 1;
-  }
-
-  /* Sticky header for the head & stub: top left cell */
-  .format-big-table th.head.stub {
-    position: sticky;
-    top: 3rem;
-    z-index: 1;
-    background-color: var(--pst-color-background);
-    white-space: nowrap;
-  }
-  
-  /* Sticky for the stub column */
-  /*.format-big-table tbody th:not(:empty) {
-    position: sticky;
-    top: 3rem;
-    z-index: 2;
-  }*/
-  
-  /* Removes borders for stub column */
-  .format-big-table tbody th {
-    border-top: none;
-    border-bottom: none;
-  }
-  
-  /* For horizontal scrolling only.  Can't be combined with format-big-table container */
-  .horizontal-scrolling-container {
-    overflow-x: scroll;
-  }
--- a/docs/what-is-rocm.rst
+++ b/docs/what-is-rocm.rst
@@ -43,7 +43,7 @@ Machine Learning & Computer Vision
  ":doc:`Composable Kernel <composable_kernel:index>`", "Provides a programming model for writing performance critical kernels for machine learning workloads across multiple architectures"
  ":doc:`MIGraphX <amdmigraphx:index>`", "Graph inference engine that accelerates machine learning model inference"
  ":doc:`MIOpen <miopen:index>`", "An open source deep-learning library"
-  ":doc:`MIVisionX <mivisionx:doxygen/html/index>`", "Set of comprehensive computer vision and machine learning libraries, utilities, and applications"
+  ":doc:`MIVisionX <mivisionx:index>`", "Set of comprehensive computer vision and machine learning libraries, utilities, and applications"
  ":doc:`rocAL <rocal:index>`", "An augmentation library designed to decode and process images and videos"
  ":doc:`rocDecode <rocdecode:index>`", "High-performance SDK for access to video decoding features on AMD GPUs"
  ":doc:`ROCm Performance Primitives (RPP) <rpp:index>`", "Comprehensive high-performance computer vision library for AMD processors with HIP/OpenCL/CPU back-ends"
@@ -99,18 +99,18 @@ Tools

  ":doc:`AMD SMI <amdsmi:index>`", "C library for Linux that provides a user space interface for applications to monitor and control AMD devices"
  ":doc:`HIPIFY <hipify:index>`", "Translates CUDA source code into portable HIP C++"
-  ":doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`", "Captures the performance characteristics of buffer copying and kernel read/write operations"
+  ":doc:`ROCdbgapi <rocdbgapi:index>`", "ROCm debugger API library"
  ":doc:`ROCmCC <./reference/rocmcc>`", "Clang/LLVM-based compiler"
+  ":doc:`rocminfo <rocminfo:index>`", "Reports system information"
+  ":doc:`ROCProfiler <rocprofiler:index>`", "Profiling tool for HIP applications"
+  ":doc:`ROCTracer <roctracer:index>`", "Intercepts runtime API calls and traces asynchronous activity"
+  ":doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`", "Captures the performance characteristics of buffer copying and kernel read/write operations"
  ":doc:`ROCm CMake <rocmcmakebuildtools:index>`", "Collection of CMake modules for common build and development tasks"
  ":doc:`ROCm Data Center Tool <rdc:index>`", "Simplifies administration and addresses key infrastructure challenges in AMD GPUs in cluster and data-center environments"
-  "`ROCm Debug Agent (ROCdebug-agent) <https://github.com/ROCm/rocr_debug_agent/>`_ ", "Prints the state of all AMD GPU wavefronts that caused a queue error by sending a SIGQUIT signal to the process while the program is running"
  ":doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`", "Source-level debugger for Linux, based on the GNU Debugger (GDB)"
-  ":doc:`ROCdbgapi <rocdbgapi:index>`", "ROCm debugger API library"
-  ":doc:`rocminfo <rocminfo:index>`", "Reports system information"
  ":doc:`ROCm SMI <rocm_smi_lib:index>`", "C library for Linux that provides a user space interface for applications to monitor and control GPU applications"
  ":doc:`ROCm Validation Suite <rocmvalidationsuite:index>`", "Detects and troubleshoots common problems affecting AMD GPUs running in a high-performance computing environment"
-  ":doc:`ROCProfiler <rocprofiler:profiler_home_page>`", "Profiling tool for HIP applications"
-  ":doc:`ROCTracer <roctracer:index>`", "Intercepts runtime API calls and traces asynchronous activity"
+  ":doc:`ROCr Debug Agent <rocr_debug_agent:index>`", "Prints the state of all AMD GPU wavefronts that caused a queue error by sending a SIGQUIT signal to the process while the program is running"
  ":doc:`TransferBench <transferbench:index>`", "Utility to benchmark simultaneous transfers between user-specified devices (CPUs/GPUs)"

 Compilers
@@ -119,7 +119,6 @@ Compilers
 .. csv-table::
  :header: "Component", "Description"

-  "`AOMP <https://github.com/ROCm/aomp/>`_", "Scripted build of `LLVM <https://github.com/ROCm/llvm-project>`_ and supporting software"
  "`FLANG <https://github.com/ROCm/flang/>`_", "An out-of-tree Fortran compiler targeting LLVM"
  ":doc:`hipCC <hipcc:index>`", "Compiler driver utility that calls Clang or NVCC and passes the appropriate include and library options for the target compiler and HIP infrastructure"
  "`LLVM (amdclang) <https://github.com/ROCm/llvm-project>`_ ", "Toolkit for the construction of highly optimized compilers, optimizers, and runtime environments"
--- a/tools/autotag/templates/rocm_changes/6.0.0.md
+++ b/tools/autotag/templates/rocm_changes/6.0.0.md
@@ -281,7 +281,7 @@ Note: These complex operations are equivalent to corresponding types/functions o
      * `HIP_ROCclr`
    * NVIDIA platform
      * `HIP_PLATFORM_NVCC`
-* The [hcc_detail](https://github.com/ROCm/clr/tree/1949b1621a802ffb1492616adbae6154bfbe64ef/hipamd/include/hip/hcc_detail) and [nvcc_detail](https://github.com/ROCm/clr/tree/1949b1621a802ffb1492616adbae6154bfbe64ef/hipamd/include/hips/nvcc_detail) directories in the clr repository are removed.
+* The `hcc_detail` and `nvcc_detail` directories in the clr repository are removed.
 * Deprecated gcnArch is removed from hip device struct `hipDeviceProp_t`.
 * Deprecated `enum hipMemoryType memoryType;` is removed from HIP struct `hipPointerAttribute_t` union.

--- a/tools/autotag/templates/rocm_changes/6.1.1.md
+++ b/tools/autotag/templates/rocm_changes/6.1.1.md
@@ -3,7 +3,9 @@ ROCm™ 6.1.1 introduces minor fixes and improvements to some tools and librarie

 ### OS support

-ROCm 6.1.1 has been tested against a pre-release version of Ubuntu 22.04.5 (kernel: 5.15 [GA], 6.8 [HWE]).
+* ROCm 6.1.1 now supports Oracle Linux. It has been tested against version 8.9 (kernel 5.15.0-205) with AMD Instinct MI300X accelerators.
+
+* ROCm 6.1.1 has been tested against a pre-release version of Ubuntu 22.04.5 (kernel: 5.15 [GA], 6.8 [HWE]).

 ### AMD SMI

--- a/tools/rocm-build/README.md
+++ b/tools/rocm-build/README.md
--- a/tools/rocm-build/ROCm.mk
+++ b/tools/rocm-build/ROCm.mk
@@ -15,7 +15,7 @@ ifeq (${ENABLE_ADDRESS_SANITIZER},true)
 	SANITIZER_FLAG=-a
 endif

-export INFRA_REPO:=ROCm/rocm-build
+export INFRA_REPO:=ROCm/tools/rocm-build

 OUT_DIR:=$(shell . ${INFRA_REPO}/envsetup.sh >/dev/null 2>&1 ; echo $${OUT_DIR})
 ROCM_INSTALL_PATH:=$(shell . ${INFRA_REPO}/envsetup.sh >/dev/null 2>&1 ; echo $${ROCM_INSTALL_PATH})
--- a/tools/rocm-build/build_amd_smi_lib.sh
+++ b/tools/rocm-build/build_amd_smi_lib.sh
--- a/tools/rocm-build/build_amdmigraphx.sh
+++ b/tools/rocm-build/build_amdmigraphx.sh
--- a/tools/rocm-build/build_aqlprofile.sh
+++ b/tools/rocm-build/build_aqlprofile.sh
--- a/tools/rocm-build/build_clang-ocl.sh
+++ b/tools/rocm-build/build_clang-ocl.sh
--- a/tools/rocm-build/build_comgr.sh
+++ b/tools/rocm-build/build_comgr.sh
--- a/tools/rocm-build/build_composable_kernel.sh
+++ b/tools/rocm-build/build_composable_kernel.sh
--- a/tools/rocm-build/build_dbgapi.sh
+++ b/tools/rocm-build/build_dbgapi.sh
--- a/tools/rocm-build/build_devicelibs.sh
+++ b/tools/rocm-build/build_devicelibs.sh
--- a/tools/rocm-build/build_half.sh
+++ b/tools/rocm-build/build_half.sh
--- a/tools/rocm-build/build_hip_on_rocclr.sh
+++ b/tools/rocm-build/build_hip_on_rocclr.sh
--- a/tools/rocm-build/build_hipblas.sh
+++ b/tools/rocm-build/build_hipblas.sh
--- a/tools/rocm-build/build_hipblaslt.sh
+++ b/tools/rocm-build/build_hipblaslt.sh
--- a/tools/rocm-build/build_hipcc.sh
+++ b/tools/rocm-build/build_hipcc.sh
--- a/tools/rocm-build/build_hipcub.sh
+++ b/tools/rocm-build/build_hipcub.sh
--- a/tools/rocm-build/build_hipfft.sh
+++ b/tools/rocm-build/build_hipfft.sh
--- a/tools/rocm-build/build_hipfort.sh
+++ b/tools/rocm-build/build_hipfort.sh
--- a/tools/rocm-build/build_hipify_clang.sh
+++ b/tools/rocm-build/build_hipify_clang.sh
--- a/tools/rocm-build/build_hiprand.sh
+++ b/tools/rocm-build/build_hiprand.sh
--- a/tools/rocm-build/build_hipsolver.sh
+++ b/tools/rocm-build/build_hipsolver.sh
--- a/tools/rocm-build/build_hipsparse.sh
+++ b/tools/rocm-build/build_hipsparse.sh
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Istvan Kiss	848159f6c3	WIP	2024-07-01 13:36:11 +02:00
Istvan Kiss	ddf810a781	WIP	2024-07-01 13:20:21 +02:00
Istvan Kiss	a7cc71df62	WIP	2024-07-01 13:14:17 +02:00
Istvan Kiss	5c4674027b	Update docs/reference/env-variables.rst Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>	2024-07-01 13:09:34 +02:00
Istvan Kiss	f111d5654c	Update docs/reference/env-variables.rst Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>	2024-07-01 13:09:23 +02:00
Istvan Kiss	7f43dbbbb7	Update docs/reference/env-variables.rst Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>	2024-07-01 13:09:13 +02:00
Istvan Kiss	526db1c474	Update docs/reference/env-variables.rst Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>	2024-07-01 12:48:13 +02:00
Istvan Kiss	d612ae390c	Update docs/reference/env-variables.rst Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>	2024-07-01 12:40:50 +02:00
Istvan Kiss	cca2dc23c0	Update docs/reference/env-variables.rst Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>	2024-07-01 12:39:37 +02:00
Istvan Kiss	0a88853ca3	Update docs/reference/env-variables.rst Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>	2024-07-01 12:39:17 +02:00
Istvan Kiss	4164cdc606	Update docs/reference/env-variables.rst Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>	2024-07-01 12:26:41 +02:00
Istvan Kiss	712e63d0ad	WIP	2024-06-29 12:52:00 +02:00
Istvan Kiss	f2adaebbcd	Change usage to value	2024-06-29 12:51:59 +02:00
Istvan Kiss	3f74a73220	Update docs/reference/env-variables.rst Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>	2024-06-29 12:51:59 +02:00
Istvan Kiss	84e3063e0a	Remove type column leftover	2024-06-29 12:51:58 +02:00
Istvan Kiss	06e8d93bf9	Update environment variables	2024-06-29 12:51:58 +02:00
Istvan Kiss	55ee1d1b95	Fix	2024-06-29 12:51:57 +02:00
Istvan Kiss	97619286df	Fix meta	2024-06-29 12:51:57 +02:00
Istvan Kiss	f0a0d4e738	Minor fixes.	2024-06-29 12:51:56 +02:00
Istvan Kiss	517b8645b4	Removed the unknow variables.	2024-06-29 12:51:56 +02:00
Istvan Kiss	367d6cdf5e	Fixes. Fixes	2024-06-29 12:51:55 +02:00
Istvan Kiss	4db7ffeb69	Update rocPRIM, hipCUB and rocThrust env variables	2024-06-29 12:51:55 +02:00
Istvan Kiss	afb343fce6	WIP on env_variables	2024-06-29 12:51:54 +02:00
Istvan Kiss	fcc99a324a	Update env variables	2024-06-29 12:51:54 +02:00
Istvan Kiss	6367a53775	Minor changes	2024-06-29 12:51:53 +02:00
Istvan Kiss	e19b947c26	Stying refactor	2024-06-29 12:51:53 +02:00
Bence Parajdi	f181f84b97	fix review commenets	2024-06-29 12:51:52 +02:00
Bence Parajdi	1f2d583372	remove non-public debug option	2024-06-29 12:51:52 +02:00
Bence Parajdi	f8d46afdd2	fix more review comments	2024-06-29 12:51:51 +02:00
Bence Parajdi	9068df3bb7	fix review comments	2024-06-29 12:51:51 +02:00
Bence Parajdi	9adeb56ebd	fix typos and add missing words to wordlist	2024-06-29 12:51:50 +02:00
Bence Parajdi	37775f2ff4	add missing env variables	2024-06-29 12:51:50 +02:00
Mátyás Aradi	9e7a8a93cd	Update based on review comments	2024-06-29 12:51:49 +02:00
Istvan Kiss	8e90fdbc4a	Initial version	2024-06-29 12:51:49 +02:00
alexxu-amd	325a2fd54c	External CI: Fix a typo from composable_kernel pipeline (#3373 ) * add libdrm-dev lib to CK dependency list * change INSTANCE_ONLY to INSTANCES_ONLY	2024-06-28 15:39:08 -04:00
Peter Park	a552f9f6b8	Add fixes to vLLM install and triton kernel optimization (#3366 ) * Add fixes to vLLM install and triton kernel optimization * Update TGI how-to remove extra step in TGI	2024-06-27 14:28:20 -04:00
Joseph Macaranas	accb1347ea	External CI: Add initial support for rocAL (#3365 )	2024-06-27 13:58:10 -04:00
alexxu-amd	699b604f00	Add INSTANCE_ONLY cmake flag; change pool to ultra; increase time limit to 3.5hr (#3275 )	2024-06-27 10:01:43 -04:00
Sam Wu	ce08245f4c	Merge pull request #3362 from peterjunpark/fix/index-styling Fix card text color in index	2024-06-26 15:43:50 -06:00
Peter Jun Park	5c9d071e85	remove card text styling	2024-06-26 14:12:25 -04:00
randyh62	356ad4ab47	remove Magma (#3361 ) * remove Magma * missed one	2024-06-26 10:00:39 -07:00
Sam Wu	57d59bfcc6	Merge pull request #3358 from samjwu/articleinfo Remove article info for moved or deleted pages	2024-06-26 09:44:49 -06:00
Sam Wu	791285772d	Remove article info for moved or deleted pages	2024-06-25 16:45:42 -06:00
abhimeda	217830fe25	added matrices artifact uploading code from rocSPARSE (#3356 )	2024-06-25 15:04:52 -04:00
randyh62	f07608bc92	added ROCm Core and AMD SMI (#3348 ) * added ROCm Core and AMD SMI * fix URLs	2024-06-21 16:36:39 -07:00
Peter Park	1435634f5c	reorder toc (#3346 )	2024-06-21 18:53:55 -04:00
Sam Wu	ee384ba0e0	Merge pull request #3345 from ROCm/dependabot/pip/docs/sphinx/sphinx-reredirects-0.1.4 Bump sphinx-reredirects from 0.1.3 to 0.1.4 in /docs/sphinx	2024-06-21 16:46:24 -06:00
dependabot[bot]	bb0090882c	Bump sphinx-reredirects from 0.1.3 to 0.1.4 in /docs/sphinx Bumps [sphinx-reredirects](https://github.com/documatt/sphinx-reredirects) from 0.1.3 to 0.1.4. - [Commits](https://github.com/documatt/sphinx-reredirects/compare/v0.1.3...v0.1.4) --- updated-dependencies: - dependency-name: sphinx-reredirects dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com>	2024-06-21 22:37:37 +00:00
Peter Park	22e9f6f373	Add "Using ROCm for HPC" guide (#3302 ) * Add ROCm for HPC * Update index and toc * Add TMs in other tutorials * Add hpc apps table Spellcheck add stack image and fix links Add descriptions update copy Update copy add ref Finish adding app descriptions tweak descs fix line lengths * Revert "Add TMs in other tutorials" This reverts commit `08a1a80e57`. * Add links to install and compat matrix * Update HPC stack graphic and add some links Add hpc and td to wordlist fix links * Apply suggestions from Leo's review Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> Update docs/how-to/rocm-for-hpc/index.rst Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> Update docs/how-to/rocm-for-hpc/index.rst Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> Update docs/how-to/rocm-for-hpc/index.rst Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> Update docs/how-to/rocm-for-hpc/index.rst Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> Update docs/how-to/rocm-for-hpc/index.rst Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> fix formatting Update words * update wordlist * Update hpc app descriptions with content from InfinityHub catalog	2024-06-21 16:15:18 -04:00
randyh62	d994302df7	license information updated (#3339 ) * license information updated * Young's comments * Sam's comment	2024-06-21 09:22:57 -07:00
Peter Park	9d4eb5eff2	Add RHEL 9.4 to compat matrix (#3332 ) * Add RHEL 9.4 to compat matrix * add rhel 9.4 footnote in compat matrix	2024-06-19 15:03:29 -04:00
danielsu-amd	8b95ab0a02	External CI: remove redundant rocm-examples build flags (#3331 )	2024-06-19 13:08:31 -04:00
danielsu-amd	e74245fbe4	External CI: Latest source pipeline for rocm-examples (#3317 )	2024-06-19 09:59:02 -04:00
Peter Park	778c8e2c05	Add Oracle Linux 8.9 to 6.1.1 changelog (#3327 )	2024-06-18 18:29:09 -04:00
Peter Park	361983fa48	Add OL support note to compat matrix (#3325 ) Fix footnote Footnote order Satisfy spellcheck	2024-06-18 17:32:07 -04:00
Sam Wu	3dff636d40	Merge pull request #3314 from ROCm/dependabot/pip/docs/sphinx/urllib3-2.2.2 Bump urllib3 from 2.2.1 to 2.2.2 in /docs/sphinx	2024-06-18 14:52:26 -06:00
Peter Park	1d976a1871	Add Radeon PRO dual slot to hw specs (#3318 )	2024-06-18 15:22:43 -04:00
randyh62	ebfec1b7c1	remove nvcc (#3313 ) * remove nvcc * Update CHANGELOG to match 6.0.0 template --------- Co-authored-by: Sam Wu <22262939+samjwu@users.noreply.github.com>	2024-06-18 12:11:40 -07:00
dependabot[bot]	66b71ba3c8	Bump urllib3 from 2.2.1 to 2.2.2 in /docs/sphinx Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.2.1 to 2.2.2. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.2.1...2.2.2) --- updated-dependencies: - dependency-name: urllib3 dependency-type: indirect ... Signed-off-by: dependabot[bot] <support@github.com>	2024-06-17 23:41:54 +00:00
Joseph Macaranas	e903ffa952	External CI: Update aqlprofile binary used for rocprofiler (#3304 )	2024-06-17 14:23:36 -04:00
Peter Park	fe1c2e9529	Update link to ROCr Debug Agent to docs portal (#3303 ) * Fix link to debug agent in what-is-rocm * ROCm --> ROCR add index * ROCR --> ROCr * Change ROCm Debug Agent to ROCr Debug Agent in docs	2024-06-14 17:52:49 -04:00
Joseph Macaranas	923141f300	External CI: Fixes for two repos to work with latest source (#3293 ) With MIOpen now building with latest source on External CI, this unblocked AMDMIGraphX from building with latest source. Determined rocMLIR also needed to be built with latest source as a dependency.	2024-06-13 11:55:40 -04:00
David Galiffi	c91e15a580	Moving `rocm-build` to the `tools` folder (#3285 ) [Why] To maintain the "pitchfork layout" convention used by the repository. [How] - Update README.md - Update INFRA_REPO in ROCm.mk - Updated to new path: ROCm/tools/rocm-build --------- Signed-off-by: David Galiffi <David.Galiffi@amd.com>	2024-06-12 17:12:06 -04:00
Peter Park	d24b3fab61	Fix ExLlama-v2 code snippet (#3281 )	2024-06-12 17:03:04 -04:00
Jeffrey Novotny	e864aa50ac	Remove AOMP from compatibility matrix (#3289 )	2024-06-12 14:17:32 -04:00
srawat	2531f0aa03	Update link to command-line argument reference (#3270 ) * Added deleted sections to openmp.md and other improvements * Update openmp.md	2024-06-12 11:53:22 -04:00
Joseph Macaranas	13e14363cc	External CI: updated MIOpen dependencies (#3278 )	2024-06-12 11:23:21 -04:00
Joseph Macaranas	664c047311	External CI: Package rocSPARSE matrices for testers to consume (#3276 )	2024-06-12 11:22:46 -04:00
Istvan Kiss	78fdcdf48d	Update docs/conceptual/setting-cus.rst Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>	2024-06-12 16:17:42 +02:00
Peter Park	c4181b9245	Remove aomp from What is ROCm? page (#3282 )	2024-06-11 11:37:11 -04:00
alexxu-amd	7a13a6ee86	Merge pull request #3274 from ROCm/amd/alexxu12/fixStagingCI Fix hipTensor build error on develop branch	2024-06-11 11:02:26 -04:00
Joseph Macaranas	ace708935d	External CI: updated rocr_debug_agent dependencies (#3277 )	2024-06-11 10:59:13 -04:00
alexxu-amd	cff1b2b021	revert changes for manual test	2024-06-11 10:39:28 -04:00
alexxu-amd	d7eacf56e3	adjust variables for manual test	2024-06-11 10:20:54 -04:00
alexxu-amd	bddbc6b444	revert changes to see if the build still fails	2024-06-11 10:07:20 -04:00
alexxu-amd	67f04977fb	Move double dash to parameter for generic use case	2024-06-11 09:53:14 -04:00
randyh62	f500c32989	add quarantine_size_mb (#3264 ) * add quarantine_size_mb * Update docs/conceptual/using-gpu-sanitizer.md Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> * Update docs/conceptual/using-gpu-sanitizer.md Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> * format fix * format fix again * ASAN capitalization * remove particular * indent bullets * Leo comments --------- Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>	2024-06-10 11:59:47 -07:00
alexxu-amd	3c1d39f251	revert changes to rdc	2024-06-10 14:02:57 -04:00
alexxu-amd	93f524586b	revert changes made for manual tests	2024-06-10 14:02:04 -04:00
alexxu-amd	b36de1d3d4	delete space	2024-06-10 13:59:33 -04:00
alexxu-amd	627d38412a	Revert changes to CK	2024-06-10 13:58:44 -04:00
alexxu-amd	1be99075e2	Change thread number to 32	2024-06-10 13:53:23 -04:00
alexxu-amd	05d7992361	change multithread flag	2024-06-10 13:03:53 -04:00
alexxu-amd	98f2e183a2	change pool back to MEDIUM before merge	2024-06-10 11:56:25 -04:00
alexxu-amd	ab1c62464a	change pool to high	2024-06-10 11:38:32 -04:00
alexxu-amd	2e73c56275	Update hipTensor.yml	2024-06-10 11:37:22 -04:00
Joseph Macaranas	f8151b6cb5	rocprofiler-register: Add unit testing (#3272 ) Since this component uses the base pool, does not need GPU for testing and is very quick to run, unit testing can be done within the same job.	2024-06-10 11:29:47 -04:00
alexxu-amd	52bccc1819	add variable declaration	2024-06-10 10:51:38 -04:00
alexxu-amd	2b492056ec	add multithread Flag to build-cmake to allow hipTensor pass -j16	2024-06-10 10:46:33 -04:00
alexxu-amd	b12e5c32ca	Restore hipTensor's original flag, remove GNinja	2024-06-10 10:15:05 -04:00
Joseph Macaranas	8db9220935	External CI: non-interactive apt upgrades (#3271 )	2024-06-08 22:20:11 -04:00
alexxu-amd	30851e9c85	Merge pull request #3266 from ROCm/amd/alexxu12/aptScriptTypo Fix a typo from .azuredevops/templates/steps/dependencies-other.yml	2024-06-07 13:36:37 -04:00
alexxu-amd	fdd0ed080b	fix a typo	2024-06-07 13:29:14 -04:00
Joseph Macaranas	d3f634ea33	Remove branch filter for aomp pipeline trigger (#3258 ) Previous filter was not triggering this CI pipeline when ROCm-Runtime build was triggered from a pipeline completion trigger of llvm-project.	2024-06-07 11:14:32 -04:00
Sam Wu	6c73abbaea	Merge pull request #3262 from ROCm/bb-develop-6.1.2-pr Add the manifest file for ROCm6.1.2	2024-06-06 17:07:14 -06:00
Sam Wu	c49877adc9	Merge branch 'roc-6.1.x' into develop	2024-06-06 17:06:13 -06:00
Sam Wu	49404d69f8	Merge pull request #3263 from ROCm/dependabot/pip/docs/sphinx/rocm-docs-core-1.4.0 Bump rocm-docs-core from 1.2.0 to 1.4.0 in /docs/sphinx	2024-06-06 14:18:31 -06:00
dependabot[bot]	d17e602769	Bump rocm-docs-core from 1.2.0 to 1.4.0 in /docs/sphinx Bumps [rocm-docs-core](https://github.com/ROCm/rocm-docs-core) from 1.2.0 to 1.4.0. - [Release notes](https://github.com/ROCm/rocm-docs-core/releases) - [Changelog](https://github.com/ROCm/rocm-docs-core/blob/develop/CHANGELOG.md) - [Commits](https://github.com/ROCm/rocm-docs-core/compare/v1.2.0...v1.4.0) --- updated-dependencies: - dependency-name: rocm-docs-core dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com>	2024-06-06 20:04:21 +00:00
Wang, Yanyao	2fdbc8b475	Add the manifest file for ROCm6.1.2	2024-06-06 12:44:08 -07:00
Peter Park	7d3fb25725	Update links in compat matrix and what-is-rocm (#3253 ) * Update links in compat matrix and what-is-rocm * Tensorflow -> TensorFlow * Remove extra lines * Revert "Remove extra lines" This reverts commit `607c4323ac`. ROCm Debug Agent	2024-06-06 13:27:00 -04:00
alexxu-amd	8c3eaa1fda	Update hipTensor.yml	2024-06-06 11:56:08 -04:00
alexxu-amd	acca214a29	Update hipTensor.yml	2024-06-06 11:43:07 -04:00
Wang, Yanyao	b7c6671e06	Fix Markdown formate for the linter check	2024-06-05 13:44:50 -07:00
Wang, Yanyao	27bd772bbe	Update the branch of ROCm repo after testing	2024-06-05 13:44:50 -07:00
Wang, Yanyao	68c45d30b5	Build ROCm from source	2024-06-05 13:44:50 -07:00
Sam Wu	35835c4289	Fix first link in compatibility matrix table (#3239 ) * Fix first link in compatibility matrix table * Revert "Fix first link in compatibility matrix table" This reverts commit `069c5c116a`. * Remove sticky header and unused css * Remove container from hardware specs matrix --------- Co-authored-by: Peter Jun Park <peter.park@amd.com>	2024-06-05 15:48:27 -04:00
Wang, Yanyao	73b7b02c4f	Fix Markdown formate for the linter check	2024-06-05 12:15:12 -07:00
Wang, Yanyao	ba7afa9808	Update the branch of ROCm repo after testing	2024-06-05 12:15:12 -07:00
Wang, Yanyao	ae6eac2823	Build ROCm from source	2024-06-05 12:15:12 -07:00
alexxu-amd	6eb6a5bd90	change compiler from hipcc to amdclang++	2024-06-05 14:14:24 -04:00
Young Hui - AMD	55bb127e9a	fix links for MIVisionX (#3240 )	2024-06-05 11:55:11 -04:00
Sam Wu	e65e9307f5	Add 6.1.2 to version list (#3238 )	2024-06-05 11:25:35 -04:00
Peter Park	6494885359	Rename fine-tuning and optimization guide directory and fix index.md (#3242 ) * Mv fine-tuning and optimization files * Reorder index.md * Rename images directory * Fix internal links	2024-06-05 11:11:00 -04:00
Sam Wu	266f502010	Update manifest to 6.1.2	2024-06-05 11:06:24 -04:00
abhimeda	bf08674992	Built rccl using latest source code (#3230 )	2024-06-04 17:50:36 -04:00
alexxu-amd	8826b10b92	Updates cmake flag to run CK with instance_only on all gpu targets	2024-06-04 17:40:48 -04:00
alexxu-amd	a96ec80cb0	Increase timeout limites to a day for CK	2024-06-04 13:05:41 -04:00
alexxu-amd	57506ba947	upgrade pool to HIGH for CK	2024-06-04 11:59:16 -04:00
alexxu-amd	4b67c8725b	change compiler to clang++ and build for instance only	2024-06-04 11:57:18 -04:00
alexxu-amd	258e504595	change pool to medium	2024-06-04 09:52:36 -04:00
alexxu-amd	156215efcc	Upgrade pool to HIGH	2024-06-04 09:38:50 -04:00
alexxu-amd	7c448eec8f	add MI250 target to CK	2024-06-04 09:38:05 -04:00
alexxu-amd	29f9b4ab23	chang gpu target to gfx90a	2024-06-03 15:39:41 -04:00
alexxu-amd	6e99bef8f4	change pool to BASE	2024-06-03 14:42:24 -04:00
alexxu-amd	5025a03f79	change hipTensor compiler to hipcc	2024-06-03 10:39:36 -04:00
alexxu-amd	527840e502	Merge branch 'develop' of https://github.com/ROCm/ROCm into amd/alexxu12/fixStagingCI	2024-05-31 15:30:32 -04:00
amd-jmacaran	a65db6b47d	temp change for testing experimental	2024-05-31 15:25:42 -04:00
alexxu-amd	b69b997d69	Change pool to LOW	2024-05-31 14:12:57 -04:00
alexxu-amd	52f8a0ad36	change default branch to develop	2024-05-31 13:46:19 -04:00
alexxu-amd	ad9cdaa2a9	Switch to staging branch	2024-05-31 11:02:01 -04:00