Add JAX Plugin-PJRT support table 7.1.1

Merge branch 'develop' into docs/7.1.1
Updating rocm version to 7.1.1 GA (#5697 )
2026-01-09 22:58:17 -05:00 · 2025-11-26 16:51:25 +01:00 · 2025-11-26 08:47:19 -05:00 · 2025-11-26 16:08:03 +05:30 · 2025-11-25 16:30:46 -05:00 · 2025-11-25 15:37:04 -05:00
29 changed files with 1344 additions and 242 deletions
--- a/.azuredevops/components/AMDMIGraphX.yml
+++ b/.azuredevops/components/AMDMIGraphX.yml
@@ -128,6 +128,9 @@ jobs:
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+      parameters:
+        cmakeVersion: '3.28.6'
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
@@ -152,6 +155,7 @@ jobs:
          -DCMAKE_BUILD_TYPE=Release
          -DGPU_TARGETS=${{ job.target }}
          -DAMDGPU_TARGETS=${{ job.target }}
+          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
          -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
          -DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
@@ -192,6 +196,9 @@ jobs:
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
+      parameters:
+        cmakeVersion: '3.28.6'
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
@@ -217,6 +224,7 @@ jobs:
          -DCMAKE_BUILD_TYPE=Release
          -DGPU_TARGETS=${{ job.target }}
          -DAMDGPU_TARGETS=${{ job.target }}
+          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
          -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
          -DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
--- a/.azuredevops/components/amdsmi.yml
+++ b/.azuredevops/components/amdsmi.yml
@@ -1,10 +1,29 @@
 parameters:
+- name: componentName
+  type: string
+  default: amdsmi
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
+# monorepo related parameters
+- name: sparseCheckoutDir
+  type: string
+  default: ''
+- name: triggerDownstreamJobs
+  type: boolean
+  default: false
+- name: downstreamAggregateNames
+  type: string
+  default: ''
+- name: buildDependsOn
+  type: object
+  default: null
+- name: unifiedBuild
+  type: boolean
+  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -31,7 +50,7 @@ parameters:

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: amdsmi_build_${{ job.os }}
+  - job: ${{ parameters.componentName }}_build_${{ job.os }}
    pool:
      ${{ if eq(job.os, 'ubuntu2404') }}:
        vmImage: 'ubuntu-24.04'
@@ -55,6 +74,7 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
+        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        os: ${{ job.os }}
@@ -65,50 +85,54 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
        os: ${{ job.os }}
+        componentName: ${{ parameters.componentName }}
+        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
        os: ${{ job.os }}
+        componentName: ${{ parameters.componentName }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
    #   parameters:
    #     aptPackages: ${{ parameters.aptPackages }}

- ${{ each job in parameters.jobMatrix.testJobs }}:
-  - job: amdsmi_test_${{ job.os }}_${{ job.target }}
-    dependsOn: amdsmi_build_${{ job.os }}
-    condition:
-      and(succeeded(),
-        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
-        eq(${{ parameters.aggregatePipeline }}, False)
-      )
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    pool: ${{ job.target }}_test_pool
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-      parameters:
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
-      parameters:
-        runRocminfo: false
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-      parameters:
-        componentName: amdsmi
-        testDir: '$(Agent.BuildDirectory)'
-        testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst'
-        testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
-        os: ${{ job.os }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        environment: test
-        gpuTarget: ${{ job.target }}
+- ${{ if eq(parameters.unifiedBuild, False) }}:
+  - ${{ each job in parameters.jobMatrix.testJobs }}:
+    - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
+      dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}
+      condition:
+        and(succeeded(),
+          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
+          eq(${{ parameters.aggregatePipeline }}, False)
+        )
+      variables:
+      - group: common
+      - template: /.azuredevops/variables-global.yml
+      pool: ${{ job.target }}_test_pool
+      workspace:
+        clean: all
+      steps:
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+        parameters:
+          aptPackages: ${{ parameters.aptPackages }}
+          packageManager: ${{ job.packageManager }}
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+        parameters:
+          os: ${{ job.os }}
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+        parameters:
+          runRocminfo: false
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+        parameters:
+          componentName: ${{ parameters.componentName }}
+          testDir: '$(Agent.BuildDirectory)'
+          testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst'
+          testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
+          os: ${{ job.os }}
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+        parameters:
+          aptPackages: ${{ parameters.aptPackages }}
+          environment: test
+          gpuTarget: ${{ job.target }}
--- a/.azuredevops/components/hipTensor.yml
+++ b/.azuredevops/components/hipTensor.yml
@@ -1,10 +1,29 @@
 parameters:
+- name: componentName
+  type: string
+  default: hipTensor
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
+# monorepo related parameters
+- name: sparseCheckoutDir
+  type: string
+  default: ''
+- name: triggerDownstreamJobs
+  type: boolean
+  default: false
+- name: downstreamAggregateNames
+  type: string
+  default: ''
+- name: buildDependsOn
+  type: object
+  default: null
+- name: unifiedBuild
+  type: boolean
+  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -51,7 +70,7 @@ parameters:

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: hipTensor_build_${{ job.target }}
+  - job: ${{ parameters.componentName }}_build_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
@@ -66,12 +85,15 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
+        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
+        ${{ if parameters.triggerDownstreamJobs }}:
+          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -85,9 +107,12 @@ jobs:
          -GNinja
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
+        componentName: ${{ parameters.componentName }}
+        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
+        componentName: ${{ parameters.componentName }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -95,44 +120,47 @@ jobs:
        aptPackages: ${{ parameters.aptPackages }}
        gpuTarget: ${{ job.target }}

- ${{ each job in parameters.jobMatrix.testJobs }}:
-  - job: hipTensor_test_${{ job.target }}
-    timeoutInMinutes: 90
-    dependsOn: hipTensor_build_${{ job.target }}
-    condition:
-      and(succeeded(),
-        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
-        eq(${{ parameters.aggregatePipeline }}, False)
-      )
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    pool: ${{ job.target }}_test_pool
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-      parameters:
-        gpuTarget: ${{ job.target }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-      parameters:
-        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmTestDependencies }}
-        gpuTarget: ${{ job.target }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-      parameters:
-        componentName: hipTensor
-        testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor'
-        testParameters: '-E ".*-extended" --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        environment: test
-        gpuTarget: ${{ job.target }}
+- ${{ if eq(parameters.unifiedBuild, False) }}:
+  - ${{ each job in parameters.jobMatrix.testJobs }}:
+    - job: ${{ parameters.componentName }}_test_${{ job.target }}
+      timeoutInMinutes: 90
+      dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
+      condition:
+        and(succeeded(),
+          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
+          eq(${{ parameters.aggregatePipeline }}, False)
+        )
+      variables:
+      - group: common
+      - template: /.azuredevops/variables-global.yml
+      pool: ${{ job.target }}_test_pool
+      workspace:
+        clean: all
+      steps:
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+        parameters:
+          aptPackages: ${{ parameters.aptPackages }}
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+        parameters:
+          gpuTarget: ${{ job.target }}
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+        parameters:
+          checkoutRef: ${{ parameters.checkoutRef }}
+          dependencyList: ${{ parameters.rocmTestDependencies }}
+          gpuTarget: ${{ job.target }}
+          ${{ if parameters.triggerDownstreamJobs }}:
+            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+        parameters:
+          componentName: ${{ parameters.componentName }}
+          testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor'
+          testParameters: '-E ".*-extended" --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+        parameters:
+          aptPackages: ${{ parameters.aptPackages }}
+          environment: test
+          gpuTarget: ${{ job.target }}
--- a/.azuredevops/components/rocWMMA.yml
+++ b/.azuredevops/components/rocWMMA.yml
@@ -142,7 +142,7 @@ jobs:
 - ${{ if eq(parameters.unifiedBuild, False) }}:
  - ${{ each job in parameters.jobMatrix.testJobs }}:
    - job: ${{ parameters.componentName }}_test_${{ job.target }}
-      timeoutInMinutes: 270
+      timeoutInMinutes: 350
      dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
      condition:
        and(succeeded(),
--- a/.azuredevops/components/rocm-examples.yml
+++ b/.azuredevops/components/rocm-examples.yml
@@ -21,11 +21,25 @@ parameters:
    - libtbb-dev
    - libtiff-dev
    - libva-amdgpu-dev
+    - libva2-amdgpu
+    - mesa-amdgpu-va-drivers
    - libavcodec-dev
    - libavformat-dev
    - libavutil-dev
    - ninja-build
    - python3-pip
+    - protobuf-compiler
+    - libprotoc-dev
+- name: pipModules
+  type: object
+  default:
+    - future==1.0.0
+    - pytz==2022.1
+    - numpy==1.23
+    - google==3.0.0
+    - protobuf==3.12.4
+    - onnx==1.12.0
+    - nnef==1.0.7
 - name: rocmDependencies
  type: object
  default:
@@ -33,6 +47,7 @@ parameters:
    - aomp
    - aomp-extras
    - clr
+    - half
    - composable_kernel
    - hipBLAS
    - hipBLAS-common
@@ -47,6 +62,8 @@ parameters:
    - llvm-project
    - MIOpen
    - MIVisionX
+    - rocm_smi_lib
+    - rccl
    - rocALUTION
    - rocBLAS
    - rocDecode
@@ -69,6 +86,7 @@ parameters:
    - aomp
    - aomp-extras
    - clr
+    - half
    - composable_kernel
    - hipBLAS
    - hipBLAS-common
@@ -83,6 +101,8 @@ parameters:
    - llvm-project
    - MIOpen
    - MIVisionX
+    - rocm_smi_lib
+    - rccl
    - rocALUTION
    - rocBLAS
    - rocDecode
@@ -128,6 +148,7 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
+        pipModules: ${{ parameters.pipModules }}
        registerROCmPackages: true
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
      parameters:
@@ -227,5 +248,6 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
+        pipModules: ${{ parameters.pipModules }}
        environment: test
        gpuTarget: ${{ job.target }}
--- a/.azuredevops/components/rocprofiler-compute.yml
+++ b/.azuredevops/components/rocprofiler-compute.yml
@@ -65,6 +65,13 @@ parameters:
    - pytest
    - pytest-cov
    - pytest-xdist
+- name: rocmDependencies
+  type: object
+  default:
+    - clr
+    - llvm-project
+    - ROCR-Runtime
+    - rocprofiler-sdk
 - name: rocmTestDependencies
  type: object
  default:
@@ -101,10 +108,12 @@ jobs:
    ${{ if parameters.buildDependsOn }}:
      dependsOn:
        - ${{ each build in parameters.buildDependsOn }}:
-          - ${{ build }}_${{ job.os }}_${{ job.target }}
+          - ${{ build }}_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
+    - name: ROCM_PATH
+      value: $(Agent.BuildDirectory)/rocm
    pool:
      vmImage: ${{ variables.BASE_BUILD_POOL }}
    workspace:
@@ -119,6 +128,14 @@ jobs:
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
+    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      parameters:
+        checkoutRef: ${{ parameters.checkoutRef }}
+        dependencyList: ${{ parameters.rocmDependencies }}
+        gpuTarget: ${{ job.target }}
+        aggregatePipeline: ${{ parameters.aggregatePipeline }}
+        ${{ if parameters.triggerDownstreamJobs }}:
+            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
--- a/.azuredevops/templates/steps/dependencies-dnf.yml
+++ b/.azuredevops/templates/steps/dependencies-dnf.yml
@@ -63,6 +63,7 @@ parameters:
    libopenblas-dev: openblas-devel
    libopenmpi-dev: openmpi-devel
    libpci-dev: libpciaccess-devel
+    libsimde-dev: simde-devel
    libssl-dev: openssl-devel
    # note: libstdc++-devel is in the base packages list
    libsystemd-dev: systemd-devel
--- a/.azuredevops/templates/steps/dependencies-rocm.yml
+++ b/.azuredevops/templates/steps/dependencies-rocm.yml
@@ -35,8 +35,8 @@ parameters:
      developBranch: develop
      hasGpuTarget: true
    amdsmi:
-      pipelineId: 99
-      developBranch: amd-staging
+      pipelineId: 376
+      developBranch: develop
      hasGpuTarget: false
    aomp-extras:
      pipelineId: 111
@@ -115,7 +115,7 @@ parameters:
      developBranch: develop
      hasGpuTarget: true
    hipTensor:
-      pipelineId: 105
+      pipelineId: 374
      developBranch: develop
      hasGpuTarget: true
    llvm-project:
--- a/.wordlist.txt
+++ b/.wordlist.txt
@@ -139,6 +139,7 @@ EoS
 etcd
 fas
 FBGEMM
+FiLM
 FIFOs
 FFT
 FFTs
@@ -159,10 +160,12 @@ Fortran
 Fuyu
 GALB
 GAT
+GATNE
 GCC
 GCD
 GCDs
 GCN
+GCNN
 GDB
 GDDR
 GDR
@@ -181,6 +184,8 @@ Glibc
 GLXT
 Gloo
 GMI
+GNN
+GNNs
 GPG
 GPR
 GPT
@@ -250,6 +255,7 @@ Intersphinx
 Intra
 Ioffe
 JAX's
+JAXLIB
 Jinja
 JSON
 Jupyter
@@ -385,6 +391,7 @@ perf
 PEQT
 PIL
 PILImage
+PJRT
 POR
 PRNG
 PRs
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -49,7 +49,7 @@ for a complete overview of this release.
 * Fixed certain output in `amd-smi monitor` when GPUs are partitioned. It fixes the issue with amd-smi monitor such as: `amd-smi monitor -Vqt`, `amd-smi monitor -g 0 -Vqt -w 1`, and `amd-smi monitor -Vqt --file /tmp/test1`. These commands will now be able to display as normal in partitioned GPU scenarios.

 ```{note}
-See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.1/CHANGELOG.md) for details, examples, and in-depth descriptions.
+See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.1/CHANGELOG.md#amd_smi_lib-for-rocm-710) for details, examples, and in-depth descriptions.
 ```

 ### **Composable Kernel** (1.1.0)
@@ -493,7 +493,7 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc
  * Enabled `TCP_TCP_LATENCY` counter and associated counter for all GPUs except MI300.
 * Interactive metric descriptions in TUI analyze mode.
  * You can now left click on any metric cell to view detailed descriptions in the dedicated `METRIC DESCRIPTION` tab.
-* Support for analysis report output as a sqlite database using ``--output-format db`` analysis mode option.
+* Support for analysis report output as a SQLite database using ``--output-format db`` analysis mode option.
 * `Compute Throughput` panel to TUI's `High Level Analysis` category with the following metrics: VALU FLOPs, VALU IOPs, MFMA FLOPs (F8), MFMA FLOPs (BF16), MFMA FLOPs (F16), MFMA FLOPs (F32), MFMA FLOPs (F64), MFMA FLOPs (F6F4) (in gfx950), MFMA IOPs (Int8), SALU Utilization, VALU Utilization, MFMA Utilization, VMEM Utilization, Branch Utilization, IPC

 * `Memory Throughput` panel to TUI's `High Level Analysis` category with the following metrics: vL1D Cache BW, vL1D Cache Utilization, Theoretical LDS Bandwidth, LDS Utilization, L2 Cache BW, L2 Cache Utilization, L2-Fabric Read BW, L2-Fabric Write BW, sL1D Cache BW, L1I BW, Address Processing Unit Busy, Data-Return Busy, L1I-L2 Bandwidth, sL1D-L2 BW
@@ -579,7 +579,7 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc
 * MI300A/X L2-Fabric 64B read counter may display negative values - The rocprof-compute metric 17.6.1 (Read 64B) can report negative values due to incorrect calculation when TCC_BUBBLE_sum + TCC_EA0_RDREQ_32B_sum exceeds TCC_EA0_RDREQ_sum.
  * A workaround has been implemented using max(0, calculated_value) to prevent negative display values while the root cause is under investigation.
 * The profile mode crashes when `--format-rocprof-output json` is selected.
-  * As a workaround, this option should either not be provided or should be set to `csv` instead of `json`. This issue does not affect the profiling results since both `csv` and `json` output formats lead to the same profiling data.  
+    * As a workaround, this option should either not be provided or should be set to `csv` instead of `json`. This issue does not affect the profiling results since both `csv` and `json` output formats lead to the same profiling data.  

 ### **ROCm Data Center Tool** (1.2.0)

@@ -620,6 +620,14 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc
 - Updated PAPI module to v7.2.0b2.
 - ROCprofiler-SDK is now used for tracing OMPT API calls.

+#### Known issues
+
+* Profiling PyTorch and other AI workloads might fail because the required libraries can't be found in the default linker path. As a workaround, explicitly add the library path to ``LD_LIBRARY_PATH``. For example, when using PyTorch with Python 3.10, add the following to the environment:
+
+```
+export LD_LIBRARY_PATH=/opt/venv/lib/python3.10/site-packages/torch/lib:$LD_LIBRARY_PATH
+```
+
 ### **rocPRIM** (4.1.0)

 #### Added
@@ -697,17 +705,12 @@ As of ROCm 7.0, the internal error state is cleared on each call to `hipGetLastE

 ### **rocSOLVER** (3.31.0)

-#### Added
-
-* Hybrid computation support for existing routines: STEQR
-
 #### Optimized

 Improved the performance of:

-* BDSQR and downstream functions such as GESVD.
-* STEQR and downstream functions such as SYEV/HEEV.
-* LARFT and downstream functions such as GEQR2 and GEQRF.
+* LARF, LARFT, GEQR2, and downstream functions such as GEQRF.
+* STEDC and divide-and-conquer eigensolvers.

 ### **rocSPARSE** (4.1.0)

--- a/RELEASE.md
+++ b/RELEASE.md
@@ -117,13 +117,12 @@ firmware, AMD GPU drivers, and the ROCm user space software.
              30.10</td>
      </tr>
      <tr>
-          <td>MI325X</td>
+          <td>MI325X<a href="#footnote2"><sup>[2]</sup></a></td>
          <td>
-              01.25.05.01<br>
              01.25.04.02
          </td>
          <td>
-              30.20.0<br>
+              30.20.0<sup>[*]</sup><br>
              30.10.2<br>
              30.10.1<br>
              30.10<br>
@@ -174,6 +173,7 @@ firmware, AMD GPU drivers, and the ROCm user space software.
 </div>

 <p id="footnote1">[1]: PLDM bundle 01.25.05.00 will be available by November 2025.</p>
+<p id="footnote2">[2]: For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0.</p>

 #### AMD SMI improvement: Set power cap

@@ -317,11 +317,6 @@ matrix](../../docs/compatibility/compatibility-matrix.rst) for the complete list

 Torch-MIGraphX integrates the AMD graph inference engine with the PyTorch ecosystem. It provides a `mgx_module` object that may be invoked in the same manner as any other torch module, but utilizes the MIGraphX inference engine internally. Although Torch-MIGraphX has been available in previous releases, installable WHL files are now officially published.

-#### JAX
-
-* JAX customers can now use Llama-2 with JAX efficiently.
-* The latest public JAX repo is {fab}`github` [rocm-jax](https://github.com/ROCm/rocm-jax/tree/master).
-
 #### TensorFlow
 ROCm 7.1.0 enables support for TensorFlow 2.20.0.

@@ -740,6 +735,10 @@ For a historical overview of ROCm component updates, see the {doc}`ROCm consolid

 * Fixed certain output in `amd-smi monitor` when GPUs are partitioned. It fixes the issue with amd-smi monitor such as: `amd-smi monitor -Vqt`, `amd-smi monitor -g 0 -Vqt -w 1`, and `amd-smi monitor -Vqt --file /tmp/test1`. These commands will now be able to display as normal in partitioned GPU scenarios.

+```{note}
+See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.1/CHANGELOG.md#amd_smi_lib-for-rocm-710) for details, examples, and in-depth descriptions.
+```
+
 ### **Composable Kernel** (1.1.0)

 #### Added
@@ -1181,7 +1180,7 @@ For a historical overview of ROCm component updates, see the {doc}`ROCm consolid
  * Enabled `TCP_TCP_LATENCY` counter and associated counter for all GPUs except MI300.
 * Interactive metric descriptions in TUI analyze mode.
  * You can now left click on any metric cell to view detailed descriptions in the dedicated `METRIC DESCRIPTION` tab.
-* Support for analysis report output as a sqlite database using ``--output-format db`` analysis mode option.
+* Support for analysis report output as a SQLite database using ``--output-format db`` analysis mode option.
 * `Compute Throughput` panel to TUI's `High Level Analysis` category with the following metrics: VALU FLOPs, VALU IOPs, MFMA FLOPs (F8), MFMA FLOPs (BF16), MFMA FLOPs (F16), MFMA FLOPs (F32), MFMA FLOPs (F64), MFMA FLOPs (F6F4) (in gfx950), MFMA IOPs (Int8), SALU Utilization, VALU Utilization, MFMA Utilization, VMEM Utilization, Branch Utilization, IPC

 * `Memory Throughput` panel to TUI's `High Level Analysis` category with the following metrics: vL1D Cache BW, vL1D Cache Utilization, Theoretical LDS Bandwidth, LDS Utilization, L2 Cache BW, L2 Cache Utilization, L2-Fabric Read BW, L2-Fabric Write BW, sL1D Cache BW, L1I BW, Address Processing Unit Busy, Data-Return Busy, L1I-L2 Bandwidth, sL1D-L2 BW
@@ -1308,6 +1307,14 @@ For a historical overview of ROCm component updates, see the {doc}`ROCm consolid
 - Updated PAPI module to v7.2.0b2.
 - ROCprofiler-SDK is now used for tracing OMPT API calls.

+#### Known issues
+
+* Profiling PyTorch and other AI workloads might fail because the required libraries can't be found in the default linker path. As a workaround, explicitly add the library path to ``LD_LIBRARY_PATH``. For example, when using PyTorch with Python 3.10, add the following to the environment:
+
+```
+export LD_LIBRARY_PATH=/opt/venv/lib/python3.10/site-packages/torch/lib:$LD_LIBRARY_PATH
+```
+
 ### **rocPRIM** (4.1.0)

 #### Added
@@ -1385,17 +1392,12 @@ As of ROCm 7.0, the internal error state is cleared on each call to `hipGetLastE

 ### **rocSOLVER** (3.31.0)

-#### Added
-
-* Hybrid computation support for existing STEQR routines.
-
 #### Optimized

 Improved the performance of:

-* BDSQR and downstream functions such as GESVD.
-* STEQR and downstream functions such as SYEV/HEEV.
-* LARFT and downstream functions such as GEQR2 and GEQRF.
+* LARF, LARFT, GEQR2, and downstream functions such as GEQRF.
+* STEDC and divide-and-conquer eigensolvers.

 ### **rocSPARSE** (4.1.0)

@@ -1479,10 +1481,10 @@ issues related to individual components, review the [Detailed component changes]

 ### MIGraphX Python API will fail when running on Python 3.13

-Applications using the MIGraphX Python API will fail when running on Python 3.13 and return the error message `AttributeError: module 'migraphx' has no attribute 'parse_onnx'`. The issue doesn't occur when you manually build MIGraphX. For detailed instructions, see [Building from source](https://rocm.docs.amd.com/projects/AMDMIGraphX/en/latest/install/building_migraphx.html). As a workaround, change the Python version to the one found in the installed location:
+Applications using the MIGraphX Python API will fail when running on Python 3.13 and return the error message `AttributeError: module 'migraphx' has no attribute 'parse_onnx'`. The issue doesn't occur when you manually build MIGraphX. For detailed instructions, see [Building from source](https://rocm.docs.amd.com/projects/AMDMIGraphX/en/latest/install/install-migraphx.html#build-migraphx-from-source). As a workaround, change the Python version to the one found in the installed location:

 ```
-ls -l /opt/rocm-7.0.0/lib/libmigraphx_py_*.so
+ls -l /opt/rocm-7.1.0/lib/libmigraphx_py_*.so
 ```
 The issue will be resolved in a future ROCm release. See [GitHub issue #5500](https://github.com/ROCm/ROCm/issues/5500).

@@ -1498,6 +1500,22 @@ ROCgdb might fail when running the `step-schedlock-spurious-waves.exp` test case

 Due to a missing `rocm-core` dependency from the ROCm Bandwidth Test, you can't cleanly uninstall ROCm Bandwidth Test using the `amdgpu-install` script. As a workaround, uninstall ROCm Bandwidth Test manually, using the native package managers. For more information, see [Installation via native package manager](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/install-methods/package-manager-index.html). The issue will be fixed in a future ROCm release. See [GitHub issue #5611](https://github.com/ROCm/ROCm/issues/5611).

+### OpenBLAS runtime dependency for hipblaslt-test and hipblaslt-bench
+
+Running `hipblaslt-test` or `hipblaslt-bench` without installing the OpenBLAS development package results in the following error:
+```
+libopenblas.so.0: cannot open shared object file: No such file or directory
+```
+As a workaround, first install `libopenblas-dev` or `openblas-devel`, depending on the package manager used. The issue will be fixed in a future ROCm release. See [GitHub issue #5639](https://github.com/ROCm/ROCm/issues/5639).
+
+### Reduced precision in gemm_ex operations for rocBLAS and hipBLAS
+
+Some `gemm_ex` operations with `half` or `f32_r` data types might yield 16-bit precision results instead of the expected 32-bit precision when matrix dimensions are m=1 or n=1. The issue results from the optimization that enables `_ex` APIs to use lower precision multiples. It limits the high-precision matrix operations performed in PyTorch with rocBLAS and hipBLAS. The issue will be fixed in a future ROCm release. See [GitHub issue #5640](https://github.com/ROCm/ROCm/issues/5640).
+
+### RCCL profiler plugin failure with AllToAll operations
+
+The RCCL profiler plugin `librccl-profiler.so` might fail with a segmentation fault during `AllToAll` collective operations due to improperly assigned point-to-point task function pointers. This leads to invalid memory access and prevents profiling of `AllToAll` performance. Other operations, like `AllReduce`, are unaffected. It's recommended to avoid using the RCCL profiler plugin with `AllToAll` operations until the fix is available. This issue is resolved in the {fab}`github` [RCCL `develop` branch](https://github.com/ROCm/rccl/tree/develop) and will be part of a future ROCm release. See [GitHub issue #5653](https://github.com/ROCm/ROCm/issues/5653).
+
 ## ROCm resolved issues

 The following are previously known issues resolved in this release. For resolved issues related to
--- a/default.xml
+++ b/default.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <manifest>
    <remote name="rocm-org" fetch="https://github.com/ROCm/" />
-    <default revision="refs/tags/rocm-7.1.0"
+    <default revision="refs/tags/rocm-7.1.1"
     remote="rocm-org"
     sync-c="true"
     sync-j="4" />
@@ -25,6 +25,7 @@
    <project groups="mathlibs" name="MIVisionX" />
    <project groups="mathlibs" name="ROCmValidationSuite" />
    <project groups="mathlibs" name="composable_kernel" />
+    <project groups="mathlibs" name="hipSOLVER" />
    <project groups="mathlibs" name="hipTensor" />
    <project groups="mathlibs" name="hipfort" />
    <project groups="mathlibs" name="rccl" />
@@ -45,6 +46,7 @@
        rocprofiler rocr-runtime roctracer -->
    <project groups="mathlibs" name="rocm-systems" />
    <project groups="mathlibs" name="rocPyDecode" />
+    <project groups="mathlibs" name="rocSOLVER" />
    <project groups="mathlibs" name="rocSHMEM" />
    <project groups="mathlibs" name="rocWMMA" />
    <project groups="mathlibs" name="rocm-cmake" />
--- a/docs/compatibility/compatibility-matrix-historical-6.0.csv
+++ b/docs/compatibility/compatibility-matrix-historical-6.0.csv
@@ -32,14 +32,14 @@ ROCm Version,7.1.0,7.0.2,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6
      FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,
      :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.8, 2.7, 2.6","2.8, 2.7, 2.6","2.7, 2.6, 2.5","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
      :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.20.0, 2.19.1, 2.18.1","2.19.1, 2.18.1, 2.17.1 [#tf-mi350-past-60]_","2.19.1, 2.18.1, 2.17.1 [#tf-mi350-past-60]_","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
-      :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.6.0,0.6.0,0.6.0,0.4.35,0.4.35,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
+      :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.7.1,0.6.0,0.6.0,0.4.35,0.4.35,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
      :doc:`verl <../compatibility/ml-compatibility/verl-compatibility>` [#verl_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.3.0.post0,N/A,N/A,N/A,N/A,N/A,N/A
      :doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>` [#stanford-megatron-lm_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,85f95ae,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,2.4.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat-past-60]_,N/A,N/A,2.4.0,2.4.0,N/A,N/A,2.4.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
      :doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>` [#megablocks_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.7.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
      :doc:`Taichi <../compatibility/ml-compatibility/taichi-compatibility>` [#taichi_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,1.8.0b1,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
      :doc:`Ray <../compatibility/ml-compatibility/ray-compatibility>` [#ray_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,2.48.0.post0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
-      :doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat-past-60]_,N/A,N/A,b6356,b6356,b6356,b6356,b5997,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
+      :doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat-past-60]_,N/A,N/A,b6652,b6356,b6356,b6356,b5997,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
      :doc:`FlashInfer <../compatibility/ml-compatibility/flashinfer-compatibility>` [#flashinfer_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,v0.2.5,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
      `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.22.0,1.22.0,1.22.0,1.20.0,1.20.0,1.20.0,1.20.0,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
      ,,,,,,,,,,,,,,,,,,,,,
@@ -53,7 +53,7 @@ ROCm Version,7.1.0,7.0.2,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6
      CUB,2.8.5,2.6.0,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
      ,,,,,,,,,,,,,,,,,,,,,
     DRIVER & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,
-      :doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.20.0, 30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x","30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x","30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x, 6.2.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
+      :doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.20.0 [#mi325x_KVM-past-60]_, 30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x","30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x","30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x, 6.2.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
      ,,,,,,,,,,,,,,,,,,,,,
      ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,
      :doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
--- a/docs/compatibility/compatibility-matrix.rst
+++ b/docs/compatibility/compatibility-matrix.rst
@@ -56,7 +56,7 @@ compatibility and system requirements.
      FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix:,,
      :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.8, 2.7, 2.6","2.8, 2.7, 2.6","2.6, 2.5, 2.4, 2.3"
      :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.20.0, 2.19.1, 2.18.1","2.19.1, 2.18.1, 2.17.1 [#tf-mi350]_","2.18.1, 2.17.1, 2.16.2"
-      :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.6.0,0.6.0,0.4.35
+      :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.7.1,0.6.0,0.4.35
      :doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat]_,N/A,N/A,2.4.0
      :doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat]_,N/A,N/A,b5997
      `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.22.0,1.22.0,1.20.0
@@ -70,7 +70,7 @@ compatibility and system requirements.
      CUB,2.8.5,2.6.0,2.5.0
      ,,,
      DRIVER & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,,
-      :doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.20.0, 30.10.2, |br| 30.10.1 [#driver_patch]_, 30.10, 6.4.x","30.10.2, 30.10.1 [#driver_patch]_, |br| 30.10, 6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
+      :doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.20.0 [#mi325x_KVM]_, 30.10.2, |br| 30.10.1 [#driver_patch]_, 30.10, 6.4.x","30.10.2, 30.10.1 [#driver_patch]_, |br| 30.10, 6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
      ,,,
      ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,,
      :doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0
@@ -183,8 +183,9 @@ compatibility and system requirements.
 .. [#mi100-710-os] **For ROCm 7.1.x** - AMD Instinct MI100 GPUs (gfx908) only support Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, RHEL 8.10, and SLES 15 SP7.
 .. [#mi100-os] **For ROCm 7.0.x** - AMD Instinct MI100 GPUs (gfx908) only support Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, and RHEL 8.10.
 .. [#tf-mi350] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead.
-.. [#dgl_compat] DGL is supported only on ROCm 6.4.0.
+.. [#dgl_compat] DGL is supported only on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0.
 .. [#llama-cpp_compat] llama.cpp is supported only on ROCm 7.0.0 and ROCm 6.4.x.
+.. [#mi325x_KVM] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0.
 .. [#driver_patch] AMD GPU Driver (amdgpu) 30.10.1 is a quality release that resolves an issue identified in the 30.10 release. There are no other significant changes or feature additions in ROCm 7.0.1 from ROCm 7.0.0. AMD GPU Driver (amdgpu) 30.10.1 is compatible with ROCm 7.0.1 and ROCm 7.0.0.
 .. [#kfd_support] As of ROCm 6.4.0, forward and backward compatibility between the AMD GPU Driver (amdgpu) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The supported user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and AMD GPU Driver support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
 .. [#ROCT-rocr] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
@@ -303,12 +304,13 @@ Expand for full historical view of:
   .. [#tf-mi350-past-60] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead.
   .. [#verl_compat-past-60] verl is supported only on ROCm 6.2.0.
   .. [#stanford-megatron-lm_compat-past-60] Stanford Megatron-LM is supported only on ROCm 6.3.0.
-   .. [#dgl_compat-past-60] DGL is supported only on ROCm 6.4.0.
+   .. [#dgl_compat-past-60] DGL is supported only on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0.
   .. [#megablocks_compat-past-60] Megablocks is supported only on ROCm 6.3.0.
   .. [#taichi_compat-past-60] Taichi is supported only on ROCm 6.3.2.
   .. [#ray_compat-past-60] Ray is supported only on ROCm 6.4.1.
   .. [#llama-cpp_compat-past-60] llama.cpp is supported only on ROCm 7.0.0 and 6.4.x.
   .. [#flashinfer_compat-past-60] FlashInfer is supported only on ROCm 6.4.1.
+   .. [#mi325x_KVM-past-60] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0.
   .. [#driver_patch-past-60] AMD GPU Driver (amdgpu) 30.10.1 is a quality release that resolves an issue identified in the 30.10 release. There are no other significant changes or feature additions in ROCm 7.0.1 from ROCm 7.0.0. AMD GPU Driver (amdgpu) 30.10.1 is compatible with ROCm 7.0.1 and ROCm 7.0.0.
   .. [#kfd_support-past-60] As of ROCm 6.4.0, forward and backward compatibility between the AMD GPU Driver (amdgpu) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The supported user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and AMD GPU Driver support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
   .. [#ROCT-rocr-past-60] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
--- a/docs/compatibility/ml-compatibility/dgl-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/dgl-compatibility.rst
@@ -39,13 +39,13 @@ Support overview
 Version support
 --------------------------------------------------------------------------------

-DGL is supported on `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
+DGL is supported on `ROCm 7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__, 
+`ROCm 6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__, and `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.

 Supported devices
 --------------------------------------------------------------------------------

- **Officially Supported**: AMD Instinct™ MI300X (through `hipBLASlt <https://rocm.docs.amd.com/projects/hipBLASLt/en/latest/index.html>`__)
- **Partially Supported**: AMD Instinct™ MI250X
+**Officially Supported**: AMD Instinct™ MI300X, MI250X

 .. _dgl-recommendations:

@@ -60,16 +60,35 @@ GAT, GCN, and GraphSage. Using these models, a variety of use cases are supporte
 - 1D (Temporal) and 2D (Image) Classification
 - Drug Discovery

-Multiple use cases of DGL have been tested and verified.
-However, a recommended example follows a drug discovery pipeline using the ``SE3Transformer``.
-Refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`_, 
-where you can search for DGL examples and best practices to optimize your training workflows on AMD GPUs. 
+For use cases and recommendations, refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__, 
+where you can search for DGL examples and best practices to optimize your workloads on AMD GPUs.

-Coverage includes:
+* Although multiple use cases of DGL have been tested and verified, a few have been  
+  outlined in the `DGL in the Real World: Running GNNs on Real Use Cases 
+  <https://rocm.blogs.amd.com/artificial-intelligence/dgl_blog2/README.html>`__ blog 
+  post, which walks through four real-world graph neural network (GNN) workloads 
+  implemented with the Deep Graph Library on ROCm. It covers tasks ranging from 
+  heterogeneous e-commerce graphs and multiplex networks (GATNE) to molecular graph 
+  regression (GNN-FiLM) and EEG-based neurological diagnosis (EEG-GCNN). For each use 
+  case, the authors detail: the dataset and task, how DGL is used, and their experience 
+  porting to ROCm. It is shown that DGL codebases often run without modification, with 
+  seamless integration of graph operations, message passing, sampling, and convolution. 

- Single-GPU training/inference
- Multi-GPU training
+* The `Graph Neural Networks (GNNs) at Scale: DGL with ROCm on AMD Hardware 
+  <https://rocm.blogs.amd.com/artificial-intelligence/why-graph-neural/README.html>`__ 
+  blog post introduces the Deep Graph Library (DGL) and its enablement on the AMD ROCm platform, 
+  bringing high-performance graph neural network (GNN) training to AMD GPUs. DGL bridges 
+  the gap between dense tensor frameworks and the irregular nature of graph data through a 
+  graph-first, message-passing abstraction. Its design ensures scalability, flexibility, and 
+  interoperability across frameworks like PyTorch and TensorFlow. AMD’s ROCm integration 
+  enables DGL to run efficiently on HIP-based GPUs, supported by prebuilt Docker containers 
+  and open-source repositories. This marks a major step in AMD's mission to advance open, 
+  scalable AI ecosystems beyond traditional architectures.

+You can pre-process datasets and begin training on AMD GPUs through:
+
+* Single-GPU training/inference
+* Multi-GPU training

 .. _dgl-docker-compat:

@@ -85,7 +104,7 @@ with ROCm backends on Docker Hub. The following Docker image tags and associated
 inventories represent the latest available DGL version from the official Docker Hub. 
 Click the |docker-icon| to view the image on Docker Hub.

-.. list-table:: DGL Docker image components
+.. list-table::
    :header-rows: 1
    :class: docker-image-compatibility

@@ -98,43 +117,83 @@ Click the |docker-icon| to view the image on Docker Hub.

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-8ce2c3bcfaa137ab94a75f9e2ea711894748980f57417739138402a542dd5564"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu24.04_py3.12_pytorch_2.8.0/images/sha256-943698ddf54c22a7bcad2e5b4ff467752e29e4ba6d0c926789ae7b242cbd92dd"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>

-      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
+      - `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
      - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
-      - `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`__
+      - `2.8.0 <https://github.com/pytorch/pytorch/releases/tag/v2.8.0>`__
      - 24.04
      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-cf1683283b8eeda867b690229c8091c5bbf1edb9f52e8fb3da437c49a612ebe4"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu24.04_py3.12_pytorch_2.6.0/images/sha256-b2ec286a035eb7d0a6aab069561914d21a3cac462281e9c024501ba5ccedfbf7"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>

-      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
+      - `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
      - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
-      - `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
+      - `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
+      - 24.04
+      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu22.04_py3.10_pytorch_2.7.1/images/sha256-d27aee16df922ccf0bcd9107bfcb6d20d34235445d456c637e33ca6f19d11a51"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
+
+      - `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
+      - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
+      - `2.7.1 <https://github.com/pytorch/pytorch/releases/tag/v2.7.1>`__
+      - 22.04
+      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm6.4.3_ubuntu24.04_py3.12_pytorch_2.6.0/images/sha256-f3ba6a3c9ec9f6c1cde28449dc9780e0c4c16c4140f4b23f158565fbfd422d6b"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
+
+      - `6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__
+      - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
+      - `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
+      - 24.04
+      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-8ce2c3bcfaa137ab94a75f9e2ea711894748980f57417739138402a542dd5564"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
+
+      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
+      - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
+      - `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
+      - 24.04
+      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
+
+    * - .. raw:: html
+
+           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-cf1683283b8eeda867b690229c8091c5bbf1edb9f52e8fb3da437c49a612ebe4"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
+
+      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
+      - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
+      - `2.4.1 <https://github.com/pytorch/pytorch/releases/tag/v2.4.1>`__
      - 24.04
      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__


    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-4834f178c3614e2d09e89e32041db8984c456d45dfd20286e377ca8635686554"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-4834f178c3614e2d09e89e32041db8984c456d45dfd20286e377ca8635686554"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>

-      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
+      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
      - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
-      - `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
+      - `2.4.1 <https://github.com/pytorch/pytorch/releases/tag/v2.4.1>`__
      - 22.04
      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__


    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-88740a2c8ab4084b42b10c3c6ba984cab33dd3a044f479c6d7618e2b2cb05e69"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-88740a2c8ab4084b42b10c3c6ba984cab33dd3a044f479c6d7618e2b2cb05e69"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>

-      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
+      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
      - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
-      - `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`__
+      - `2.3.0 <https://github.com/pytorch/pytorch/releases/tag/v2.3.0>`__
      - 22.04
      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
      
@@ -150,81 +209,102 @@ If you prefer to build it yourself, ensure the following dependencies are instal
    :header-rows: 1

    * - ROCm library
-      - ROCm 6.4.0 Version
+      - ROCm 7.0.0 Version
+      - ROCm 6.4.x Version
      - Purpose
    * - `Composable Kernel <https://github.com/ROCm/composable_kernel>`_
+      - 1.1.0
      - 1.1.0
      - Enables faster execution of core operations like matrix multiplication
        (GEMM), convolutions and transformations.
    * - `hipBLAS <https://github.com/ROCm/hipBLAS>`_
+      - 3.0.0
      - 2.4.0
      - Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
        matrix and vector operations.
    * - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
+      - 1.0.0
      - 0.12.0
      - hipBLASLt is an extension of the hipBLAS library, providing additional
        features like epilogues fused into the matrix multiplication kernel or
        use of integer tensor cores.
    * - `hipCUB <https://github.com/ROCm/hipCUB>`_
+      - 4.0.0
      - 3.4.0
      - Provides a C++ template library for parallel algorithms for reduction,
        scan, sort and select.
    * - `hipFFT <https://github.com/ROCm/hipFFT>`_
+      - 1.0.20
      - 1.0.18
      - Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
    * - `hipRAND <https://github.com/ROCm/hipRAND>`_
+      - 3.0.0
      - 2.12.0
      - Provides fast random number generation for GPUs.
    * - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
+      - 3.0.0
      - 2.4.0
      - Provides GPU-accelerated solvers for linear systems, eigenvalues, and
        singular value decompositions (SVD).
    * - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_
+      - 4.0.1
      - 3.2.0
      - Accelerates operations on sparse matrices, such as sparse matrix-vector
        or matrix-matrix products.
    * - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`_
+      - 0.2.4
      - 0.2.3
      - Accelerates operations on sparse matrices, such as sparse matrix-vector
        or matrix-matrix products.
    * - `hipTensor <https://github.com/ROCm/hipTensor>`_
+      - 2.0.0
      - 1.5.0
      - Optimizes for high-performance tensor operations, such as contractions.
    * - `MIOpen <https://github.com/ROCm/MIOpen>`_
+      - 3.5.0
      - 3.4.0
      - Optimizes deep learning primitives such as convolutions, pooling,
        normalization, and activation functions.
    * - `MIGraphX <https://github.com/ROCm/AMDMIGraphX>`_
+      - 2.13.0
      - 2.12.0
      - Adds graph-level optimizations, ONNX models and mixed precision support
        and enable Ahead-of-Time (AOT) Compilation.
    * - `MIVisionX <https://github.com/ROCm/MIVisionX>`_
+      - 3.3.0
      - 3.2.0
      - Optimizes acceleration for computer vision and AI workloads like
        preprocessing, augmentation, and inferencing.
    * - `rocAL <https://github.com/ROCm/rocAL>`_
-      - :version-ref:`rocAL rocm_version`
+      - 3.3.0
+      - 2.2.0
      - Accelerates the data pipeline by offloading intensive preprocessing and
        augmentation tasks. rocAL is part of MIVisionX.
    * - `RCCL <https://github.com/ROCm/rccl>`_
-      - 2.2.0
+      - 2.26.6
+      - 2.22.3
      - Optimizes for multi-GPU communication for operations like AllReduce and
        Broadcast.
    * - `rocDecode <https://github.com/ROCm/rocDecode>`_
+      - 1.0.0
      - 0.10.0
      - Provides hardware-accelerated data decoding capabilities, particularly
        for image, video, and other dataset formats.
    * - `rocJPEG <https://github.com/ROCm/rocJPEG>`_
+      - 1.1.0
      - 0.8.0
      - Provides hardware-accelerated JPEG image decoding and encoding.
    * - `RPP <https://github.com/ROCm/RPP>`_
+      - 2.0.0
      - 1.9.10
      - Speeds up data augmentation, transformation, and other preprocessing steps.
    * - `rocThrust <https://github.com/ROCm/rocThrust>`_
+      - 4.0.0
      - 3.3.0
      - Provides a C++ template library for parallel algorithms like sorting,
        reduction, and scanning.
    * - `rocWMMA <https://github.com/ROCm/rocWMMA>`_
+      - 2.0.0
      - 1.7.0
      - Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
        multiplication (GEMM) and accumulation operations with mixed precision
@@ -253,26 +333,29 @@ Instead of listing them all, support is grouped into the following categories to
 * DGL NN
 * DGL Optim
 * DGL Sparse
-
+* GraphBolt

 Unsupported features
 ================================================================================

-* GraphBolt
-* Partial TF32 Support (MI250X only)
+* TF32 Support (only supported for PyTorch 2.7 and above)
 * Kineto/ROCTracer integration


 Unsupported functions
 ================================================================================

-* ``more_nnz``
+* ``bfs``
 * ``format``
 * ``multiprocess_sparse_adam_state_dict``
-* ``record_stream_ndarray``
 * ``half_spmm``
 * ``segment_mm`` 
 * ``gather_mm_idx_b``
-* ``pgexplainer``
 * ``sample_labors_prob``
 * ``sample_labors_noprob``
+* ``sparse_admin``
+
+Previous versions
+===============================================================================
+See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/dgl-history` to find documentation for previous releases
+of the ``ROCm/dgl`` Docker image.
--- a/docs/compatibility/ml-compatibility/jax-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/jax-compatibility.rst
@@ -43,6 +43,26 @@ quarterly alongside new ROCm releases. These images undergo full AMD testing.
 `Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community/tags>`_
 follow upstream JAX releases and use the latest available ROCm version.

+JAX Plugin-PJRT with JAX/JAXLIB compatibility
+================================================================================
+
+Portable JIT Runtime (PJRT) is an open, stable interface for device runtimes and
+compilers. The following table details the ROCm version compatibility matrix
+between JAX Plugin-PJRT and JAX/JAXLIB.
+
+.. list-table::
+    :header-rows: 1
+
+    * - JAX Plugin-PJRT
+      - JAX/JAXLIB
+      - ROCm
+    * - 0.7.1
+      - 0.7.1
+      - 7.1.1, 7.1.0
+    * - 0.6.0
+      - 0.6.2, 0.6.0
+      - 7.0.2, 7.0.1, 7.0.0
+
 Use cases and recommendations
 ================================================================================

--- a/docs/compatibility/ml-compatibility/llama-cpp-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/llama-cpp-compatibility.rst
@@ -45,7 +45,7 @@ llama.cpp is supported on `ROCm 7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
 Supported devices
 --------------------------------------------------------------------------------

-**Officially Supported**: AMD Instinct™ MI300X, MI325X, MI210
+**Officially Supported**: AMD Instinct™ MI325X, MI300X, MI210

 Use cases and recommendations
 ================================================================================
@@ -109,27 +109,27 @@ Click |docker-icon| to view the image on Docker Hub.

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu24.04_full/images/sha256-a2ecd635eaa65bb289a9041330128677f3ae88bee6fee0597424b17e38d4903c"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_full/images/sha256-a94f0c7a598cc6504ff9e8371c016d7a2f93e69bf54a36c870f9522567201f10"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
      - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu24.04_server/images/sha256-cb46b47df415addb5ceb6e6fdf0be70bf9d7f6863bbe6e10c2441ecb84246d52"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_server/images/sha256-be175932c3c96e882dfbc7e20e0e834f58c89c2925f48b222837ee929dfc47ee"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
      - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu24.04_light/images/sha256-8f8536eec4b05c0ff1c022f9fc6c527ad1c89e6c1ca0906e4d39e4de73edbde9"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
-      - `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_light/images/sha256-d8ba0c70603da502c879b1f8010b439c8e7fa9f6cbdac8bbbbbba97cb41ebc9e"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - `b6652 <https://github.com/ROCm/llama.cpp/tree/release/b6652>`__
      - `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
      - 24.04

    * - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu22.04_full/images/sha256-f36de2a3b03ae53e81c85422cb3780368c9891e1ac7884b04403a921fe2ea45d"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_full/images/sha256-37582168984f25dce636cc7288298e06d94472ea35f65346b3541e6422b678ee"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
      - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu22.04_server/images/sha256-df15e8ab11a6837cd3736644fec1e047465d49e37d610ab0b79df000371327df"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_server/images/sha256-7e70578e6c3530c6591cc2c26da24a9ee68a20d318e12241de93c83224f83720"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
      - .. raw:: html

-           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu22.04_light/images/sha256-4ea2d5bb7964f0ee3ea9b30ba7f343edd6ddfab1b1037669ca7eafad2e3c2bd7"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
-      - `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_light/images/sha256-9a5231acf88b4a229677bc2c636ea3fe78a7a80f558bd80910b919855de93ad5"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+      - `b6652 <https://github.com/ROCm/llama.cpp/tree/release/b6652>`__
      - `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
      - 22.04

--- a/docs/compatibility/ml-compatibility/tensorflow-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/tensorflow-compatibility.rst
@@ -136,7 +136,7 @@ The following section maps supported data types and GPU-accelerated TensorFlow
 features to their minimum supported ROCm and TensorFlow versions.

 Data types
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+-----------------

 The data type of a tensor is specified using the ``dtype`` attribute or
 argument, and TensorFlow supports a wide range of data types for different use
@@ -254,7 +254,7 @@ are as follows:
      - 1.7

 Features
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+-----------------

 This table provides an overview of key features in TensorFlow and their
 availability in ROCm.
@@ -346,7 +346,7 @@ availability in ROCm.
      - 1.9.2

 Distributed library features
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+-------------------------------------

 Enables developers to scale computations across multiple devices on a single machine or
 across multiple machines.
--- a/docs/data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20251006-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20251006-benchmark-models.yaml
@@ -0,0 +1,316 @@
+dockers:
+  - pull_tag: rocm/vllm:rocm7.0.0_vllm_0.10.2_20251006
+    docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.10.2_20251006/images/sha256-94fd001964e1cf55c3224a445b1fb5be31a7dac302315255db8422d813edd7f5
+    components:
+      ROCm: 7.0.0
+      vLLM: 0.10.2 (0.11.0rc2.dev160+g790d22168.rocm700)
+      PyTorch: 2.9.0a0+git1c57644
+      hipBLASLt: 1.0.0
+    dockerfile:
+      commit: 790d22168820507f3105fef29596549378cfe399
+model_groups:
+  - group: Meta Llama
+    tag: llama
+    models:
+      - model: Llama 2 70B
+        mad_tag: pyt_vllm_llama-2-70b
+        model_repo: meta-llama/Llama-2-70b-chat-hf
+        url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 4096
+          max_model_len: 4096
+      - model: Llama 3.1 8B
+        mad_tag: pyt_vllm_llama-3.1-8b
+        model_repo: meta-llama/Llama-3.1-8B-Instruct
+        url: https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct
+        precision: float16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.1 8B FP8
+        mad_tag: pyt_vllm_llama-3.1-8b_fp8
+        model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
+        url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
+        precision: float8
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.1 405B
+        mad_tag: pyt_vllm_llama-3.1-405b
+        model_repo: meta-llama/Llama-3.1-405B-Instruct
+        url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.1 405B FP8
+        mad_tag: pyt_vllm_llama-3.1-405b_fp8
+        model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
+        url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.1 405B MXFP4
+        mad_tag: pyt_vllm_llama-3.1-405b_fp4
+        model_repo: amd/Llama-3.1-405B-Instruct-MXFP4-Preview
+        url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-MXFP4-Preview
+        precision: float4
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.3 70B
+        mad_tag: pyt_vllm_llama-3.3-70b
+        model_repo: meta-llama/Llama-3.3-70B-Instruct
+        url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.3 70B FP8
+        mad_tag: pyt_vllm_llama-3.3-70b_fp8
+        model_repo: amd/Llama-3.3-70B-Instruct-FP8-KV
+        url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-FP8-KV
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.3 70B MXFP4
+        mad_tag: pyt_vllm_llama-3.3-70b_fp4
+        model_repo: amd/Llama-3.3-70B-Instruct-MXFP4-Preview
+        url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-MXFP4-Preview
+        precision: float4
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 4 Scout 17Bx16E
+        mad_tag: pyt_vllm_llama-4-scout-17b-16e
+        model_repo: meta-llama/Llama-4-Scout-17B-16E-Instruct
+        url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 32768
+          max_model_len: 8192
+      - model: Llama 4 Maverick 17Bx128E
+        mad_tag: pyt_vllm_llama-4-maverick-17b-128e
+        model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+        url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 32768
+          max_model_len: 8192
+      - model: Llama 4 Maverick 17Bx128E FP8
+        mad_tag: pyt_vllm_llama-4-maverick-17b-128e_fp8
+        model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+        url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+  - group: DeepSeek
+    tag: deepseek
+    models:
+      - model: DeepSeek R1 0528 FP8
+        mad_tag: pyt_vllm_deepseek-r1
+        model_repo: deepseek-ai/DeepSeek-R1-0528
+        url: https://huggingface.co/deepseek-ai/DeepSeek-R1-0528
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_seqs: 1024
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+  - group: OpenAI GPT OSS
+    tag: gpt-oss
+    models:
+      - model: GPT OSS 20B
+        mad_tag: pyt_vllm_gpt-oss-20b
+        model_repo: openai/gpt-oss-20b
+        url: https://huggingface.co/openai/gpt-oss-20b
+        precision: bfloat16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 8192
+          max_model_len: 8192
+      - model: GPT OSS 120B
+        mad_tag: pyt_vllm_gpt-oss-120b
+        model_repo: openai/gpt-oss-120b
+        url: https://huggingface.co/openai/gpt-oss-120b
+        precision: bfloat16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 8192
+          max_model_len: 8192
+  - group: Mistral AI
+    tag: mistral
+    models:
+      - model: Mixtral MoE 8x7B
+        mad_tag: pyt_vllm_mixtral-8x7b
+        model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
+        url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 32768
+          max_model_len: 8192
+      - model: Mixtral MoE 8x7B FP8
+        mad_tag: pyt_vllm_mixtral-8x7b_fp8
+        model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
+        url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 32768
+          max_model_len: 8192
+      - model: Mixtral MoE 8x22B
+        mad_tag: pyt_vllm_mixtral-8x22b
+        model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
+        url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 65536
+          max_model_len: 8192
+      - model: Mixtral MoE 8x22B FP8
+        mad_tag: pyt_vllm_mixtral-8x22b_fp8
+        model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
+        url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 65536
+          max_model_len: 8192
+  - group: Qwen
+    tag: qwen
+    models:
+      - model: Qwen3 8B
+        mad_tag: pyt_vllm_qwen3-8b
+        model_repo: Qwen/Qwen3-8B
+        url: https://huggingface.co/Qwen/Qwen3-8B
+        precision: float16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+      - model: Qwen3 32B
+        mad_tag: pyt_vllm_qwen3-32b
+        model_repo: Qwen/Qwen3-32B
+        url: https://huggingface.co/Qwen/Qwen3-32B
+        precision: float16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+      - model: Qwen3 30B A3B
+        mad_tag: pyt_vllm_qwen3-30b-a3b
+        model_repo: Qwen/Qwen3-30B-A3B
+        url: https://huggingface.co/Qwen/Qwen3-30B-A3B
+        precision: float16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+      - model: Qwen3 30B A3B FP8
+        mad_tag: pyt_vllm_qwen3-30b-a3b_fp8
+        model_repo: Qwen/Qwen3-30B-A3B-FP8
+        url: https://huggingface.co/Qwen/Qwen3-30B-A3B-FP8
+        precision: float8
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+      - model: Qwen3 235B A22B
+        mad_tag: pyt_vllm_qwen3-235b-a22b
+        model_repo: Qwen/Qwen3-235B-A22B
+        url: https://huggingface.co/Qwen/Qwen3-235B-A22B
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+      - model: Qwen3 235B A22B FP8
+        mad_tag: pyt_vllm_qwen3-235b-a22b_fp8
+        model_repo: Qwen/Qwen3-235B-A22B-FP8
+        url: https://huggingface.co/Qwen/Qwen3-235B-A22B-FP8
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+  - group: Microsoft Phi
+    tag: phi
+    models:
+      - model: Phi-4
+        mad_tag: pyt_vllm_phi-4
+        model_repo: microsoft/phi-4
+        url: https://huggingface.co/microsoft/phi-4
+        precision: float16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 16384
+          max_model_len: 8192
--- a/docs/data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml
@@ -1,13 +1,13 @@
 dockers:
-  - pull_tag: rocm/vllm:rocm7.0.0_vllm_0.10.2_20251006
-    docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.10.2_20251006/images/sha256-94fd001964e1cf55c3224a445b1fb5be31a7dac302315255db8422d813edd7f5
+  - pull_tag: rocm/vllm:rocm7.0.0_vllm_0.11.1_20251103
+    docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.11.1_20251103/images/sha256-8d60429043d4d00958da46039a1de0d9b82df814d45da482497eef26a6076506
    components:
      ROCm: 7.0.0
-      vLLM: 0.10.2 (0.11.0rc2.dev160+g790d22168.rocm700)
+      vLLM: 0.11.1 (0.11.1rc2.dev141+g38f225c2a.rocm700)
      PyTorch: 2.9.0a0+git1c57644
      hipBLASLt: 1.0.0
    dockerfile:
-      commit: 790d22168820507f3105fef29596549378cfe399
+      commit: 38f225c2abeadc04c2cc398814c2f53ea02c3c72
 model_groups:
  - group: Meta Llama
    tag: llama
--- a/docs/how-to/deep-learning-rocm.rst
+++ b/docs/how-to/deep-learning-rocm.rst
@@ -84,6 +84,8 @@ The table below summarizes information about ROCm-enabled deep learning framewor
          <a href="https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html"><i class="fas fa-link fa-lg"></i></a>
      - 
        - `Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html#use-a-prebuilt-docker-image-with-dgl-pre-installed>`__
+        - `Wheels package <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html#use-a-wheels-package>`__
+
      - .. raw:: html

          <a href="https://github.com/ROCm/dgl"><i class="fab fa-github fa-lg"></i></a> 
--- a/docs/how-to/rocm-for-ai/inference-optimization/vllm-optimization.rst
+++ b/docs/how-to/rocm-for-ai/inference-optimization/vllm-optimization.rst
@@ -46,6 +46,8 @@ The following variables are generally useful for Instinct MI300X/MI355X GPUs and
    multi-GPU distributed workloads** (tensor parallelism, pipeline
    parallelism). Single-GPU inference does not need this.

+.. _vllm-optimization-aiter-switches:
+
 AITER (AI Tensor Engine for ROCm) switches
 ==========================================

@@ -65,7 +67,7 @@ Quick start examples:
   export VLLM_ROCM_USE_AITER=1
   vllm serve MODEL_NAME

-   # Enable only AITER Triton Prefill-Decode (split) attention
+   # Enable AITER Fused MoE and enable Triton Prefill-Decode (split) attention
   export VLLM_ROCM_USE_AITER=1
   export VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1
   export VLLM_ROCM_USE_AITER_MHA=0
@@ -242,14 +244,17 @@ Most users won't need this, but you can override the defaults:
   * - AITER MHA (standard models)
     - ``VLLM_ROCM_USE_AITER=1`` (auto-selects for non-MLA models)

-   * - AITER Triton Prefill-Decode (split)
+   * - vLLM Triton Unified (default)
+     - ``VLLM_ROCM_USE_AITER=0`` (or unset)
+
+   * - Triton Prefill-Decode (split) without AITER
+     - | ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1``
+
+   * - Triton Prefill-Decode (split) along with AITER Fused-MoE
     - | ``VLLM_ROCM_USE_AITER=1``
       | ``VLLM_ROCM_USE_AITER_MHA=0``
       | ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1``

-   * - vLLM Triton Unified (default)
-     - ``VLLM_ROCM_USE_AITER=0`` (or unset)
-
   * - AITER Unified Attention
     - | ``VLLM_ROCM_USE_AITER=1``
       | ``VLLM_ROCM_USE_AITER_MHA=0``
@@ -267,11 +272,11 @@ Most users won't need this, but you can override the defaults:
       --block-size 1 \
       --tensor-parallel-size 8

-   # Advanced: Use Prefill-Decode split (for short input cases)
+   # Advanced: Use Prefill-Decode split (for short input cases) with AITER Fused-MoE
   VLLM_ROCM_USE_AITER=1 \
   VLLM_ROCM_USE_AITER_MHA=0 \
   VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1 \
-   vllm serve meta-llama/Llama-3.3-70B-Instruct
+   vllm serve meta-llama/Llama-4-Scout-17B-16E

 **Which backend should I choose?**

@@ -350,14 +355,14 @@ vLLM V1 on ROCm provides these attention implementations:

 3. **AITER Triton Prefill–Decode Attention** (hybrid, Instinct MI300X-optimized)

-   * Enable with ``VLLM_ROCM_USE_AITER=1``, ``VLLM_ROCM_USE_AITER_MHA=0``, and ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1``
+   * Enable with ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1``
   * Uses separate kernels for prefill and decode phases:

     * **Prefill**: ``context_attention_fwd`` Triton kernel
     * **Primary decode**: ``torch.ops._rocm_C.paged_attention`` (custom ROCm kernel optimized for head sizes 64/128, block sizes 16/32, GQA 1–16, context ≤131k; sliding window not supported)
     * **Fallback decode**: ``kernel_paged_attention_2d`` Triton kernel when shapes don't meet primary decode requirements

-   * Usually better compared to unified Triton kernels (both vLLM and AITER variants)
+   * Usually better compared to unified Triton kernels
   * Performance vs AITER MHA varies: AITER MHA is typically faster overall, but Prefill-Decode split may win in short input scenarios
   * The custom paged attention decode kernel is controlled by ``VLLM_ROCM_CUSTOM_PAGED_ATTN`` (default **True**)

@@ -693,7 +698,9 @@ There are two strategies:
 vLLM engine arguments
 =====================

-Selected arguments that often help on ROCm. See `engine args docs <https://docs.vllm.ai/en/latest/serving/engine_args.html>`_ for the full list.
+Selected arguments that often help on ROCm. See `Engine Arguments
+<https://docs.vllm.ai/en/stable/configuration/engine_args.html>`__ in the vLLM
+documentation for the full list.

 Configure --max-num-seqs
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.2-20251006.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.2-20251006.rst
@@ -0,0 +1,482 @@
+:orphan:
+
+.. meta::
+   :description: Learn how to validate LLM inference performance on MI300X GPUs using AMD MAD and the ROCm vLLM Docker image.
+   :keywords: model, MAD, automation, dashboarding, validate
+
+**********************************
+vLLM inference performance testing
+**********************************
+
+.. caution::
+
+   This documentation does not reflect the latest version of ROCm vLLM
+   inference performance documentation. See :doc:`../vllm` for the latest version.
+
+.. _vllm-benchmark-unified-docker-930:
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20251006-benchmark-models.yaml
+
+   {% set docker = data.dockers[0] %}
+
+   The `ROCm vLLM Docker <{{ docker.docker_hub_url }}>`_ image offers a
+   prebuilt, optimized environment for validating large language model (LLM)
+   inference performance on AMD Instinct™ MI355X, MI350X, MI325X and MI300X
+   GPUs. This ROCm vLLM Docker image integrates vLLM and PyTorch tailored
+   specifically for AMD data center GPUs and includes the following components:
+
+   .. tab-set::
+
+      .. tab-item:: {{ docker.pull_tag }}
+
+         .. list-table::
+            :header-rows: 1
+
+            * - Software component
+              - Version
+
+            {% for component_name, component_version in docker.components.items() %}
+            * - {{ component_name }}
+              - {{ component_version }}
+            {% endfor %}
+
+With this Docker image, you can quickly test the :ref:`expected
+inference performance numbers <vllm-benchmark-performance-measurements-930>` for
+AMD Instinct GPUs.
+
+What's new
+==========
+
+The following is a summary of notable changes since the :doc:`previous ROCm/vLLM Docker release <vllm-history>`.
+
+* Added support for AMD Instinct MI355X and MI350X GPUs.
+
+* Added support and benchmarking instructions for the following models. See :ref:`vllm-benchmark-supported-models-930`.
+
+  * Llama 4 Scout and Maverick
+
+  * DeepSeek R1 0528 FP8
+
+  * MXFP4 models (MI355X and MI350X only): Llama 3.3 70B MXFP4 and Llama 3.1 405B MXFP4
+
+  * GPT OSS 20B and 120B
+
+  * Qwen 3 32B, 30B-A3B, and 235B-A22B
+
+* Removed the deprecated ``--max-seq-len-to-capture`` flag.
+
+* ``--gpu-memory-utilization`` is now configurable via the `configuration files
+  <https://github.com/ROCm/MAD/tree/develop/scripts/vllm/configs>`__ in the MAD
+  repository.
+
+.. _vllm-benchmark-supported-models-930:
+
+Supported models
+================
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20251006-benchmark-models.yaml
+
+   {% set docker = data.dockers[0] %}
+   {% set model_groups = data.model_groups %}
+
+   .. _vllm-benchmark-available-models-930:
+
+   The following models are supported for inference performance benchmarking
+   with vLLM and ROCm. Some instructions, commands, and recommendations in this
+   documentation might vary by model -- select one to get started. MXFP4 models
+   are only supported on MI355X and MI350X GPUs.
+
+   .. raw:: html
+
+      <div id="vllm-benchmark-ud-params-picker" class="container-fluid">
+         <div class="row gx-0">
+            <div class="col-2 me-1 px-2 model-param-head">Model</div>
+            <div class="row col-10 pe-0">
+      {% for model_group in model_groups %}
+               <div class="col-4 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
+      {% endfor %}
+            </div>
+         </div>
+
+         <div class="row gx-0 pt-1">
+            <div class="col-2 me-1 px-2 model-param-head">Variant</div>
+            <div class="row col-10 pe-0">
+      {% for model_group in model_groups %}
+         {% set models = model_group.models %}
+         {% for model in models %}
+            {% if models|length % 3 == 0 %}
+               <div class="col-4 px-2 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
+            {% else %}
+               <div class="col-6 px-2 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
+            {% endif %}
+         {% endfor %}
+      {% endfor %}
+            </div>
+         </div>
+      </div>
+
+   .. _vllm-benchmark-vllm-930:
+
+   {% for model_group in model_groups %}
+      {% for model in model_group.models %}
+
+   .. container:: model-doc {{ model.mad_tag }}
+
+
+      {% if model.precision == "float4" %}
+      .. important::
+
+         MXFP4 is supported only on MI355X and MI350X GPUs.
+      {% endif %}
+
+      .. note::
+
+         See the `{{ model.model }} model card on Hugging Face <{{ model.url }}>`_ to learn more about your selected model.
+         Some models require access authorization prior to use via an external license agreement through a third party.
+      {% if model.precision == "float8" and model.model_repo.startswith("amd") %}
+         This model uses FP8 quantization via `AMD Quark <https://quark.docs.amd.com/latest/>`__ for efficient inference on AMD GPUs.
+      {% endif %}
+      {% if model.precision == "float4" and model.model_repo.startswith("amd") %}
+         This model uses FP4 quantization via `AMD Quark <https://quark.docs.amd.com/latest/>`__ for efficient inference on AMD GPUs.
+      {% endif %}
+
+      {% endfor %}
+   {% endfor %}
+
+.. _vllm-benchmark-performance-measurements-930:
+
+Performance measurements
+========================
+
+To evaluate performance, the
+`Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
+page provides reference throughput and serving measurements for inferencing popular AI models.
+
+.. important::
+
+   The performance data presented in
+   `Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
+   only reflects the latest version of this inference benchmarking environment.
+   The listed measurements should not be interpreted as the peak performance achievable by AMD Instinct GPUs or ROCm software.
+
+System validation
+=================
+
+Before running AI workloads, it's important to validate that your AMD hardware is configured
+correctly and performing optimally.
+
+If you have already validated your system settings, including aspects like NUMA auto-balancing, you
+can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
+optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
+before running inference benchmarks.
+
+To test for optimal performance, consult the recommended :ref:`System health benchmarks
+<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
+system's configuration.
+
+Pull the Docker image
+=====================
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20251006-benchmark-models.yaml
+
+   {% set docker = data.dockers[0] %}
+
+   Download the `ROCm vLLM Docker image <{{ docker.docker_hub_url }}>`_.
+   Use the following command to pull the Docker image from Docker Hub.
+
+   .. code-block:: shell
+
+      docker pull {{ docker.pull_tag }}
+
+Benchmarking
+============
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20251006-benchmark-models.yaml
+
+   {% set docker = data.dockers[0] %}
+   {% set model_groups = data.model_groups %}
+
+   Once the setup is complete, choose between two options to reproduce the
+   benchmark results:
+
+   .. _vllm-benchmark-mad-930:
+
+   {% for model_group in model_groups %}
+      {% for model in model_group.models %}
+
+   .. container:: model-doc {{model.mad_tag}}
+
+      .. tab-set::
+
+         .. tab-item:: MAD-integrated benchmarking
+
+            The following run command is tailored to {{ model.model }}.
+            See :ref:`vllm-benchmark-supported-models-930` to switch to another available model.
+
+            1. Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
+               directory and install the required packages on the host machine.
+
+               .. code-block:: shell
+
+                  git clone https://github.com/ROCm/MAD
+                  cd MAD
+                  pip install -r requirements.txt
+
+            2. On the host machine, use this command to run the performance benchmark test on
+               the `{{model.model}} <{{ model.url }}>`_ model using one node with the
+               :literal:`{{model.precision}}` data type.
+
+               .. code-block:: shell
+
+                  export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
+                  madengine run \
+                      --tags {{model.mad_tag}} \
+                      --keep-model-dir \
+                      --live-output
+
+            MAD launches a Docker container with the name
+            ``container_ci-{{model.mad_tag}}``. The throughput and serving reports of the
+            model are collected in the following paths: ``{{ model.mad_tag }}_throughput.csv``
+            and ``{{ model.mad_tag }}_serving.csv``.
+
+            Although the :ref:`available models
+            <vllm-benchmark-available-models-930>` are preconfigured to collect
+            offline throughput and online serving performance data, you can
+            also change the benchmarking parameters. See the standalone
+            benchmarking tab for more information.
+
+            {% if model.tunableop %}
+
+            .. note::
+
+               For improved performance, consider enabling :ref:`PyTorch TunableOp <mi300x-tunableop>`.
+               TunableOp automatically explores different implementations and configurations of certain PyTorch
+               operators to find the fastest one for your hardware.
+
+               By default, ``{{model.mad_tag}}`` runs with TunableOp disabled (see
+               `<https://github.com/ROCm/MAD/blob/develop/models.json>`__). To enable it, include
+               the ``--tunableop on`` argument in your run.
+
+               Enabling TunableOp triggers a two-pass run -- a warm-up followed by the
+               performance-collection run.
+
+            {% endif %}
+
+         .. tab-item:: Standalone benchmarking
+
+            The following commands are optimized for {{ model.model }}.
+            See :ref:`vllm-benchmark-supported-models-930` to switch to another available model.
+
+            .. seealso::
+
+               For more information on configuration, see the `config files
+               <https://github.com/ROCm/MAD/tree/develop/scripts/vllm/configs>`__
+               in the MAD repository. Refer to the `vLLM engine <https://docs.vllm.ai/en/latest/configuration/engine_args.html#engineargs>`__
+               for descriptions of available configuration options
+               and `Benchmarking vLLM <https://github.com/vllm-project/vllm/blob/main/benchmarks/README.md>`__ for
+               additional benchmarking information.
+
+            .. rubric:: Launch the container
+
+            You can run the vLLM benchmark tool independently by starting the
+            `Docker container <{{ docker.docker_hub_url }}>`_ as shown
+            in the following snippet.
+
+            .. code-block:: shell
+
+               docker pull {{ docker.pull_tag }}
+               docker run -it \
+                   --device=/dev/kfd \
+                   --device=/dev/dri \
+                   --group-add video \
+                   --shm-size 16G \
+                   --security-opt seccomp=unconfined \
+                   --security-opt apparmor=unconfined \
+                   --cap-add=SYS_PTRACE \
+                   -v $(pwd):/workspace \
+                   --env HUGGINGFACE_HUB_CACHE=/workspace \
+                   --name test \
+                   {{ docker.pull_tag }}
+
+            .. rubric:: Throughput command
+
+            Use the following command to start the throughput benchmark.
+
+            .. code-block:: shell
+
+               model={{ model.model_repo }}
+               tp={{ model.config.tp }}
+               num_prompts={{ model.config.num_prompts | default(1024) }}
+               in={{ model.config.in | default(128) }}
+               out={{ model.config.out | default(128) }}
+               dtype={{ model.config.dtype | default("auto") }}
+               kv_cache_dtype={{ model.config.kv_cache_dtype }}
+               max_num_seqs={{ model.config.max_num_seqs | default(1024) }}
+               max_num_batched_tokens={{ model.config.max_num_batched_tokens }}
+               max_model_len={{ model.config.max_model_len }}
+
+               vllm bench throughput --model $model \
+                   -tp $tp \
+                   --num-prompts $num_prompts \
+                   --input-len $in \
+                   --output-len $out \
+                   --dtype $dtype \
+                   --kv-cache-dtype $kv_cache_dtype \
+                   --max-num-seqs $max_num_seqs \
+                   --max-num-batched-tokens $max_num_batched_tokens \
+                   --max-model-len $max_model_len \
+                   --trust-remote-code \
+                   --output-json ${model}_throughput.json \
+                   --gpu-memory-utilization {{ model.config.gpu_memory_utilization | default(0.9) }}
+
+            .. rubric:: Serving command
+
+            1. Start the server using the following command:
+
+               .. code-block:: shell
+
+                  model={{ model.model_repo }}
+                  tp={{ model.config.tp }}
+                  dtype={{ model.config.dtype | default("auto") }}
+                  kv_cache_dtype={{ model.config.kv_cache_dtype }}
+                  max_num_seqs=256
+                  max_num_batched_tokens={{ model.config.max_num_batched_tokens }}
+                  max_model_len={{ model.config.max_model_len }}
+
+                  vllm serve $model \
+                      -tp $tp \
+                      --dtype $dtype \
+                      --kv-cache-dtype $kv_cache_dtype \
+                      --max-num-seqs $max_num_seqs \
+                      --max-num-batched-tokens $max_num_batched_tokens \
+                      --max-model-len $max_model_len \
+                      --no-enable-prefix-caching \
+                      --swap-space 16 \
+                      --disable-log-requests \
+                      --trust-remote-code \
+                      --gpu-memory-utilization 0.9
+
+               Wait until the model has loaded and the server is ready to accept requests.
+
+            2. On another terminal on the same machine, run the benchmark:
+
+               .. code-block:: shell
+
+                  # Connect to the container
+                  docker exec -it test bash
+
+                  # Wait for the server to start
+                  until curl -s http://localhost:8000/v1/models; do sleep 30; done
+
+                  # Run the benchmark
+                  model={{ model.model_repo }}
+                  max_concurrency=1
+                  num_prompts=10
+                  in=128
+                  out=128
+                  vllm bench serve --model $model \
+                      --percentile-metrics "ttft,tpot,itl,e2el" \
+                      --dataset-name random \
+                      --ignore-eos \
+                      --max-concurrency $max_concurrency \
+                      --num-prompts $num_prompts \
+                      --random-input-len $in \
+                      --random-output-len $out \
+                      --trust-remote-code \
+                      --save-result \
+                      --result-filename ${model}_serving.json
+
+            .. note::
+
+               For improved performance with certain Mixture of Experts models, such as Mixtral 8x22B,
+               try adding ``export VLLM_ROCM_USE_AITER=1`` to your commands.
+
+               If you encounter the following error, pass your access-authorized Hugging
+               Face token to the gated models.
+
+               .. code-block::
+
+                  OSError: You are trying to access a gated repo.
+
+                  # pass your HF_TOKEN
+                  export HF_TOKEN=$your_personal_hf_token
+
+            .. raw:: html
+
+               <style>
+               mjx-container[jax="CHTML"][display="true"] {
+                  text-align: left;
+                  margin: 0;
+               }
+               </style>
+
+            .. note::
+
+               Throughput is calculated as:
+
+               - .. math:: throughput\_tot = requests \times (\mathsf{\text{input lengths}} + \mathsf{\text{output lengths}}) / elapsed\_time
+
+               - .. math:: throughput\_gen = requests \times \mathsf{\text{output lengths}} / elapsed\_time
+      {% endfor %}
+   {% endfor %}
+
+Advanced usage
+==============
+
+For information on experimental features and known issues related to ROCm optimization efforts on vLLM,
+see the developer's guide at `<https://github.com/ROCm/vllm/blob/documentation/docs/dev-docker/README.md>`__.
+
+Reproducing the Docker image
+----------------------------
+
+To reproduce this ROCm-enabled vLLM Docker image release, follow these steps:
+
+1. Clone the `vLLM repository <https://github.com/vllm-project/vllm>`__.
+
+   .. code-block:: shell
+
+      git clone https://github.com/vllm-project/vllm.git
+      cd vllm
+
+2. Use the following command to build the image directly from the specified commit.
+
+   .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.2_20251006-benchmark-models.yaml
+
+      {% set docker = data.dockers[0] %}
+      .. code-block:: shell
+
+         docker build -f docker/Dockerfile.rocm \
+             --build-arg REMOTE_VLLM=1 \
+             --build-arg VLLM_REPO=https://github.com/ROCm/vllm \
+             --build-arg VLLM_BRANCH="{{ docker.dockerfile.commit }}" \
+             -t vllm-rocm .
+
+   .. tip::
+
+      Replace ``vllm-rocm`` with your desired image tag.
+
+Further reading
+===============
+
+- To learn more about the options for latency and throughput benchmark scripts,
+  see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.
+
+- To learn more about MAD and the ``madengine`` CLI, see the `MAD usage guide <https://github.com/ROCm/MAD?tab=readme-ov-file#usage-guide>`__.
+
+- To learn more about system settings and management practices to configure your system for
+  AMD Instinct MI300X Series GPUs, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
+
+- See :ref:`fine-tuning-llms-vllm` and :ref:`mi300x-vllm-optimization` for
+  a brief introduction to vLLM and optimization strategies.
+
+- For application performance optimization strategies for HPC and AI workloads,
+  including inference with vLLM, see :doc:`/how-to/rocm-for-ai/inference-optimization/workload`.
+
+- For a list of other ready-made Docker images for AI with ROCm, see
+  `AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.
+
+Previous versions
+=================
+
+See :doc:`vllm-history` to find documentation for previous releases
+of the ``ROCm/vllm`` Docker image.
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-history.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-history.rst
@@ -16,14 +16,23 @@ previous releases of the ``ROCm/vllm`` Docker image on `Docker Hub <https://hub.
     - Components
     - Resources

-   * - ``rocm/vllm:rocm7.0.0_vllm_0.10.2_20251006``
+   * - ``rocm/vllm:rocm7.0.0_vllm_0.11.1_20251024``
       (latest)
+     -
+       * ROCm 7.0.0
+       * vLLM 0.11.1
+       * PyTorch 2.9.0
+     -
+       * :doc:`Documentation <../vllm>`
+       * `Docker Hub <https://hub.docker.com/r/rocm/vllm/tags?name=rocm7.0.0_vllm_0.11.1_20251024>`__
+
+   * - ``rocm/vllm:rocm7.0.0_vllm_0.10.2_20251006``
     -
       * ROCm 7.0.0
       * vLLM 0.10.2
       * PyTorch 2.9.0
     -
-       * :doc:`Documentation <../vllm>`
+       * :doc:`Documentation <vllm-0.10.2-20251006>`
       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.10.2_20251006/images/sha256-94fd001964e1cf55c3224a445b1fb5be31a7dac302315255db8422d813edd7f5>`__

   * - ``rocm/vllm:rocm6.4.1_vllm_0.10.1_20250909``
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst
@@ -6,7 +6,7 @@
 vLLM inference performance testing
 **********************************

-.. _vllm-benchmark-unified-docker-930:
+.. _vllm-benchmark-unified-docker-1024:

 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml

@@ -34,7 +34,7 @@ vLLM inference performance testing
            {% endfor %}

 With this Docker image, you can quickly test the :ref:`expected
-inference performance numbers <vllm-benchmark-performance-measurements-930>` for
+inference performance numbers <vllm-benchmark-performance-measurements-1024>` for
 AMD Instinct GPUs.

 What's new
@@ -42,27 +42,13 @@ What's new

 The following is a summary of notable changes since the :doc:`previous ROCm/vLLM Docker release <previous-versions/vllm-history>`.

-* Added support for AMD Instinct MI355X and MI350X GPUs.
+* Enabled :ref:`AITER <vllm-optimization-aiter-switches>` by default.

-* Added support and benchmarking instructions for the following models. See :ref:`vllm-benchmark-supported-models-930`.
+* Fixed ``rms_norm`` segfault issue with Qwen 3 235B.

-  * Llama 4 Scout and Maverick
+* Known performance degradation on Llama 4 models due to `an upstream vLLM issue <https://github.com/vllm-project/vllm/issues/26320>`_.

-  * DeepSeek R1 0528 FP8
-
-  * MXFP4 models (MI355X and MI350X only): Llama 3.3 70B MXFP4 and Llama 3.1 405B MXFP4
-
-  * GPT OSS 20B and 120B
-
-  * Qwen 3 32B, 30B-A3B, and 235B-A22B
-
-* Removed the deprecated ``--max-seq-len-to-capture`` flag.
-
-* ``--gpu-memory-utilization`` is now configurable via the `configuration files
-  <https://github.com/ROCm/MAD/tree/develop/scripts/vllm/configs>`__ in the MAD
-  repository.
-
-.. _vllm-benchmark-supported-models-930:
+.. _vllm-benchmark-supported-models-1024:

 Supported models
 ================
@@ -72,7 +58,7 @@ Supported models
   {% set docker = data.dockers[0] %}
   {% set model_groups = data.model_groups %}

-   .. _vllm-benchmark-available-models-930:
+   .. _vllm-benchmark-available-models-1024:

   The following models are supported for inference performance benchmarking
   with vLLM and ROCm. Some instructions, commands, and recommendations in this
@@ -108,7 +94,7 @@ Supported models
         </div>
      </div>

-   .. _vllm-benchmark-vllm-930:
+   .. _vllm-benchmark-vllm-1024:

   {% for model_group in model_groups %}
      {% for model in model_group.models %}
@@ -136,7 +122,7 @@ Supported models
      {% endfor %}
   {% endfor %}

-.. _vllm-benchmark-performance-measurements-930:
+.. _vllm-benchmark-performance-measurements-1024:

 Performance measurements
 ========================
@@ -192,7 +178,7 @@ Benchmarking
   Once the setup is complete, choose between two options to reproduce the
   benchmark results:

-   .. _vllm-benchmark-mad-930:
+   .. _vllm-benchmark-mad-1024:

   {% for model_group in model_groups %}
      {% for model in model_group.models %}
@@ -204,7 +190,7 @@ Benchmarking
         .. tab-item:: MAD-integrated benchmarking

            The following run command is tailored to {{ model.model }}.
-            See :ref:`vllm-benchmark-supported-models-930` to switch to another available model.
+            See :ref:`vllm-benchmark-supported-models-1024` to switch to another available model.

            1. Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
               directory and install the required packages on the host machine.
@@ -233,7 +219,7 @@ Benchmarking
            and ``{{ model.mad_tag }}_serving.csv``.

            Although the :ref:`available models
-            <vllm-benchmark-available-models-930>` are preconfigured to collect
+            <vllm-benchmark-available-models-1024>` are preconfigured to collect
            offline throughput and online serving performance data, you can
            also change the benchmarking parameters. See the standalone
            benchmarking tab for more information.
@@ -258,7 +244,7 @@ Benchmarking
         .. tab-item:: Standalone benchmarking

            The following commands are optimized for {{ model.model }}.
-            See :ref:`vllm-benchmark-supported-models-930` to switch to another available model.
+            See :ref:`vllm-benchmark-supported-models-1024` to switch to another available model.

            .. seealso::

@@ -419,6 +405,10 @@ Advanced usage
 For information on experimental features and known issues related to ROCm optimization efforts on vLLM,
 see the developer's guide at `<https://github.com/ROCm/vllm/blob/documentation/docs/dev-docker/README.md>`__.

+.. note::
+
+   If you’re using this Docker image on other AMD GPUs such as the AMD Instinct MI200 Series or Radeon, add ``export VLLM_ROCM_USE_AITER=0`` to your command, since AITER is only supported on gfx942 and gfx950 architectures.
+
 Reproducing the Docker image
 ----------------------------

--- a/docs/how-to/rocm-for-ai/inference/deploy-your-model.rst
+++ b/docs/how-to/rocm-for-ai/inference/deploy-your-model.rst
@@ -22,7 +22,7 @@ See the `GitHub repository <https://github.com/vllm-project/vllm>`_ and `officia
 <https://docs.vllm.ai/>`_ for more information.

 For guidance on using vLLM with ROCm, refer to `Installation with ROCm
-<https://docs.vllm.ai/en/latest/getting_started/amd-installation.html>`_.
+<https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html#amd-rocm>`__.

 vLLM installation
 -----------------
--- a/docs/sphinx/requirements.in
+++ b/docs/sphinx/requirements.in
@@ -1,4 +1,4 @@
-rocm-docs-core==1.27.0
+rocm-docs-core==1.29.0
 sphinx-reredirects
 sphinx-sitemap
 sphinxcontrib.datatemplates==0.11.0
--- a/docs/sphinx/requirements.txt
+++ b/docs/sphinx/requirements.txt
@@ -2,13 +2,13 @@
 # This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
-#    pip-compile docs/sphinx/requirements.in
+#    pip-compile requirements.in
 #
 accessible-pygments==0.0.5
    # via pydata-sphinx-theme
 alabaster==1.0.0
    # via sphinx
-asttokens==3.0.0
+asttokens==3.0.1
    # via stack-data
 attrs==25.4.0
    # via
@@ -19,25 +19,27 @@ babel==2.17.0
    # via
    #   pydata-sphinx-theme
    #   sphinx
+beartype==0.22.6
+    # via sphinx-substitution-extensions
 beautifulsoup4==4.14.2
    # via pydata-sphinx-theme
 breathe==4.36.0
    # via rocm-docs-core
-certifi==2025.10.5
+certifi==2025.11.12
    # via requests
 cffi==2.0.0
    # via
    #   cryptography
    #   pynacl
-charset-normalizer==3.4.3
+charset-normalizer==3.4.4
    # via requests
-click==8.3.0
+click==8.3.1
    # via
    #   jupyter-cache
    #   sphinx-external-toc
 comm==0.2.3
    # via ipykernel
-cryptography==46.0.2
+cryptography==46.0.3
    # via pyjwt
 debugpy==1.8.17
    # via ipykernel
@@ -50,7 +52,8 @@ docutils==0.21.2
    #   myst-parser
    #   pydata-sphinx-theme
    #   sphinx
-exceptiongroup==1.3.0
+    #   sphinx-substitution-extensions
+exceptiongroup==1.3.1
    # via ipython
 executing==2.2.1
    # via stack-data
@@ -64,7 +67,7 @@ gitpython==3.1.45
    # via rocm-docs-core
 greenlet==3.2.4
    # via sqlalchemy
-idna==3.10
+idna==3.11
    # via requests
 imagesize==1.4.1
    # via sphinx
@@ -72,7 +75,7 @@ importlib-metadata==8.7.0
    # via
    #   jupyter-cache
    #   myst-nb
-ipykernel==6.30.1
+ipykernel==7.1.0
    # via myst-nb
 ipython==8.37.0
    # via
@@ -94,7 +97,7 @@ jupyter-client==8.6.3
    # via
    #   ipykernel
    #   nbclient
-jupyter-core==5.8.1
+jupyter-core==5.9.1
    # via
    #   ipykernel
    #   jupyter-client
@@ -106,7 +109,7 @@ markdown-it-py==3.0.0
    #   myst-parser
 markupsafe==3.0.3
    # via jinja2
-matplotlib-inline==0.1.7
+matplotlib-inline==0.2.1
    # via
    #   ipykernel
    #   ipython
@@ -117,7 +120,9 @@ mdurl==0.1.2
 myst-nb==1.3.0
    # via rocm-docs-core
 myst-parser==4.0.1
-    # via myst-nb
+    # via
+    #   myst-nb
+    #   sphinx-substitution-extensions
 nbclient==0.10.2
    # via
    #   jupyter-cache
@@ -132,16 +137,17 @@ nest-asyncio==1.6.0
 packaging==25.0
    # via
    #   ipykernel
+    #   pydata-sphinx-theme
    #   sphinx
 parso==0.8.5
    # via jedi
 pexpect==4.9.0
    # via ipython
-platformdirs==4.4.0
+platformdirs==4.5.0
    # via jupyter-core
 prompt-toolkit==3.0.52
    # via ipython
-psutil==7.1.0
+psutil==7.1.3
    # via ipykernel
 ptyprocess==0.7.0
    # via pexpect
@@ -149,7 +155,7 @@ pure-eval==0.2.3
    # via stack-data
 pycparser==2.23
    # via cffi
-pydata-sphinx-theme==0.16.1
+pydata-sphinx-theme==0.15.4
    # via
    #   rocm-docs-core
    #   sphinx-book-theme
@@ -163,7 +169,7 @@ pygments==2.19.2
    #   sphinx
 pyjwt[crypto]==2.10.1
    # via pygithub
-pynacl==1.6.0
+pynacl==1.6.1
    # via pygithub
 python-dateutil==2.9.0.post0
    # via jupyter-client
@@ -179,7 +185,7 @@ pyzmq==27.1.0
    # via
    #   ipykernel
    #   jupyter-client
-referencing==0.36.2
+referencing==0.37.0
    # via
    #   jsonschema
    #   jsonschema-specifications
@@ -187,9 +193,9 @@ requests==2.32.5
    # via
    #   pygithub
    #   sphinx
-rocm-docs-core==1.27.0
+rocm-docs-core==1.29.0
    # via -r requirements.in
-rpds-py==0.27.1
+rpds-py==0.29.0
    # via
    #   jsonschema
    #   referencing
@@ -212,12 +218,11 @@ sphinx==8.1.3
    #   sphinx-copybutton
    #   sphinx-design
    #   sphinx-external-toc
-    #   sphinx-last-updated-by-git
    #   sphinx-notfound-page
-    #   sphinx-reredirects
+    #   sphinx-substitution-extensions
    #   sphinxcontrib-datatemplates
    #   sphinxcontrib-runcmd
-sphinx-book-theme==1.1.3
+sphinx-book-theme==1.1.4
    # via rocm-docs-core
 sphinx-copybutton==0.5.2
    # via rocm-docs-core
@@ -225,13 +230,9 @@ sphinx-design==0.6.1
    # via rocm-docs-core
 sphinx-external-toc==1.0.1
    # via rocm-docs-core
-sphinx-last-updated-by-git==0.3.8
-    # via sphinx-sitemap
 sphinx-notfound-page==1.1.0
    # via rocm-docs-core
-sphinx-reredirects==0.1.6
-    # via -r requirements.in
-sphinx-sitemap==2.9.0
+sphinx-substitution-extensions==2025.10.24
    # via -r requirements.in
 sphinxcontrib-applehelp==2.0.0
    # via sphinx
@@ -249,13 +250,13 @@ sphinxcontrib-runcmd==0.2.0
    # via sphinxcontrib-datatemplates
 sphinxcontrib-serializinghtml==2.0.0
    # via sphinx
-sqlalchemy==2.0.43
+sqlalchemy==2.0.44
    # via jupyter-cache
 stack-data==0.6.3
    # via ipython
 tabulate==0.9.0
    # via jupyter-cache
-tomli==2.2.1
+tomli==2.3.0
    # via sphinx
 tornado==6.5.2
    # via
--- a/tools/rocm-build/rocm-7.1.1.xml
+++ b/tools/rocm-build/rocm-7.1.1.xml
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<manifest>
+    <remote name="rocm-org" fetch="https://github.com/ROCm/" />
+    <default revision="refs/tags/rocm-7.1.1"
+     remote="rocm-org"
+     sync-c="true"
+     sync-j="4" />
+<!--list of projects for ROCm-->
+    <project name="ROCK-Kernel-Driver" />
+    <project name="amdsmi" />
+    <project name="rocm_bandwidth_test" />
+    <project name="rocm-examples" />
+<!--HIP Projects-->
+    <project name="HIPIFY" />
+<!-- The following projects are all associated with the AMDGPU LLVM compiler -->
+    <project name="half" />
+    <project name="llvm-project" />
+    <project name="spirv-llvm-translator" />
+<!-- gdb projects -->
+    <project name="ROCdbgapi" />
+    <project name="ROCgdb" />
+    <project name="rocr_debug_agent" />
+<!-- ROCm Libraries -->
+    <project groups="mathlibs" name="AMDMIGraphX" />
+    <project groups="mathlibs" name="MIVisionX" />
+    <project groups="mathlibs" name="ROCmValidationSuite" />
+    <project groups="mathlibs" name="composable_kernel" />
+    <project groups="mathlibs" name="hipSOLVER" />
+    <project groups="mathlibs" name="hipTensor" />
+    <project groups="mathlibs" name="hipfort" />
+    <project groups="mathlibs" name="rccl" />
+    <project groups="mathlibs" name="rocAL" />
+    <project groups="mathlibs" name="rocALUTION" />
+    <project groups="mathlibs" name="rocDecode" />
+    <project groups="mathlibs" name="rocJPEG" />
+    <!-- The following components have been migrated to rocm-libraries:
+        hipBLAS-common hipBLAS hipBLASLt hipCUB
+        hipFFT hipRAND hipSPARSE hipSPARSELt
+        MIOpen rocBLAS rocFFT rocPRIM rocRAND
+        rocSPARSE rocThrust Tensile -->
+    <project groups="mathlibs" name="rocm-libraries" />
+    <!-- The following components have been migrated to rocm-systems:
+        aqlprofile clr hip hip-tests hipother
+        rdc rocm-core rocm_smi_lib rocminfo rocprofiler-compute 
+        rocprofiler-register rocprofiler-sdk rocprofiler-systems 
+        rocprofiler rocr-runtime roctracer -->
+    <project groups="mathlibs" name="rocm-systems" />
+    <project groups="mathlibs" name="rocPyDecode" />
+    <project groups="mathlibs" name="rocSHMEM" />
+    <project groups="mathlibs" name="rocSOLVER" />
+    <project groups="mathlibs" name="rocWMMA" />
+    <project groups="mathlibs" name="rocm-cmake" />
+    <project groups="mathlibs" name="rpp" />
+    <project groups="mathlibs" name="TransferBench" />
+<!-- Projects for OpenMP-Extras -->
+    <project name="aomp" path="openmp-extras/aomp" />
+    <project name="aomp-extras" path="openmp-extras/aomp-extras" />
+    <project name="flang" path="openmp-extras/flang" />
+</manifest>
Author	SHA1	Message	Date
Istvan Kiss	093752d7c4	Add JAX Plugin-PJRT support table 7.1.1	2025-11-26 16:51:25 +01:00
Alex Xu	d4cdbd79a3	Merge branch 'develop' into docs/7.1.1	2025-11-26 08:47:19 -05:00
srayasam-amd	096d91e190	Updating rocm version to 7.1.1 GA (#5697 ) * 7.1.1 GA update * 7.1.1 GA update * Update rocm-7.1.1.xml * Update default.xml	2025-11-26 16:08:03 +05:30
alexxu-amd	26d1ab7d27	Update documentation requirements	2025-11-25 16:30:46 -05:00
alexxu-amd	272c9f6be3	Update documentation requirements	2025-11-25 15:37:04 -05:00
Pratik Basyal	702d8e4c8e	New link updated for MIgraphx (#5691 )	2025-11-24 11:52:38 -05:00
amd-hsivasun	807ec6afcf	[Ex CI] Update AMDMIGraphX CMake version (#5683 )	2025-11-20 18:05:24 -05:00
amd-hsivasun	4c04da05c3	[Ex CI] Update pipeline ID for amdmis to monorepo (#5685 )	2025-11-20 18:05:17 -05:00
dependabot[bot]	411334716c	Bump rocm-docs-core from 1.28.0 to 1.29.0 in /docs/sphinx (#5659 ) Bumps [rocm-docs-core](https://github.com/ROCm/rocm-docs-core) from 1.28.0 to 1.29.0. - [Release notes](https://github.com/ROCm/rocm-docs-core/releases) - [Changelog](https://github.com/ROCm/rocm-docs-core/blob/develop/CHANGELOG.md) - [Commits](https://github.com/ROCm/rocm-docs-core/compare/v1.28.0...v1.29.0) --- updated-dependencies: - dependency-name: rocm-docs-core dependency-version: 1.29.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2025-11-20 13:54:33 -05:00
amd-hsivasun	99f0875e70	[Ex CI] amdsmi monorepo enablement (#5677 ) * [Ex CI] amdsmi monorepo enablement * Fix amdsmi yaml	2025-11-20 13:52:01 -05:00
Adel Johar	8d51d0e803	[Ex CI] Add CXX override for MIGraphX	2025-11-19 10:45:10 +01:00
Adel Johar	66b8b96c72	[Ex CI] Add missing dependencies for rccl and mivisionx	2025-11-19 10:45:10 +01:00
cfallows-amd	72107dd6d5	[Ex CI] Adding dependencies to rocprofiler-compute azure workflow (#5667 )	2025-11-14 12:24:56 -05:00
amd-hsivasun	99c1590057	[Ex CI] Added ROCM_PATH env var to rocprofiler-compute (#5666 )	2025-11-14 12:19:06 -05:00
Carrie Fallows	636d4cc736	Adding dependencies to rocmDependencies in rocprof-compute yaml. Now needed for building because of rocprofiler-sdk dependency. Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com>	2025-11-13 20:56:45 -05:00
amd-hsivasun	d1ce815d8d	[Ex CI] Add rocprofiler-sdk dep to build for rocprofiler-compute (#5664 )	2025-11-13 16:08:02 -05:00
Pratik Basyal	80ced95526	Changelog updated (#5660 )	2025-11-13 10:18:15 -05:00
Pratik Basyal	09c6a9fdef	710 RCCL Known Issues and CRIU note update (#5647 ) * RCCL ALltoALL known issue added * CRIU note added * Minor change * Review feedback and AMDSMI detailed changelog link added * Github issue link added	2025-11-11 16:54:36 -05:00
peterjunpark	eb956cfc5c	Fixed wording related to VLLM_V1_USE_PREFILL_DECODE_ATTENTION (#5605 ) Co-authored-by: Hongxia Yang <hongxia.yang@amd.com>	2025-11-11 09:22:11 -05:00
peterjunpark	e05cdca54f	Fix references to vLLM docs (#5651 )	2025-11-11 09:00:07 -05:00
anisha-amd	04c7374f41	Docs: frameworks 25.10 - compatibility - DGL and llama.cpp (#5648 )	2025-11-10 15:26:54 -05:00
Alex Xu	39de859bd1	update rocm-docs-core to 1.29.0	2025-11-10 14:10:06 -05:00
amd-hsivasun	c8531ac7ea	[Ex CI] Update pipeline Id for hipTensor to monorepo (#5638 )	2025-11-10 13:32:10 -05:00
Pratik Basyal	420bbfa126	7.1.0 MI325X PLDM note updated (#5644 ) * PLDM note updated * Footnote update * Note added to compatibility * Lint error fixed	2025-11-08 09:08:21 -05:00
Pratik Basyal	4881887e2c	rocBLAS precision known issue added [Develop] (#5641 ) * rocBLAS precision known issue added * IPC note removed * Review feedback added	2025-11-07 19:45:33 -05:00
Pratik Basyal	148d6670ad	rocBLAS and HipBLASLt known issue added 7.1.0 (#5634 ) * rocBLAS and HipBLASLt known issue added * Title warning fixed * Jeff's feedback added * Leo's feedback incorporated * Minor feedback * MI325X PLDM udpate * Leo's feedback added * PyTorch profiling issue added * Changelog synced * JAX section removed * Ram's feedback added	2025-11-07 17:48:36 -05:00
amd-hsivasun	9770e9b6ef	[Ex CI] hiptensor Enablement (#5636 )	2025-11-07 16:08:46 -05:00
Joseph Macaranas	ee4cf66d67	[External CI] Add simde-devel in dnf mapping (#5635 )	2025-11-07 00:59:35 -05:00
amd-hsivasun	6ba30f191c	[Ex CI] rocWMMA increase timeout for test job (#5620 )	2025-11-06 11:38:07 -05:00
yugang-amd	674dc355e4	vLLM 10/24 release (#5626 ) * vLLM 10/24 release * updates per SME inputs * Update docs/how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst Co-authored-by: Jeffrey Novotny <jnovotny@amd.com> --------- Co-authored-by: Jeffrey Novotny <jnovotny@amd.com>	2025-11-05 11:13:50 -05:00
Adel Johar	c7f3a56811	[Ex CI] Add half, rccl, and dependencies for rpp, mivisionx and rocjpeg	2025-11-05 15:59:15 +01:00