[External CI] Support rocprofiler-sdk libelf changes

vLLM 10/24 release (#5626 )
* vLLM 10/24 release * updates per SME inputs * Update docs/how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst Co-authored-by: Jeffrey Novotny <jnovotny@amd.com> --------- Co-authored-by: Jeffrey Novotny <jnovotny@amd.com>
2026-01-20 20:18:15 -05:00 · 2025-11-05 15:27:16 -05:00 · 2025-11-05 11:13:50 -05:00 · 2025-11-05 15:59:15 +01:00 · 2025-10-31 18:19:40 -04:00 · 2025-10-31 14:57:13 -04:00
14 changed files with 967 additions and 112 deletions
--- a/.azuredevops/components/rocWMMA.yml
+++ b/.azuredevops/components/rocWMMA.yml
@@ -1,10 +1,29 @@
 parameters:
+- name: componentName
+  type: string
+  default: rocWMMA
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
+# monorepo related parameters
+- name: sparseCheckoutDir
+  type: string
+  default: ''
+- name: triggerDownstreamJobs
+  type: boolean
+  default: false
+- name: downstreamAggregateNames
+  type: string
+  default: ''
+- name: buildDependsOn
+  type: object
+  default: null
+- name: unifiedBuild
+  type: boolean
+  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -66,7 +85,11 @@ parameters:

 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: rocWMMA_build_${{ job.target }}
+  - job: ${{ parameters.componentName }}_build_${{ job.target }}
+    ${{ if parameters.buildDependsOn }}:
+      dependsOn:
+        - ${{ each build in parameters.buildDependsOn }}:
+          - ${{ build }}_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
@@ -81,6 +104,7 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
+        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
@@ -102,9 +126,12 @@ jobs:
  # gfx1030 not supported in documentation
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
+        componentName: ${{ parameters.componentName }}
+        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
+        componentName: ${{ parameters.componentName }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -112,43 +139,45 @@ jobs:
        aptPackages: ${{ parameters.aptPackages }}
        gpuTarget: ${{ job.target }}

- ${{ each job in parameters.jobMatrix.testJobs }}:
-  - job: rocWMMA_test_${{ job.target }}
-    timeoutInMinutes: 270
-    dependsOn: rocWMMA_build_${{ job.target }}
-    condition:
-      and(succeeded(),
-        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
-        eq(${{ parameters.aggregatePipeline }}, False)
-      )
-    variables:
-    - group: common
-    - template: /.azuredevops/variables-global.yml
-    pool: ${{ job.target }}_test_pool
-    workspace:
-      clean: all
-    steps:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-      parameters:
-        gpuTarget: ${{ job.target }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-      parameters:
-        checkoutRef: ${{ parameters.checkoutRef }}
-        dependencyList: ${{ parameters.rocmTestDependencies }}
-        gpuTarget: ${{ job.target }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-      parameters:
-        componentName: rocWMMA
-        testDir: '$(Agent.BuildDirectory)/rocm/bin/rocwmma'
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-      parameters:
-        aptPackages: ${{ parameters.aptPackages }}
-        environment: test
-        gpuTarget: ${{ job.target }}
+- ${{ if eq(parameters.unifiedBuild, False) }}:
+  - ${{ each job in parameters.jobMatrix.testJobs }}:
+    - job: ${{ parameters.componentName }}_test_${{ job.target }}
+      timeoutInMinutes: 270
+      dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
+      condition:
+        and(succeeded(),
+          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
+          eq(${{ parameters.aggregatePipeline }}, False)
+        )
+      variables:
+      - group: common
+      - template: /.azuredevops/variables-global.yml
+      pool: ${{ job.target }}_test_pool
+      workspace:
+        clean: all
+      steps:
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+        parameters:
+          aptPackages: ${{ parameters.aptPackages }}
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+        parameters:
+          preTargetFilter: ${{ parameters.componentName }}
+          gpuTarget: ${{ job.target }}
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+        parameters:
+          checkoutRef: ${{ parameters.checkoutRef }}
+          dependencyList: ${{ parameters.rocmTestDependencies }}
+          gpuTarget: ${{ job.target }}
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+        parameters:
+          componentName: ${{ parameters.componentName }}
+          testDir: '$(Agent.BuildDirectory)/rocm/bin/rocwmma'
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+        parameters:
+          aptPackages: ${{ parameters.aptPackages }}
+          environment: test
+          gpuTarget: ${{ job.target }}
--- a/.azuredevops/components/rocm-examples.yml
+++ b/.azuredevops/components/rocm-examples.yml
@@ -21,11 +21,25 @@ parameters:
    - libtbb-dev
    - libtiff-dev
    - libva-amdgpu-dev
+    - libva2-amdgpu
+    - mesa-amdgpu-va-drivers
    - libavcodec-dev
    - libavformat-dev
    - libavutil-dev
    - ninja-build
    - python3-pip
+    - protobuf-compiler
+    - libprotoc-dev
+- name: pipModules
+  type: object
+  default:
+    - future==1.0.0
+    - pytz==2022.1
+    - numpy==1.23
+    - google==3.0.0
+    - protobuf==3.12.4
+    - onnx==1.12.0
+    - nnef==1.0.7
 - name: rocmDependencies
  type: object
  default:
@@ -33,6 +47,7 @@ parameters:
    - aomp
    - aomp-extras
    - clr
+    - half
    - composable_kernel
    - hipBLAS
    - hipBLAS-common
@@ -47,6 +62,7 @@ parameters:
    - llvm-project
    - MIOpen
    - MIVisionX
+    - rccl
    - rocALUTION
    - rocBLAS
    - rocDecode
@@ -69,6 +85,7 @@ parameters:
    - aomp
    - aomp-extras
    - clr
+    - half
    - composable_kernel
    - hipBLAS
    - hipBLAS-common
@@ -83,6 +100,7 @@ parameters:
    - llvm-project
    - MIOpen
    - MIVisionX
+    - rccl
    - rocALUTION
    - rocBLAS
    - rocDecode
--- a/.azuredevops/components/rocprofiler-sdk.yml
+++ b/.azuredevops/components/rocprofiler-sdk.yml
@@ -131,6 +131,11 @@ jobs:
        script: |
          USER_BASE=$(python3 -m site --user-base)
          echo "##vso[task.prependpath]$USER_BASE/bin"
+    - task: Bash@3
+      displayName: libelf hack
+      inputs:
+        targetType: inline
+        script: cp $(Agent.BuildDirectory)/s/cmake/Modules/Findlibelf.cmake $(Agent.BuildDirectory)/s/cmake/Modules/FindLibElf.cmake
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        componentName: ${{ parameters.componentName }}
--- a/.azuredevops/templates/steps/dependencies-rocm.yml
+++ b/.azuredevops/templates/steps/dependencies-rocm.yml
@@ -263,7 +263,7 @@ parameters:
      developBranch: develop
      hasGpuTarget: true
    rocWMMA:
-      pipelineId: 109
+      pipelineId: 370
      developBranch: develop
      hasGpuTarget: true
    rpp:
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -141,7 +141,6 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc
 #### Optimized

 * `TF32` kernel optimization for the AMD Instinct MI355X GPU to enhance training and inference efficiency.
-* Meta Model optimization for the AMD Instinct MI350X GPU to enable better performance across transformer-based models.

 #### Resolved issues

@@ -579,6 +578,8 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc

 * MI300A/X L2-Fabric 64B read counter may display negative values - The rocprof-compute metric 17.6.1 (Read 64B) can report negative values due to incorrect calculation when TCC_BUBBLE_sum + TCC_EA0_RDREQ_32B_sum exceeds TCC_EA0_RDREQ_sum.
  * A workaround has been implemented using max(0, calculated_value) to prevent negative display values while the root cause is under investigation.
+* The profile mode crashes when `--format-rocprof-output json` is selected.
+  * As a workaround, this option should either not be provided or should be set to `csv` instead of `json`. This issue does not affect the profiling results since both `csv` and `json` output formats lead to the same profiling data.  

 ### **ROCm Data Center Tool** (1.2.0)

@@ -694,7 +695,7 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc
 * Updated error handling for several rocRAND unit tests to accommodate the new `hipGetLastError` behavior that was introduced in ROCm 7.0.
 As of ROCm 7.0, the internal error state is cleared on each call to `hipGetLastError` rather than on every HIP API call.

-### **rocSOLVER** (3.30.0)
+### **rocSOLVER** (3.31.0)

 #### Added

@@ -1044,10 +1045,6 @@ for a complete overview of this release.

 - `amd-smi monitor` on Linux Guest systems triggers an attribute error.

-```{note}
-See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.0/CHANGELOG.md) for details, examples, and in-depth descriptions.
-```
-
 ### **Composable Kernel** (1.1.0) 

 #### Added
@@ -2364,7 +2361,7 @@ The previous default accumulator types could lead to situations in which unexpec

 #### Added

-* Hybrid computation support for existing routines: STEQR
+* Hybrid computation support for existing STEQR routines.

 #### Optimized

--- a/RELEASE.md
+++ b/RELEASE.md
@@ -44,7 +44,6 @@ The following are notable new features and improvements in ROCm 7.1.0. For chang

 ROCm 7.1.0 extends the operating system support for the following AMD hardware:

-* AMD Instinct MI355X and MI350X GPUs add support for Debian 13.
 * AMD Instinct MI325X adds support for RHEL 10.0, SLES15 SP7, Debian 13, Debian 12, Oracle Linux 10, and Oracle Linux 9.
 * AMD Instinct MI100 adds support for SLES 15 SP7.

@@ -212,7 +211,6 @@ hipBLASLt introduces several performance and model compatibility improvements fo

 * TF32 kernel optimization for AMD Instinct MI355X GPUs to enhance training and inference efficiency.
 * FP32 kernel optimization for AMD Instinct MI350X GPUs, improving precision-based workloads.
-* Meta model optimization for AMD Instinct MI350X GPUs, enabling better performance across transformer-based models.
 * Llama 2 70B model support fix for AMD Instinct MI350X GPUs: Removed incorrect kernel to ensure accurate and stable execution.
 * For AMD Instinct MI350X GPUs, added multiple high-performance kernels optimized for `FP16` and `BF16` data types, enhancing heuristic-based execution.
 * FP8 low-precision data type operations on AMD Instinct MI350X GPUs. This update adds FP8 support for the Instinct MI350X using the hipBLASLt low-precision data type functionality.
@@ -465,7 +463,7 @@ Click {fab}`github` to go to the component's source code on GitHub.
            </tr>
            <tr>
                <td><a href="https://rocm.docs.amd.com/projects/hipFFT/en/docs-7.1.0/index.html">hipFFT</a></td>
-                <td>1.0.20&nbsp;&Rightarrow;&nbsp;<a href="#hipfft-1-0-20">1.0.21</a></td>
+                <td>1.0.20&nbsp;&Rightarrow;&nbsp;<a href="#hipfft-1-0-21">1.0.21</a></td>
                <td><a href="https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipfft"><i class="fab fa-github fa-lg"></i></a></td>
            </tr>
            <tr>
@@ -481,7 +479,7 @@ Click {fab}`github` to go to the component's source code on GitHub.
            <tr>
                <td><a href="https://rocm.docs.amd.com/projects/hipSOLVER/en/docs-7.1.0/index.html">hipSOLVER</a></td>
                <td>3.0.0&nbsp;&Rightarrow;&nbsp;<a href="#hipsolver-3-1-0">3.1.0</a></td>
-                <td><a href="https://github.com/ROCm/hipSOLVER"><i class="fab fa-github fa-lg"></i></a></td>
+                <td><a href="https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipsolver"><i class="fab fa-github fa-lg"></i></a></td>
            </tr>
            <tr>
                <td><a href="https://rocm.docs.amd.com/projects/hipSPARSE/en/docs-7.1.0/index.html">hipSPARSE</a></td>
@@ -516,7 +514,7 @@ Click {fab}`github` to go to the component's source code on GitHub.
            <tr>
                <td><a href="https://rocm.docs.amd.com/projects/rocSOLVER/en/docs-7.1.0/index.html">rocSOLVER</a></td>
                <td>3.30.1&nbsp;&Rightarrow;&nbsp;<a href="#rocsolver-3-31-0">3.31.0</a></td>
-                <td><a href="https://github.com/ROCm/rocSOLVER"><i class="fab fa-github fa-lg"></i></a></td>
+                <td><a href="https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocsolver"><i class="fab fa-github fa-lg"></i></a></td>
            </tr>
            <tr>
                <td><a href="https://rocm.docs.amd.com/projects/rocSPARSE/en/docs-7.1.0/index.html">rocSPARSE</a></td>
@@ -742,10 +740,6 @@ For a historical overview of ROCm component updates, see the {doc}`ROCm consolid

 * Fixed certain output in `amd-smi monitor` when GPUs are partitioned. It fixes the issue with amd-smi monitor such as: `amd-smi monitor -Vqt`, `amd-smi monitor -g 0 -Vqt -w 1`, and `amd-smi monitor -Vqt --file /tmp/test1`. These commands will now be able to display as normal in partitioned GPU scenarios.

-```{note}
-See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.1/CHANGELOG.md) for details, examples, and in-depth descriptions.
-```
-
 ### **Composable Kernel** (1.1.0)

 #### Added
@@ -835,7 +829,6 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc
 #### Optimized

 * `TF32` kernel optimization for the AMD Instinct MI355X GPU to enhance training and inference efficiency.
-* Meta Model optimization for the AMD Instinct MI350X GPU to enable better performance across transformer-based models.

 #### Resolved issues

@@ -1273,6 +1266,8 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc

 * MI300A/X L2-Fabric 64B read counter may display negative values - The rocprof-compute metric 17.6.1 (Read 64B) can report negative values due to incorrect calculation when TCC_BUBBLE_sum + TCC_EA0_RDREQ_32B_sum exceeds TCC_EA0_RDREQ_sum.
  * A workaround has been implemented using max(0, calculated_value) to prevent negative display values while the root cause is under investigation.
+* The profile mode crashes when `--format-rocprof-output json` is selected.
+    * As a workaround, this option should either not be provided or should be set to `csv` instead of `json`. This issue does not affect the profiling results since both `csv` and `json` output formats lead to the same profiling data.  

 ### **ROCm Data Center Tool** (1.2.0)

@@ -1388,11 +1383,11 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc
 * Updated error handling for several rocRAND unit tests to accommodate the new `hipGetLastError` behavior that was introduced in ROCm 7.0.
 As of ROCm 7.0, the internal error state is cleared on each call to `hipGetLastError` rather than on every HIP API call.

-### **rocSOLVER** (3.30.0)
+### **rocSOLVER** (3.31.0)

 #### Added

-* Hybrid computation support for existing routines: STEQR
+* Hybrid computation support for existing STEQR routines.

 #### Optimized

@@ -1491,6 +1486,18 @@ ls -l /opt/rocm-7.0.0/lib/libmigraphx_py_*.so
 ```
 The issue will be resolved in a future ROCm release. See [GitHub issue #5500](https://github.com/ROCm/ROCm/issues/5500).

+### rocprofv3 fails on RPM-based OS with Python 3.10 (and later)
+
+On RPM-based operating systems (such as RHEL 8), the `rocprofv3` tool fails with Python 3.10 and later due to missing ROCPD bindings. As a workaround, use Python 3.6 if you need to use the `rocprofv3` tool with ROCm 7.1.0. This issue will be fixed in a future ROCm release. See [GitHub issue #5606](https://github.com/ROCm/ROCm/issues/5606).
+
+### ROCgdb might fail on SR-IOV guest VMs
+
+ROCgdb might fail when running the `step-schedlock-spurious-waves.exp` test case on SR-IOV guest virtual machines (VMs). As a workaround, avoid running an inferior in ROCgdb if a background process is already heavily utilizing the GPU. The issue is currently under investigation and will be fixed in a future ROCm release. See [GitHub issue #5607](https://github.com/ROCm/ROCm/issues/5607).
+
+### Issue uninstalling ROCm Bandwidth Test using amdgpu-install script
+
+Due to a missing `rocm-core` dependency from the ROCm Bandwidth Test, you can't cleanly uninstall ROCm Bandwidth Test using the `amdgpu-install` script. As a workaround, uninstall ROCm Bandwidth Test manually, using the native package managers. For more information, see [Installation via native package manager](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/install-methods/package-manager-index.html). The issue will be fixed in a future ROCm release. See [GitHub issue #5611](https://github.com/ROCm/ROCm/issues/5611).
+
 ## ROCm resolved issues

 The following are previously known issues resolved in this release. For resolved issues related to
--- a/docs/compatibility/compatibility-matrix-historical-6.0.csv
+++ b/docs/compatibility/compatibility-matrix-historical-6.0.csv
@@ -49,8 +49,8 @@ ROCm Version,7.1.0,7.0.2,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6
      `UCX <https://github.com/ROCm/ucx>`_,>=1.17.0,>=1.17.0,>=1.17.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
      ,,,,,,,,,,,,,,,,,,,,,
      THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,
-      Thrust,2.6.0,2.6.0,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
-      CUB,2.6.0,2.6.0,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
+      Thrust,2.8.5,2.6.0,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
+      CUB,2.8.5,2.6.0,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
      ,,,,,,,,,,,,,,,,,,,,,
     DRIVER & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,
      :doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.20.0, 30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x","30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x","30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x, 6.2.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
@@ -96,7 +96,7 @@ ROCm Version,7.1.0,7.0.2,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6
      :doc:`rocThrust <rocthrust:index>`,4.1.0,4.0.0,4.0.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
      ,,,,,,,,,,,,,,,,,,,,,
      SUPPORT LIBS,,,,,,,,,,,,,,,,,,,,,
-      `hipother <https://github.com/ROCm/hipother>`_,7.1.25414,7.0.51831,7.0.51830,6.4.43483,6.4.43483,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
+      `hipother <https://github.com/ROCm/hipother>`_,7.1.25424,7.0.51831,7.0.51830,6.4.43483,6.4.43483,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
      `rocm-core <https://github.com/ROCm/rocm-core>`_,7.1.0,7.0.2,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
      `ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
      ,,,,,,,,,,,,,,,,,,,,,
@@ -126,12 +126,12 @@ ROCm Version,7.1.0,7.0.2,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6
      COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,
      `clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
      :doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
-      `Flang <https://github.com/ROCm/flang>`_,20.0.025413,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
-      :doc:`llvm-project <llvm-project:index>`,20.0.025413,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
-      `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,20.0.025413,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
+      `Flang <https://github.com/ROCm/flang>`_,20.0.025425,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
+      :doc:`llvm-project <llvm-project:index>`,20.0.025425,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
+      `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,20.0.025425,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
      ,,,,,,,,,,,,,,,,,,,,,
      RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,
-      :doc:`AMD CLR <hip:understand/amd_clr>`,7.1.25414,7.0.51831,7.0.51830,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
-      :doc:`HIP <hip:index>`,7.1.25414,7.0.51831,7.0.51830,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
+      :doc:`AMD CLR <hip:understand/amd_clr>`,7.1.25424,7.0.51831,7.0.51830,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
+      :doc:`HIP <hip:index>`,7.1.25424,7.0.51831,7.0.51830,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
      `OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
      :doc:`ROCr Runtime <rocr-runtime:index>`,1.18.0,1.18.0,1.18.0,1.15.0,1.15.0,1.15.0,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
--- a/docs/compatibility/compatibility-matrix.rst
+++ b/docs/compatibility/compatibility-matrix.rst
@@ -66,8 +66,8 @@ compatibility and system requirements.
      `UCX <https://github.com/ROCm/ucx>`_,>=1.17.0,>=1.17.0,>=1.15.0
      ,,,
      THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix:,,
-      Thrust,2.6.0,2.6.0,2.5.0
-      CUB,2.6.0,2.6.0,2.5.0
+      Thrust,2.8.5,2.6.0,2.5.0
+      CUB,2.8.5,2.6.0,2.5.0
      ,,,
      DRIVER & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,,
      :doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.20.0, 30.10.2, |br| 30.10.1 [#driver_patch]_, 30.10, 6.4.x","30.10.2, 30.10.1 [#driver_patch]_, |br| 30.10, 6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
@@ -113,7 +113,7 @@ compatibility and system requirements.
      :doc:`rocThrust <rocthrust:index>`,4.1.0,4.0.0,3.3.0
      ,,,
      SUPPORT LIBS,,,
-      `hipother <https://github.com/ROCm/hipother>`_,7.1.25414,7.0.51831,6.4.43482
+      `hipother <https://github.com/ROCm/hipother>`_,7.1.25424,7.0.51831,6.4.43482
      `rocm-core <https://github.com/ROCm/rocm-core>`_,7.1.0,7.0.2,6.4.0
      `ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_
      ,,,
@@ -143,13 +143,13 @@ compatibility and system requirements.
      COMPILERS,.. _compilers-support-compatibility-matrix:,,
      `clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A
      :doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1
-      `Flang <https://github.com/ROCm/flang>`_,20.0.025413,20.0.0.25385,19.0.0.25133
-      :doc:`llvm-project <llvm-project:index>`,20.0.025413,20.0.0.25385,19.0.0.25133
-      `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,20.0.025413,20.0.0.25385,19.0.0.25133
+      `Flang <https://github.com/ROCm/flang>`_,20.0.025425,20.0.0.25385,19.0.0.25133
+      :doc:`llvm-project <llvm-project:index>`,20.0.025425,20.0.0.25385,19.0.0.25133
+      `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,20.0.025425,20.0.0.25385,19.0.0.25133
      ,,,
      RUNTIMES,.. _runtime-support-compatibility-matrix:,,
-      :doc:`AMD CLR <hip:understand/amd_clr>`,7.1.25414,7.0.51831,6.4.43482
-      :doc:`HIP <hip:index>`,7.1.25414,7.0.51831,6.4.43482
+      :doc:`AMD CLR <hip:understand/amd_clr>`,7.1.25424,7.0.51831,6.4.43482
+      :doc:`HIP <hip:index>`,7.1.25424,7.0.51831,6.4.43482
      `OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0
      :doc:`ROCr Runtime <rocr-runtime:index>`,1.18.0,1.18.0,1.15.0

@@ -164,7 +164,7 @@ compatibility and system requirements.
 .. [#ol-710-mi300x] **For ROCm 7.1.x** - Oracle Linux 10 and 9 are supported only on AMD Instinct MI355X, MI350X, MI325X, and MI300X GPUs. Oracle Linux 8 is supported only on AMD Instinct MI300X GPU.
 .. [#ol-700-mi300x] **For ROCm 7.0.x** - Oracle Linux 10 and 9 are supported only on AMD Instinct MI355X, MI350X, and MI300X GPUs. Oracle Linux 8 is supported only on AMD Instinct MI300X GPU.
 .. [#ol-mi300x] **Prior ROCm 7.0.0** - Oracle Linux is supported only on AMD Instinct MI300X GPUs.
-.. [#db-710-mi300x] **For ROCm 7.1.x** - Debian 13 is supported only on AMD Instinct MI355X, MI350X, MI325X, and MI300X GPUs.
+.. [#db-710-mi300x] **For ROCm 7.1.x** - Debian 13 is supported only on AMD Instinct MI325X and MI300X GPUs.
 .. [#db12-710] **For ROCm 7.1.x** - Debian 12 is supported only on AMD Instinct MI325X, MI300X, MI300A, MI250X, MI250, and MI210 GPUs.
 .. [#db-mi300x] **For ROCm 7.0.2** - Debian 13 is supported only on AMD Instinct MI300X GPUs.
 .. [#az-mi300x] Starting ROCm 6.4.0, Azure Linux 3.0 is supported only on AMD Instinct MI300X and AMD Radeon PRO V710 GPUs.
@@ -270,7 +270,7 @@ Expand for full historical view of:
   .. [#ol-710-mi300x-past-60] **For ROCm 7.1.x** - Oracle Linux 10 and 9 are supported only on AMD Instinct MI355X, MI350X, MI325X, and MI300X GPUs. Oracle Linux 8 is supported only on AMD Instinct MI300X GPU.
   .. [#ol-700-mi300x-past-60] **For ROCm 7.0.x** - Oracle Linux 10 and 9 are supported only on AMD Instinct MI355X, MI350X, and MI300X GPUs. Oracle Linux 8 is supported only on AMD Instinct MI300X GPU.
   .. [#mi300x-past-60] **Prior ROCm 7.0.0** - Oracle Linux is supported only on AMD Instinct MI300X GPUs.
-   .. [#db-710-mi300x-past-60] **For ROCm 7.1.x** - Debian 13 is supported only on AMD Instinct MI355X, MI350X, MI325X, and MI300X GPUs.
+   .. [#db-710-mi300x-past-60] **For ROCm 7.1.x** - Debian 13 is supported only on AMD Instinct MI325X and MI300X GPUs.
   .. [#db12-710-past-60] **For ROCm 7.1.x** - Debian 12 is supported only on AMD Instinct MI325X, MI300X, MI300A, MI250X, MI250, and MI210 GPUs.
   .. [#db-mi300x-past-60] **For ROCm 7.0.2** - Debian 13 is supported only on AMD Instinct MI300X GPUs.
   .. [#single-node-past-60] **Prior to ROCm 7.0.0** - Debian 12 is supported only on AMD Instinct MI300X GPUs for single-node functionality.
--- a/docs/data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20251006-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20251006-benchmark-models.yaml
@@ -0,0 +1,316 @@
+dockers:
+  - pull_tag: rocm/vllm:rocm7.0.0_vllm_0.10.2_20251006
+    docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.10.2_20251006/images/sha256-94fd001964e1cf55c3224a445b1fb5be31a7dac302315255db8422d813edd7f5
+    components:
+      ROCm: 7.0.0
+      vLLM: 0.10.2 (0.11.0rc2.dev160+g790d22168.rocm700)
+      PyTorch: 2.9.0a0+git1c57644
+      hipBLASLt: 1.0.0
+    dockerfile:
+      commit: 790d22168820507f3105fef29596549378cfe399
+model_groups:
+  - group: Meta Llama
+    tag: llama
+    models:
+      - model: Llama 2 70B
+        mad_tag: pyt_vllm_llama-2-70b
+        model_repo: meta-llama/Llama-2-70b-chat-hf
+        url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 4096
+          max_model_len: 4096
+      - model: Llama 3.1 8B
+        mad_tag: pyt_vllm_llama-3.1-8b
+        model_repo: meta-llama/Llama-3.1-8B-Instruct
+        url: https://huggingface.co/meta-llama/Llama-3.1-8B
+        precision: float16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.1 8B FP8
+        mad_tag: pyt_vllm_llama-3.1-8b_fp8
+        model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
+        url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
+        precision: float8
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.1 405B
+        mad_tag: pyt_vllm_llama-3.1-405b
+        model_repo: meta-llama/Llama-3.1-405B-Instruct
+        url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.1 405B FP8
+        mad_tag: pyt_vllm_llama-3.1-405b_fp8
+        model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
+        url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.1 405B MXFP4
+        mad_tag: pyt_vllm_llama-3.1-405b_fp4
+        model_repo: amd/Llama-3.1-405B-Instruct-MXFP4-Preview
+        url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-MXFP4-Preview
+        precision: float4
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.3 70B
+        mad_tag: pyt_vllm_llama-3.3-70b
+        model_repo: meta-llama/Llama-3.3-70B-Instruct
+        url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.3 70B FP8
+        mad_tag: pyt_vllm_llama-3.3-70b_fp8
+        model_repo: amd/Llama-3.3-70B-Instruct-FP8-KV
+        url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-FP8-KV
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 3.3 70B MXFP4
+        mad_tag: pyt_vllm_llama-3.3-70b_fp4
+        model_repo: amd/Llama-3.3-70B-Instruct-MXFP4-Preview
+        url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-MXFP4-Preview
+        precision: float4
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+      - model: Llama 4 Scout 17Bx16E
+        mad_tag: pyt_vllm_llama-4-scout-17b-16e
+        model_repo: meta-llama/Llama-4-Scout-17B-16E-Instruct
+        url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 32768
+          max_model_len: 8192
+      - model: Llama 4 Maverick 17Bx128E
+        mad_tag: pyt_vllm_llama-4-maverick-17b-128e
+        model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+        url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 32768
+          max_model_len: 8192
+      - model: Llama 4 Maverick 17Bx128E FP8
+        mad_tag: pyt_vllm_llama-4-maverick-17b-128e_fp8
+        model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+        url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+  - group: DeepSeek
+    tag: deepseek
+    models:
+      - model: DeepSeek R1 0528 FP8
+        mad_tag: pyt_vllm_deepseek-r1
+        model_repo: deepseek-ai/DeepSeek-R1-0528
+        url: https://huggingface.co/deepseek-ai/DeepSeek-R1-0528
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_seqs: 1024
+          max_num_batched_tokens: 131072
+          max_model_len: 8192
+  - group: OpenAI GPT OSS
+    tag: gpt-oss
+    models:
+      - model: GPT OSS 20B
+        mad_tag: pyt_vllm_gpt-oss-20b
+        model_repo: openai/gpt-oss-20b
+        url: https://huggingface.co/openai/gpt-oss-20b
+        precision: bfloat16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 8192
+          max_model_len: 8192
+      - model: GPT OSS 120B
+        mad_tag: pyt_vllm_gpt-oss-120b
+        model_repo: openai/gpt-oss-120b
+        url: https://huggingface.co/openai/gpt-oss-120b
+        precision: bfloat16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 8192
+          max_model_len: 8192
+  - group: Mistral AI
+    tag: mistral
+    models:
+      - model: Mixtral MoE 8x7B
+        mad_tag: pyt_vllm_mixtral-8x7b
+        model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
+        url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 32768
+          max_model_len: 8192
+      - model: Mixtral MoE 8x7B FP8
+        mad_tag: pyt_vllm_mixtral-8x7b_fp8
+        model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
+        url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 32768
+          max_model_len: 8192
+      - model: Mixtral MoE 8x22B
+        mad_tag: pyt_vllm_mixtral-8x22b
+        model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
+        url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 65536
+          max_model_len: 8192
+      - model: Mixtral MoE 8x22B FP8
+        mad_tag: pyt_vllm_mixtral-8x22b_fp8
+        model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
+        url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 65536
+          max_model_len: 8192
+  - group: Qwen
+    tag: qwen
+    models:
+      - model: Qwen3 8B
+        mad_tag: pyt_vllm_qwen3-8b
+        model_repo: Qwen/Qwen3-8B
+        url: https://huggingface.co/Qwen/Qwen3-8B
+        precision: float16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+      - model: Qwen3 32B
+        mad_tag: pyt_vllm_qwen3-32b
+        model_repo: Qwen/Qwen3-32b
+        url: https://huggingface.co/Qwen/Qwen3-32B
+        precision: float16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+      - model: Qwen3 30B A3B
+        mad_tag: pyt_vllm_qwen3-30b-a3b
+        model_repo: Qwen/Qwen3-30B-A3B
+        url: https://huggingface.co/Qwen/Qwen3-30B-A3B
+        precision: float16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+      - model: Qwen3 30B A3B FP8
+        mad_tag: pyt_vllm_qwen3-30b-a3b_fp8
+        model_repo: Qwen/Qwen3-30B-A3B-FP8
+        url: https://huggingface.co/Qwen/Qwen3-30B-A3B-FP8
+        precision: float16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+      - model: Qwen3 235B A22B
+        mad_tag: pyt_vllm_qwen3-235b-a22b
+        model_repo: Qwen/Qwen3-235B-A22B
+        url: https://huggingface.co/Qwen/Qwen3-235B-A22B
+        precision: float16
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+      - model: Qwen3 235B A22B FP8
+        mad_tag: pyt_vllm_qwen3-235b-a22b_fp8
+        model_repo: Qwen/Qwen3-235B-A22B-FP8
+        url: https://huggingface.co/Qwen/Qwen3-235B-A22B-FP8
+        precision: float8
+        config:
+          tp: 8
+          dtype: auto
+          kv_cache_dtype: fp8
+          max_num_batched_tokens: 40960
+          max_model_len: 8192
+  - group: Microsoft Phi
+    tag: phi
+    models:
+      - model: Phi-4
+        mad_tag: pyt_vllm_phi-4
+        model_repo: microsoft/phi-4
+        url: https://huggingface.co/microsoft/phi-4
+        precision: float16
+        config:
+          tp: 1
+          dtype: auto
+          kv_cache_dtype: auto
+          max_num_batched_tokens: 16384
+          max_model_len: 8192
--- a/docs/data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml
@@ -1,13 +1,13 @@
 dockers:
-  - pull_tag: rocm/vllm:rocm7.0.0_vllm_0.10.2_20251006
-    docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.10.2_20251006/images/sha256-94fd001964e1cf55c3224a445b1fb5be31a7dac302315255db8422d813edd7f5
+  - pull_tag: rocm/vllm:rocm7.0.0_vllm_0.11.1_20251103
+    docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.11.1_20251103/images/sha256-8d60429043d4d00958da46039a1de0d9b82df814d45da482497eef26a6076506
    components:
      ROCm: 7.0.0
-      vLLM: 0.10.2 (0.11.0rc2.dev160+g790d22168.rocm700)
+      vLLM: 0.11.1 (0.11.1rc2.dev141+g38f225c2a.rocm700)
      PyTorch: 2.9.0a0+git1c57644
      hipBLASLt: 1.0.0
    dockerfile:
-      commit: 790d22168820507f3105fef29596549378cfe399
+      commit: 38f225c2abeadc04c2cc398814c2f53ea02c3c72
 model_groups:
  - group: Meta Llama
    tag: llama
--- a/docs/how-to/rocm-for-ai/inference-optimization/vllm-optimization.rst
+++ b/docs/how-to/rocm-for-ai/inference-optimization/vllm-optimization.rst
@@ -46,6 +46,8 @@ The following variables are generally useful for Instinct MI300X/MI355X GPUs and
    multi-GPU distributed workloads** (tensor parallelism, pipeline
    parallelism). Single-GPU inference does not need this.

+.. _vllm-optimization-aiter-switches:
+
 AITER (AI Tensor Engine for ROCm) switches
 ==========================================

--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.2-20251006.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.2-20251006.rst
@@ -0,0 +1,482 @@
+:orphan:
+
+.. meta::
+   :description: Learn how to validate LLM inference performance on MI300X GPUs using AMD MAD and the ROCm vLLM Docker image.
+   :keywords: model, MAD, automation, dashboarding, validate
+
+**********************************
+vLLM inference performance testing
+**********************************
+
+.. caution::
+
+   This documentation does not reflect the latest version of ROCm vLLM
+   inference performance documentation. See :doc:`../vllm` for the latest version.
+
+.. _vllm-benchmark-unified-docker-930:
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20251006-benchmark-models.yaml
+
+   {% set docker = data.dockers[0] %}
+
+   The `ROCm vLLM Docker <{{ docker.docker_hub_url }}>`_ image offers a
+   prebuilt, optimized environment for validating large language model (LLM)
+   inference performance on AMD Instinct™ MI355X, MI350X, MI325X and MI300X
+   GPUs. This ROCm vLLM Docker image integrates vLLM and PyTorch tailored
+   specifically for AMD data center GPUs and includes the following components:
+
+   .. tab-set::
+
+      .. tab-item:: {{ docker.pull_tag }}
+
+         .. list-table::
+            :header-rows: 1
+
+            * - Software component
+              - Version
+
+            {% for component_name, component_version in docker.components.items() %}
+            * - {{ component_name }}
+              - {{ component_version }}
+            {% endfor %}
+
+With this Docker image, you can quickly test the :ref:`expected
+inference performance numbers <vllm-benchmark-performance-measurements-930>` for
+AMD Instinct GPUs.
+
+What's new
+==========
+
+The following is summary of notable changes since the :doc:`previous ROCm/vLLM Docker release <vllm-history>`.
+
+* Added support for AMD Instinct MI355X and MI350X GPUs.
+
+* Added support and benchmarking instructions for the following models. See :ref:`vllm-benchmark-supported-models-930`.
+
+  * Llama 4 Scout and Maverick
+
+  * DeepSeek R1 0528 FP8
+
+  * MXFP4 models (MI355X and MI350X only): Llama 3.3 70B MXFP4 and Llama 3.1 405B MXFP4
+
+  * GPT OSS 20B and 120B
+
+  * Qwen 3 32B, 30B-A3B, and 235B-A22B
+
+* Removed the deprecated ``--max-seq-len-to-capture`` flag.
+
+* ``--gpu-memory-utilization`` is now configurable via the `configuration files
+  <https://github.com/ROCm/MAD/tree/develop/scripts/vllm/configs>`__ in the MAD
+  repository.
+
+.. _vllm-benchmark-supported-models-930:
+
+Supported models
+================
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20251006-benchmark-models.yaml
+
+   {% set docker = data.dockers[0] %}
+   {% set model_groups = data.model_groups %}
+
+   .. _vllm-benchmark-available-models-930:
+
+   The following models are supported for inference performance benchmarking
+   with vLLM and ROCm. Some instructions, commands, and recommendations in this
+   documentation might vary by model -- select one to get started. MXFP4 models
+   are only supported on MI355X and MI350X GPUs.
+
+   .. raw:: html
+
+      <div id="vllm-benchmark-ud-params-picker" class="container-fluid">
+         <div class="row gx-0">
+            <div class="col-2 me-1 px-2 model-param-head">Model</div>
+            <div class="row col-10 pe-0">
+      {% for model_group in model_groups %}
+               <div class="col-4 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
+      {% endfor %}
+            </div>
+         </div>
+
+         <div class="row gx-0 pt-1">
+            <div class="col-2 me-1 px-2 model-param-head">Variant</div>
+            <div class="row col-10 pe-0">
+      {% for model_group in model_groups %}
+         {% set models = model_group.models %}
+         {% for model in models %}
+            {% if models|length % 3 == 0 %}
+               <div class="col-4 px-2 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
+            {% else %}
+               <div class="col-6 px-2 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
+            {% endif %}
+         {% endfor %}
+      {% endfor %}
+            </div>
+         </div>
+      </div>
+
+   .. _vllm-benchmark-vllm-930:
+
+   {% for model_group in model_groups %}
+      {% for model in model_group.models %}
+
+   .. container:: model-doc {{ model.mad_tag }}
+
+
+      {% if model.precision == "float4" %}
+      .. important::
+
+         MXFP4 is supported only on MI355X and MI350X GPUs.
+      {% endif %}
+
+      .. note::
+
+         See the `{{ model.model }} model card on Hugging Face <{{ model.url }}>`_ to learn more about your selected model.
+         Some models require access authorization prior to use via an external license agreement through a third party.
+      {% if model.precision == "float8" and model.model_repo.startswith("amd") %}
+         This model uses FP8 quantization via `AMD Quark <https://quark.docs.amd.com/latest/>`__ for efficient inference on AMD GPUs.
+      {% endif %}
+      {% if model.precision == "float4" and model.model_repo.startswith("amd") %}
+         This model uses FP4 quantization via `AMD Quark <https://quark.docs.amd.com/latest/>`__ for efficient inference on AMD GPUs.
+      {% endif %}
+
+      {% endfor %}
+   {% endfor %}
+
+.. _vllm-benchmark-performance-measurements-930:
+
+Performance measurements
+========================
+
+To evaluate performance, the
+`Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
+page provides reference throughput and serving measurements for inferencing popular AI models.
+
+.. important::
+
+   The performance data presented in
+   `Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
+   only reflects the latest version of this inference benchmarking environment.
+   The listed measurements should not be interpreted as the peak performance achievable by AMD Instinct GPUs or ROCm software.
+
+System validation
+=================
+
+Before running AI workloads, it's important to validate that your AMD hardware is configured
+correctly and performing optimally.
+
+If you have already validated your system settings, including aspects like NUMA auto-balancing, you
+can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
+optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
+before starting training.
+
+To test for optimal performance, consult the recommended :ref:`System health benchmarks
+<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
+system's configuration.
+
+Pull the Docker image
+=====================
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20251006-benchmark-models.yaml
+
+   {% set docker = data.dockers[0] %}
+
+   Download the `ROCm vLLM Docker image <{{ docker.docker_hub_url }}>`_.
+   Use the following command to pull the Docker image from Docker Hub.
+
+   .. code-block:: shell
+
+      docker pull {{ docker.pull_tag }}
+
+Benchmarking
+============
+
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20251006-benchmark-models.yaml
+
+   {% set docker = data.dockers[0] %}
+   {% set model_groups = data.model_groups %}
+
+   Once the setup is complete, choose between two options to reproduce the
+   benchmark results:
+
+   .. _vllm-benchmark-mad-930:
+
+   {% for model_group in model_groups %}
+      {% for model in model_group.models %}
+
+   .. container:: model-doc {{model.mad_tag}}
+
+      .. tab-set::
+
+         .. tab-item:: MAD-integrated benchmarking
+
+            The following run command is tailored to {{ model.model }}.
+            See :ref:`vllm-benchmark-supported-models-930` to switch to another available model.
+
+            1. Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
+               directory and install the required packages on the host machine.
+
+               .. code-block:: shell
+
+                  git clone https://github.com/ROCm/MAD
+                  cd MAD
+                  pip install -r requirements.txt
+
+            2. On the host machine, use this command to run the performance benchmark test on
+               the `{{model.model}} <{{ model.url }}>`_ model using one node with the
+               :literal:`{{model.precision}}` data type.
+
+               .. code-block:: shell
+
+                  export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
+                  madengine run \
+                      --tags {{model.mad_tag}} \
+                      --keep-model-dir \
+                      --live-output
+
+            MAD launches a Docker container with the name
+            ``container_ci-{{model.mad_tag}}``. The throughput and serving reports of the
+            model are collected in the following paths: ``{{ model.mad_tag }}_throughput.csv``
+            and ``{{ model.mad_tag }}_serving.csv``.
+
+            Although the :ref:`available models
+            <vllm-benchmark-available-models-930>` are preconfigured to collect
+            offline throughput and online serving performance data, you can
+            also change the benchmarking parameters. See the standalone
+            benchmarking tab for more information.
+
+            {% if model.tunableop %}
+
+            .. note::
+
+               For improved performance, consider enabling :ref:`PyTorch TunableOp <mi300x-tunableop>`.
+               TunableOp automatically explores different implementations and configurations of certain PyTorch
+               operators to find the fastest one for your hardware.
+
+               By default, ``{{model.mad_tag}}`` runs with TunableOp disabled (see
+               `<https://github.com/ROCm/MAD/blob/develop/models.json>`__). To enable it, include
+               the ``--tunableop on`` argument in your run.
+
+               Enabling TunableOp triggers a two-pass run -- a warm-up followed by the
+               performance-collection run.
+
+            {% endif %}
+
+         .. tab-item:: Standalone benchmarking
+
+            The following commands are optimized for {{ model.model }}.
+            See :ref:`vllm-benchmark-supported-models-930` to switch to another available model.
+
+            .. seealso::
+
+               For more information on configuration, see the `config files
+               <https://github.com/ROCm/MAD/tree/develop/scripts/vllm/configs>`__
+               in the MAD repository. Refer to the `vLLM engine <https://docs.vllm.ai/en/latest/configuration/engine_args.html#engineargs>`__
+               for descriptions of available configuration options
+               and `Benchmarking vLLM <https://github.com/vllm-project/vllm/blob/main/benchmarks/README.md>`__ for
+               additional benchmarking information.
+
+            .. rubric:: Launch the container
+
+            You can run the vLLM benchmark tool independently by starting the
+            `Docker container <{{ docker.docker_hub_url }}>`_ as shown
+            in the following snippet.
+
+            .. code-block:: shell
+
+               docker pull {{ docker.pull_tag }}
+               docker run -it \
+                   --device=/dev/kfd \
+                   --device=/dev/dri \
+                   --group-add video \
+                   --shm-size 16G \
+                   --security-opt seccomp=unconfined \
+                   --security-opt apparmor=unconfined \
+                   --cap-add=SYS_PTRACE \
+                   -v $(pwd):/workspace \
+                   --env HUGGINGFACE_HUB_CACHE=/workspace \
+                   --name test \
+                   {{ docker.pull_tag }}
+
+            .. rubric:: Throughput command
+
+            Use the following command to start the throughput benchmark.
+
+            .. code-block:: shell
+
+               model={{ model.model_repo }}
+               tp={{ model.config.tp }}
+               num_prompts={{ model.config.num_prompts | default(1024) }}
+               in={{ model.config.in | default(128) }}
+               out={{ model.config.in | default(128) }}
+               dtype={{ model.config.dtype | default("auto") }}
+               kv_cache_dtype={{ model.config.kv_cache_dtype }}
+               max_num_seqs={{ model.config.max_num_seqs | default(1024) }}
+               max_num_batched_tokens={{ model.config.max_num_batched_tokens }}
+               max_model_len={{ model.config.max_model_len }}
+
+               vllm bench throughput --model $model \
+                   -tp $tp \
+                   --num-prompts $num_prompts \
+                   --input-len $in \
+                   --output-len $out \
+                   --dtype $dtype \
+                   --kv-cache-dtype $kv_cache_dtype \
+                   --max-num-seqs $max_num_seqs \
+                   --max-num-batched-tokens $max_num_batched_tokens \
+                   --max-model-len $max_model_len \
+                   --trust-remote-code \
+                   --output-json ${model}_throughput.json \
+                   --gpu-memory-utilization {{ model.config.gpu_memory_utilization | default(0.9) }}
+
+            .. rubric:: Serving command
+
+            1. Start the server using the following command:
+
+               .. code-block:: shell
+
+                  model={{ model.model_repo }}
+                  tp={{ model.config.tp }}
+                  dtype={{ model.config.dtype }}
+                  kv_cache_dtype={{ model.config.kv_cache_dtype }}
+                  max_num_seqs=256
+                  max_num_batched_tokens={{ model.config.max_num_batched_tokens }}
+                  max_model_len={{ model.config.max_model_len }}
+
+                  vllm serve $model \
+                      -tp $tp \
+                      --dtype $dtype \
+                      --kv-cache-dtype $kv_cache_dtype \
+                      --max-num-seqs $max_num_seqs \
+                      --max-num-batched-tokens $max_num_batched_tokens \
+                      --max-model-len $max_model_len \
+                      --no-enable-prefix-caching \
+                      --swap-space 16 \
+                      --disable-log-requests \
+                      --trust-remote-code \
+                      --gpu-memory-utilization 0.9
+
+               Wait until the model has loaded and the server is ready to accept requests.
+
+            2. On another terminal on the same machine, run the benchmark:
+
+               .. code-block:: shell
+
+                  # Connect to the container
+                  docker exec -it test bash
+
+                  # Wait for the server to start
+                  until curl -s http://localhost:8000/v1/models; do sleep 30; done
+
+                  # Run the benchmark
+                  model={{ model.model_repo }}
+                  max_concurrency=1
+                  num_prompts=10
+                  in=128
+                  out=128
+                  vllm bench serve --model $model \
+                      --percentile-metrics "ttft,tpot,itl,e2el" \
+                      --dataset-name random \
+                      --ignore-eos \
+                      --max-concurrency $max_concurrency \
+                      --num-prompts $num_prompts \
+                      --random-input-len $in \
+                      --random-output-len $out \
+                      --trust-remote-code \
+                      --save-result \
+                      --result-filename ${model}_serving.json
+
+            .. note::
+
+               For improved performance with certain Mixture of Experts models, such as Mixtral 8x22B,
+               try adding ``export VLLM_ROCM_USE_AITER=1`` to your commands.
+
+               If you encounter the following error, pass your access-authorized Hugging
+               Face token to the gated models.
+
+               .. code-block::
+
+                  OSError: You are trying to access a gated repo.
+
+                  # pass your HF_TOKEN
+                  export HF_TOKEN=$your_personal_hf_token
+
+            .. raw:: html
+
+               <style>
+               mjx-container[jax="CHTML"][display="true"] {
+                  text-align: left;
+                  margin: 0;
+               }
+               </style>
+
+            .. note::
+
+               Throughput is calculated as:
+
+               - .. math:: throughput\_tot = requests \times (\mathsf{\text{input lengths}} + \mathsf{\text{output lengths}}) / elapsed\_time
+
+               - .. math:: throughput\_gen = requests \times \mathsf{\text{output lengths}} / elapsed\_time
+      {% endfor %}
+   {% endfor %}
+
+Advanced usage
+==============
+
+For information on experimental features and known issues related to ROCm optimization efforts on vLLM,
+see the developer's guide at `<https://github.com/ROCm/vllm/blob/documentation/docs/dev-docker/README.md>`__.
+
+Reproducing the Docker image
+----------------------------
+
+To reproduce this ROCm-enabled vLLM Docker image release, follow these steps:
+
+1. Clone the `vLLM repository <https://github.com/vllm-project/vllm>`__.
+
+   .. code-block:: shell
+
+      git clone https://github.com/vllm-project/vllm.git
+      cd vllm
+
+2. Use the following command to build the image directly from the specified commit.
+
+   .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.10.1_20251006-benchmark-models.yaml
+
+      {% set docker = data.dockers[0] %}
+      .. code-block:: shell
+
+         docker build -f docker/Dockerfile.rocm \
+             --build-arg REMOTE_VLLM=1 \
+             --build-arg VLLM_REPO=https://github.com/ROCm/vllm \
+             --build-arg VLLM_BRANCH="{{ docker.dockerfile.commit }}" \
+             -t vllm-rocm .
+
+   .. tip::
+
+      Replace ``vllm-rocm`` with your desired image tag.
+
+Further reading
+===============
+
+- To learn more about the options for latency and throughput benchmark scripts,
+  see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.
+
+- To learn more about MAD and the ``madengine`` CLI, see the `MAD usage guide <https://github.com/ROCm/MAD?tab=readme-ov-file#usage-guide>`__.
+
+- To learn more about system settings and management practices to configure your system for
+  AMD Instinct MI300X Series GPUs, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
+
+- See :ref:`fine-tuning-llms-vllm` and :ref:`mi300x-vllm-optimization` for
+  a brief introduction to vLLM and optimization strategies.
+
+- For application performance optimization strategies for HPC and AI workloads,
+  including inference with vLLM, see :doc:`/how-to/rocm-for-ai/inference-optimization/workload`.
+
+- For a list of other ready-made Docker images for AI with ROCm, see
+  `AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.
+
+Previous versions
+=================
+
+See :doc:`vllm-history` to find documentation for previous releases
+of the ``ROCm/vllm`` Docker image.
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-history.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-history.rst
@@ -16,14 +16,23 @@ previous releases of the ``ROCm/vllm`` Docker image on `Docker Hub <https://hub.
     - Components
     - Resources

-   * - ``rocm/vllm:rocm7.0.0_vllm_0.10.2_20251006``
+   * - ``rocm/vllm:rocm7.0.0_vllm_0.11.1_20251024``
       (latest)
+     -
+       * ROCm 7.0.0
+       * vLLM 0.11.1
+       * PyTorch 2.9.0
+     -
+       * :doc:`Documentation <../vllm>`
+       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.10.2_20251006/images/sha256-94fd001964e1cf55c3224a445b1fb5be31a7dac302315255db8422d813edd7f5>`__
+
+   * - ``rocm/vllm:rocm7.0.0_vllm_0.10.2_20251006``
     -
       * ROCm 7.0.0
       * vLLM 0.10.2
       * PyTorch 2.9.0
     -
-       * :doc:`Documentation <../vllm>`
+       * :doc:`Documentation <vllm-0.10.2-20251006>`
       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.10.2_20251006/images/sha256-94fd001964e1cf55c3224a445b1fb5be31a7dac302315255db8422d813edd7f5>`__

   * - ``rocm/vllm:rocm6.4.1_vllm_0.10.1_20250909``
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst
@@ -6,7 +6,7 @@
 vLLM inference performance testing
 **********************************

-.. _vllm-benchmark-unified-docker-930:
+.. _vllm-benchmark-unified-docker-1024:

 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml

@@ -34,7 +34,7 @@ vLLM inference performance testing
            {% endfor %}

 With this Docker image, you can quickly test the :ref:`expected
-inference performance numbers <vllm-benchmark-performance-measurements-930>` for
+inference performance numbers <vllm-benchmark-performance-measurements-1024>` for
 AMD Instinct GPUs.

 What's new
@@ -42,27 +42,13 @@ What's new

 The following is summary of notable changes since the :doc:`previous ROCm/vLLM Docker release <previous-versions/vllm-history>`.

-* Added support for AMD Instinct MI355X and MI350X GPUs.
+* Enabled :ref:`AITER <vllm-optimization-aiter-switches>` by default.

-* Added support and benchmarking instructions for the following models. See :ref:`vllm-benchmark-supported-models-930`.
+* Fixed ``rms_norm`` segfault issue with Qwen 3 235B.

-  * Llama 4 Scout and Maverick
+* Known performance degradation on Llama 4 models due to `an upstream vLLM issue <https://github.com/vllm-project/vllm/issues/26320>`_.

-  * DeepSeek R1 0528 FP8
-
-  * MXFP4 models (MI355X and MI350X only): Llama 3.3 70B MXFP4 and Llama 3.1 405B MXFP4
-
-  * GPT OSS 20B and 120B
-
-  * Qwen 3 32B, 30B-A3B, and 235B-A22B
-
-* Removed the deprecated ``--max-seq-len-to-capture`` flag.
-
-* ``--gpu-memory-utilization`` is now configurable via the `configuration files
-  <https://github.com/ROCm/MAD/tree/develop/scripts/vllm/configs>`__ in the MAD
-  repository.
-
-.. _vllm-benchmark-supported-models-930:
+.. _vllm-benchmark-supported-models-1024:

 Supported models
 ================
@@ -72,7 +58,7 @@ Supported models
   {% set docker = data.dockers[0] %}
   {% set model_groups = data.model_groups %}

-   .. _vllm-benchmark-available-models-930:
+   .. _vllm-benchmark-available-models-1024:

   The following models are supported for inference performance benchmarking
   with vLLM and ROCm. Some instructions, commands, and recommendations in this
@@ -108,7 +94,7 @@ Supported models
         </div>
      </div>

-   .. _vllm-benchmark-vllm-930:
+   .. _vllm-benchmark-vllm-1024:

   {% for model_group in model_groups %}
      {% for model in model_group.models %}
@@ -136,7 +122,7 @@ Supported models
      {% endfor %}
   {% endfor %}

-.. _vllm-benchmark-performance-measurements-930:
+.. _vllm-benchmark-performance-measurements-1024:

 Performance measurements
 ========================
@@ -192,7 +178,7 @@ Benchmarking
   Once the setup is complete, choose between two options to reproduce the
   benchmark results:

-   .. _vllm-benchmark-mad-930:
+   .. _vllm-benchmark-mad-1024:

   {% for model_group in model_groups %}
      {% for model in model_group.models %}
@@ -204,7 +190,7 @@ Benchmarking
         .. tab-item:: MAD-integrated benchmarking

            The following run command is tailored to {{ model.model }}.
-            See :ref:`vllm-benchmark-supported-models-930` to switch to another available model.
+            See :ref:`vllm-benchmark-supported-models-1024` to switch to another available model.

            1. Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
               directory and install the required packages on the host machine.
@@ -233,7 +219,7 @@ Benchmarking
            and ``{{ model.mad_tag }}_serving.csv``.

            Although the :ref:`available models
-            <vllm-benchmark-available-models-930>` are preconfigured to collect
+            <vllm-benchmark-available-models-1024>` are preconfigured to collect
            offline throughput and online serving performance data, you can
            also change the benchmarking parameters. See the standalone
            benchmarking tab for more information.
@@ -258,7 +244,7 @@ Benchmarking
         .. tab-item:: Standalone benchmarking

            The following commands are optimized for {{ model.model }}.
-            See :ref:`vllm-benchmark-supported-models-930` to switch to another available model.
+            See :ref:`vllm-benchmark-supported-models-1024` to switch to another available model.

            .. seealso::

@@ -419,6 +405,10 @@ Advanced usage
 For information on experimental features and known issues related to ROCm optimization efforts on vLLM,
 see the developer's guide at `<https://github.com/ROCm/vllm/blob/documentation/docs/dev-docker/README.md>`__.

+.. note::
+
+   If you’re using this Docker image on other AMD GPUs such as the AMD Instinct MI200 Series or Radeon, add ``export VLLM_ROCM_USE_AITER=0`` to your command, since AITER is only supported on gfx942 and gfx950 architectures.
+
 Reproducing the Docker image
 ----------------------------
Author	SHA1	Message	Date
Joseph Macaranas	7ec34dee52	[External CI] Support rocprofiler-sdk libelf changes	2025-11-05 15:27:16 -05:00
yugang-amd	674dc355e4	vLLM 10/24 release (#5626 ) * vLLM 10/24 release * updates per SME inputs * Update docs/how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst Co-authored-by: Jeffrey Novotny <jnovotny@amd.com> --------- Co-authored-by: Jeffrey Novotny <jnovotny@amd.com>	2025-11-05 11:13:50 -05:00
Adel Johar	c7f3a56811	[Ex CI] Add half, rccl, and dependencies for rpp, mivisionx and rocjpeg	2025-11-05 15:59:15 +01:00
Pratik Basyal	0107fa731e	ROCm Bandwidth test issue added (#5612 )	2025-10-31 18:19:40 -04:00
Pratik Basyal	a87ec360e1	710 known issues update[Batch1] (#5604 ) * Version update * ROCm Bandwidth failure added * Editorial feedback added * Minor change * rocprofv3 issue added * Minor change * ROCgdb issue added * SME feedback incorpprated * Leo's feedback added * ROCm Compute Profiler known issue added * Changelog synced	2025-10-31 14:57:13 -04:00
amd-hsivasun	7215e1e8c7	[Ex CI] Update rocwmma pipeline ID to monorepo (#5602 )	2025-10-31 13:56:17 -04:00
amd-hsivasun	e4a59d8c66	[Ex CI] Enable rocWMMA Monorepo (#5597 ) * [Ex CI] Enable rocWMMA Monorepo * Updated to use component name parameter	2025-10-30 13:43:05 -04:00
Pratik Basyal	8108fe7275	7.1.0 Post GA updates (#5600 ) * Post GA updates * Mono repo link added * AMD SMI changelog link removed	2025-10-30 13:27:25 -04:00