mirror of
https://github.com/ROCm/ROCm.git
synced 2026-01-10 15:18:11 -05:00
Compare commits
48 Commits
docs/6.4.1
...
docs/6.2.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6815b69793 | ||
|
|
2f192a4768 | ||
|
|
0f80a3b068 | ||
|
|
c5bf17e358 | ||
|
|
5b7315f5b1 | ||
|
|
4302900e68 | ||
|
|
fe5fe6c4d0 | ||
|
|
aa5e1b98a9 | ||
|
|
8789d63887 | ||
|
|
0f61f2e8e0 | ||
|
|
f9586abf5a | ||
|
|
14f9c58e3a | ||
|
|
41383b3c77 | ||
|
|
2e7231fad2 | ||
|
|
ae158f00f6 | ||
|
|
275448a26e | ||
|
|
f6bf909ad1 | ||
|
|
6ae116f03c | ||
|
|
0c19e2da62 | ||
|
|
73d9fb01ab | ||
|
|
42085c14e6 | ||
|
|
e4774085da | ||
|
|
b6f9b58984 | ||
|
|
3b94287182 | ||
|
|
6366347c59 | ||
|
|
fc17a21260 | ||
|
|
e625ee55f6 | ||
|
|
78a93b63b2 | ||
|
|
b7be98cf43 | ||
|
|
4c0199abd9 | ||
|
|
84d44a7c3c | ||
|
|
f622b8796b | ||
|
|
6b2987d9b5 | ||
|
|
4c65fa36da | ||
|
|
18a14f8187 | ||
|
|
b9183c1b7d | ||
|
|
6c1ec0e9a4 | ||
|
|
f8fcdbeac6 | ||
|
|
0411a5a806 | ||
|
|
52ea80d262 | ||
|
|
a9d53e3d4e | ||
|
|
5ab19055ee | ||
|
|
a6f550d716 | ||
|
|
c835c16d68 | ||
|
|
af8f67a155 | ||
|
|
c32d002239 | ||
|
|
16dc949166 | ||
|
|
bf36a27e0b |
@@ -75,6 +75,8 @@ jobs:
|
||||
dependencySource: fixed
|
||||
fixedComponentName: half
|
||||
fixedPipelineIdentifier: $(half560-pipeline-id)
|
||||
skipLibraryLinking: true
|
||||
skipLlvmSymlink: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
|
||||
@@ -58,6 +58,9 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: matching_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: hipother_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependenciesAMD }}
|
||||
|
||||
@@ -25,6 +25,7 @@ parameters:
|
||||
- rocBLAS
|
||||
- hipBLAS
|
||||
- hipBLASLt
|
||||
- hipBLAS-common
|
||||
- half
|
||||
- composable_kernel
|
||||
- rocm-cmake
|
||||
|
||||
@@ -69,12 +69,6 @@ jobs:
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
# Set link to redirect llvm folder
|
||||
- task: Bash@3
|
||||
displayName: create symlink
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: ln -s $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
|
||||
@@ -24,9 +24,8 @@ jobs:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-download.yml
|
||||
parameters:
|
||||
useDefaultBranch: false
|
||||
componentName: HIP
|
||||
branchName: develop
|
||||
pipelineId: $(hip-pipeline-id)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
|
||||
parameters:
|
||||
sourceDir: $(Agent.BuildDirectory)/rocm
|
||||
|
||||
@@ -23,7 +23,7 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- clr
|
||||
- hipBLAS
|
||||
- hipBLAS-common
|
||||
- llvm-project
|
||||
- rocminfo
|
||||
- rocprofiler-register
|
||||
@@ -93,12 +93,6 @@ jobs:
|
||||
- script: sudo make install
|
||||
displayName: Install hipBLASLt external dependencies
|
||||
workingDirectory: $(Pipeline.Workspace)/deps
|
||||
# Set link to redirect llvm folder
|
||||
- task: Bash@3
|
||||
displayName: Symlink to rocm/lib/llvm
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: ln -s $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
|
||||
@@ -73,7 +73,6 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
|
||||
@@ -105,6 +105,7 @@ parameters:
|
||||
# below are additional dependencies not called out by build script, but throw errors during cmake
|
||||
- hipCUB
|
||||
- rocThrust
|
||||
- hipBLAS-common
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
@@ -175,12 +176,10 @@ jobs:
|
||||
dependencySource: staging
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
- task: Bash@3
|
||||
displayName: ROCm symbolic links
|
||||
displayName: ROCm symbolic link
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
|
||||
sudo ln -s $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm
|
||||
script: sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
|
||||
- checkout: self
|
||||
- task: Bash@3
|
||||
displayName: git clone pytorch builder
|
||||
|
||||
@@ -11,6 +11,7 @@ parameters:
|
||||
- half
|
||||
- HIP
|
||||
- hipBLAS
|
||||
- hipBLAS-common
|
||||
- hipBLASLt
|
||||
- hipCUB
|
||||
- hipFFT
|
||||
@@ -72,6 +73,12 @@ jobs:
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- task: DeleteFiles@1
|
||||
displayName: 'Cleanup checkout space'
|
||||
@@ -84,28 +91,26 @@ jobs:
|
||||
SourceFolder: '$(Build.ArtifactStagingDirectory)'
|
||||
Contents: '/**/*'
|
||||
RemoveDotFiles: true
|
||||
- script: sudo chmod 777 /mnt
|
||||
displayName: 'Set permissions for /mnt'
|
||||
- script: df -h
|
||||
displayName: System disk space before ROCm
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
dependencySource: staging
|
||||
extractToMnt: true
|
||||
skipLibraryLinking: true
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
- script: df -h
|
||||
displayName: System disk space after ROCm
|
||||
- script: du -sh /mnt/rocm
|
||||
- script: du -sh $(Agent.BuildDirectory)/rocm
|
||||
displayName: Uncompressed ROCm size
|
||||
- task: ArchiveFiles@2
|
||||
displayName: Compress rocm-nightly
|
||||
inputs:
|
||||
rootFolderOrFile: /mnt/rocm
|
||||
rootFolderOrFile: $(Agent.BuildDirectory)/rocm
|
||||
includeRootFolder: false
|
||||
archiveType: tar
|
||||
tarCompression: gz
|
||||
archiveFile: $(Build.ArtifactStagingDirectory)/$(Build.DefinitionName)_$(Build.BuildNumber)_ubuntu2204.tar.gz
|
||||
archiveFile: $(Build.ArtifactStagingDirectory)/$(Build.DefinitionName)_$(Build.BuildNumber)_ubuntu2204_$(JOB_GPU_TARGET).tar.gz
|
||||
- script: du -sh $(Build.ArtifactStagingDirectory)
|
||||
displayName: Compressed ROCm size
|
||||
- task: PublishPipelineArtifact@1
|
||||
|
||||
@@ -25,10 +25,10 @@ parameters:
|
||||
amdsmi: develop
|
||||
aomp-extras: aomp-dev
|
||||
aomp: aomp-dev
|
||||
clr: develop
|
||||
clr: amd-staging
|
||||
composable_kernel: develop
|
||||
half: rocm
|
||||
HIP: develop
|
||||
HIP: amd-staging
|
||||
hipBLAS: develop
|
||||
hipBLASLt: develop
|
||||
hipBLAS-common: develop
|
||||
@@ -89,7 +89,7 @@ steps:
|
||||
specificBuildWithTriggering: true
|
||||
itemPattern: '**/*${{ parameters.fileFilter }}*'
|
||||
${{ if eq(parameters.latestFromBranch, true) }}:
|
||||
${{ if notIn(parameters.componentName, 'aomp', 'clr', 'rocMLIR') }}: # remove this once these pipelines are functional + up-to-date
|
||||
${{ if notIn(parameters.componentName, 'aomp') }}: # remove this once these pipelines are functional + up-to-date
|
||||
buildVersionToDownload: latestFromBranch # default is 'latest'
|
||||
${{ if eq(parameters.useDefaultBranch, true) }}:
|
||||
branchName: refs/heads/${{ parameters.defaultBranchList[parameters.componentName] }}
|
||||
|
||||
@@ -143,6 +143,11 @@ parameters:
|
||||
- name: skipLibraryLinking
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if llvm-project is not downloaded in a particular call
|
||||
# or if you just don't want the symlink
|
||||
- name: skipLlvmSymlink
|
||||
type: boolean
|
||||
default: false
|
||||
# some ROCm components can specify GPU target and this will affect downloads
|
||||
- name: gpuTarget
|
||||
type: string
|
||||
@@ -229,8 +234,15 @@ steps:
|
||||
pipelineId: ${{ parameters.fixedPipelineIdentifier }}
|
||||
latestFromBranch: false
|
||||
extractToMnt: ${{ parameters.extractToMnt }}
|
||||
# Set link to redirect llvm folder
|
||||
- ${{ if eq(parameters.skipLlvmSymlink, false) }}:
|
||||
- task: Bash@3
|
||||
displayName: Symlink from rocm/llvm to rocm/lib/llvm
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo ln -s $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm
|
||||
- task: Bash@3
|
||||
displayName: 'list downloaded ROCm files'
|
||||
displayName: 'List downloaded ROCm files'
|
||||
inputs:
|
||||
targetType: inline
|
||||
${{ if eq(parameters.extractToMnt, true) }}:
|
||||
@@ -239,7 +251,7 @@ steps:
|
||||
script: ls -1R $(Agent.BuildDirectory)/rocm
|
||||
- ${{ if eq(parameters.skipLibraryLinking, false) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'link ROCm shared libraries'
|
||||
displayName: 'Link ROCm shared libraries'
|
||||
inputs:
|
||||
targetType: inline
|
||||
# OS ignores if the ROCm lib folder shows up more than once
|
||||
|
||||
@@ -3,20 +3,19 @@
|
||||
|
||||
version: 2
|
||||
|
||||
sphinx:
|
||||
configuration: docs/conf.py
|
||||
|
||||
formats: [htmlzip]
|
||||
|
||||
python:
|
||||
install:
|
||||
- requirements: docs/sphinx/requirements.txt
|
||||
|
||||
build:
|
||||
os: ubuntu-22.04
|
||||
tools:
|
||||
python: "3.10"
|
||||
apt_packages:
|
||||
- "doxygen"
|
||||
- "gfortran" # For pre-processing fortran sources
|
||||
- "graphviz" # For dot graphs in doxygen
|
||||
|
||||
python:
|
||||
install:
|
||||
- requirements: docs/sphinx/requirements.txt
|
||||
|
||||
sphinx:
|
||||
configuration: docs/conf.py
|
||||
|
||||
formats: []
|
||||
|
||||
@@ -36,6 +36,7 @@ Bluefield
|
||||
Bootloader
|
||||
CCD
|
||||
CDNA
|
||||
CHTML
|
||||
CIFAR
|
||||
CLI
|
||||
CLion
|
||||
@@ -69,6 +70,7 @@ Concretized
|
||||
Conda
|
||||
ConnectX
|
||||
CuPy
|
||||
Dashboarding
|
||||
DDR
|
||||
DF
|
||||
DGEMM
|
||||
@@ -222,6 +224,7 @@ Mellanox
|
||||
Mellanox's
|
||||
Meta's
|
||||
MirroredStrategy
|
||||
Mixtral
|
||||
Multicore
|
||||
Multithreaded
|
||||
MyEnvironment
|
||||
@@ -267,6 +270,8 @@ OpenMPI
|
||||
OpenSSL
|
||||
OpenVX
|
||||
OpenXLA
|
||||
Oversubscription
|
||||
PagedAttention
|
||||
PCC
|
||||
PCI
|
||||
PCIe
|
||||
@@ -288,6 +293,7 @@ PowerShell
|
||||
PyPi
|
||||
PyTorch
|
||||
Qcycles
|
||||
Qwen
|
||||
RAII
|
||||
RAS
|
||||
RCCL
|
||||
@@ -430,6 +436,7 @@ accuracies
|
||||
activations
|
||||
addr
|
||||
alloc
|
||||
allocatable
|
||||
allocator
|
||||
allocators
|
||||
amdgpu
|
||||
@@ -553,6 +560,7 @@ hipfort
|
||||
hipify
|
||||
hipsolver
|
||||
hipsparse
|
||||
hlist
|
||||
hotspotting
|
||||
hpc
|
||||
hpp
|
||||
@@ -576,6 +584,7 @@ intra
|
||||
invariants
|
||||
invocating
|
||||
ipo
|
||||
jax
|
||||
kdb
|
||||
latencies
|
||||
libfabric
|
||||
@@ -595,6 +604,7 @@ migraphx
|
||||
miopen
|
||||
miopengemm
|
||||
mivisionx
|
||||
mjx
|
||||
mkdir
|
||||
mlirmiopen
|
||||
mtypes
|
||||
|
||||
16
RELEASE.md
16
RELEASE.md
@@ -433,7 +433,7 @@ links in the *Version* column to go to the detailed component changelogs.
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/tensile/en/docs-6.2.0">Tensile</a></td>
|
||||
<td><a href="https://github.com/ROCm/tensile/">Tensile</a></td>
|
||||
<td>4.40.0 ⇒ <a href="#tensile-4-41-0">4.41.0</a></td>
|
||||
<td><a href="https://github.com/ROCm/tensile/releases/tag/rocm-6.2.0"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
@@ -524,7 +524,7 @@ links in the *Version* column to go to the detailed component changelogs.
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/ROCProfiler/en/docs-6.2.0">ROCProfiler</a></td>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocprofiler/en/docs-6.2.0/">ROCProfiler</a></td>
|
||||
<td>2.0.0 ⇒ <a href="#rocprofiler-2-0-0">2.0.0</a></td>
|
||||
<td><a href="https://github.com/ROCm/ROCProfiler/releases/tag/rocm-6.2.0"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
@@ -536,7 +536,7 @@ links in the *Version* column to go to the detailed component changelogs.
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr >
|
||||
<td><a href="https://rocm.docs.amd.com/projects/ROCTracer/en/docs-6.2.0">ROCTracer</a></td>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/roctracer/en/docs-6.2.0/">ROCTracer</a></td>
|
||||
<td>4.1.0</td>
|
||||
<td><a href="https://github.com/ROCm/ROCTracer/releases/tag/rocm-6.2.0"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
@@ -546,19 +546,19 @@ links in the *Version* column to go to the detailed component changelogs.
|
||||
<tr>
|
||||
<th rowspan="5"></th>
|
||||
<th rowspan="5">Development</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/HIPIFY/docs-6.2.0">HIPIFY</a></td>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/HIPIFY/en/docs-6.2.0/">HIPIFY</a></td>
|
||||
<td>17.0.0 ⇒ <a href="#hipify-18-0-0">18.0.0</a></td>
|
||||
<td><a href="https://github.com/ROCm/HIPIFY/releases/tag/rocm-6.2.0"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/ROCdbgapi/en/docs-6.2.0">ROCdbgapi</a></td>
|
||||
<td>0.71.0 ⇒ <a href="#rocdbgapi-0-75-0">0.75.0</a></td>
|
||||
<td>0.71.0 ⇒ <a href="#rocdbgapi-0-76-0">0.76.0</a></td>
|
||||
<td><a href="https://github.com/ROCm/ROCdbgapi/releases/tag/rocm-6.2.0"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rocm-cmake/en/docs-6.2.0">ROCm CMake</a></td>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/ROCmCMakeBuildTools/en/docs-6.2.0/">ROCm CMake</a></td>
|
||||
<td>0.12.0 ⇒ <a href="#rocm-cmake-0-13-0">0.13.0</a></td>
|
||||
<td><a href="https://github.com/ROCm/rocm-cmake/releases/tag/rocm-6.2.0"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
@@ -989,7 +989,7 @@ on GitHub for more information.
|
||||
* `Zihintntl` extension version was upgraded to 1.0 and is no longer experimental.
|
||||
|
||||
* Intrinsics were added for `Zk*`, `Zbb`, and `Zbc`. See
|
||||
[Scalar Bit Manipulation Extension Intrinsics](https://github.com/riscv-non-isa/riscv-c-api-doc/blob/master/riscv-c-api.md#scalar-bit-manipulation-extension-intrinsics) in the RISC-V C API specification.
|
||||
[Scalar Bit Manipulation Extension Intrinsics](https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc#scalar-bit-manipulation-extension-intrinsics) in the RISC-V C API specification.
|
||||
|
||||
* Default ABI with F but without D was changed to ilp32f for RV32 and to lp64f for RV64.
|
||||
|
||||
@@ -1524,7 +1524,7 @@ See [issue #3498](https://github.com/ROCm/ROCm/issues/3498) on GitHub.
|
||||
* Deprecated `rocblas_gemm_ex3`, `gemm_batched_ex3` and `gemm_strided_batched_ex3`. They will be removed in the next
|
||||
major release of rocBLAS. Refer to [hipBLASLt](https://github.com/ROCm/hipBLASLt) for future 8-bit float usage.
|
||||
|
||||
### **ROCdbgapi** (0.75.0)
|
||||
### **ROCdbgapi** (0.76.0)
|
||||
|
||||
#### Removals
|
||||
- Renamed `(AMD_DBGAPI_EXCEPTION_WAVE,AMD_DBGAPI_WAVE_STOP_REASON)_APERTURE_VIOLATION` to `(AMD_DBGAPI_EXCEPTION_WAVE,AMD_DBGAPI_WAVE_STOP_REASON)_ADDRESS_ERROR`.
|
||||
|
||||
@@ -17,7 +17,7 @@ You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-
|
||||
:stub-columns: 1
|
||||
|
||||
:doc:`Operating Systems <rocm-install-on-linux:reference/system-requirements>`, "Ubuntu 24.04","",""
|
||||
,"Ubuntu 22.04.5 [#Ubuntu220405]_, 22.04.4","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3"
|
||||
,"Ubuntu 22.04.5 [#Ubuntu220405]_, 22.04.4","Ubuntu 22.04.5 [#Ubuntu220405]_, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3"
|
||||
,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.4, 9.3","RHEL 9.4 [#red-hat94]_, 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,"RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
@@ -39,12 +39,12 @@ You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-
|
||||
,,,
|
||||
FRAMEWORK SUPPORT,".. _framework-support-compatibility-matrix:",,
|
||||
:doc:`PyTorch <rocm-install-on-linux:install/3rd-party/pytorch-install>`,"2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <rocm-install-on-linux:install/3rd-party/tensorflow-install>`,"2.16.1, 2.15.1, 2.14.1","2.15, 2.14, 2.13","2.14, 2.13, 2.12"
|
||||
:doc:`TensorFlow <rocm-install-on-linux:install/3rd-party/tensorflow-install>`,"2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`JAX <rocm-install-on-linux:install/3rd-party/jax-install>`,0.4.26,0.4.26,0.4.26
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.17.3,1.17.3,1.14.1
|
||||
,,,
|
||||
THIRD PARTY COMMS,".. _thirdpartycomms-support-compatibility-matrix:",,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.2.0,>=1.2.0,>=1.2.0
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.2.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.14.1,>=1.14.1
|
||||
,,,
|
||||
THIRD PARTY ALGORITHM,".. _thirdpartyalgorithm-support-compatibility-matrix:",,
|
||||
@@ -56,8 +56,9 @@ You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.10.0,2.9.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.2.0,3.1.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.0.0,2.5.0,2.5.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.6.0,0.6.0,N/A
|
||||
:doc:`RPP <rpp:index>`,1.8.0,1.5.0,1.4.0
|
||||
:doc:`rocAL <rocal:index>`,1.0.0,1.0.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.6.0,0.6.0,N/A
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.1.0,N/A,N/A
|
||||
,,,
|
||||
COMMUNICATION,".. _commlibs-support-compatibility-matrix:",,
|
||||
@@ -118,11 +119,13 @@ You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-
|
||||
,,,
|
||||
COMPILERS,".. _compilers-support-compatibility-matrix:",,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,18.0.0.24232,17.0.0.24193,17.0.0.23483
|
||||
`llvm-project <https://github.com/ROCm/llvm-project>`_,18.0.0.24232,17.0.0.24193,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,18.0.0.24232,17.0.0.24193,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,18.0.0.24232,17.0.0.24193,17.0.0.23483
|
||||
,,,
|
||||
RUNTIMES,".. _runtime-support-compatibility-matrix:",,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.2.41133,6.1.40093,6.1.32830
|
||||
:doc:`HIP <hip:index>`,6.2.41133,6.1.40093,6.1.32830
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCR-Runtime <rocr-runtime:index>`,1.13.0,1.13.0,1.12.0
|
||||
@@ -131,7 +134,7 @@ You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#Ubuntu220405] Preview support of Ubuntu 22.04.5 only
|
||||
.. [#red-hat94] RHEL 9.4 is supported only on AMD Instinct MI300A.
|
||||
.. [#red-hat94] **For ROCm 6.1** - RHEL 9.4 is supported only on AMD Instinct MI300A.
|
||||
.. [#oracle89] Oracle Linux is supported only on AMD Instinct MI300X.
|
||||
.. [#mi300_620] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_612] **For ROCm 6.1.2** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
|
||||
@@ -158,14 +161,14 @@ Expand for full historical view of:
|
||||
|
||||
.. csv-table::
|
||||
:file: ../data/reference/compatibility-matrix-historical-6.0.csv
|
||||
:widths: 20,10,10,10,10,10,10
|
||||
:widths: 20,10,10,10,10,10,10,10
|
||||
:header-rows: 1
|
||||
:stub-columns: 1
|
||||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#Ubuntu220405-past-60] Preview support of Ubuntu 22.04.5 only
|
||||
.. [#red-hat94-past-60] RHEL 9.4 is supported only on AMD Instinct MI300A.
|
||||
.. [#red-hat94-past-60] **For ROCm 6.1** - RHEL 9.4 is supported only on AMD Instinct MI300A.
|
||||
.. [#oracle89-past-60] Oracle Linux is supported only on AMD Instinct MI300X.
|
||||
.. [#mi300_620-past-60] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_612-past-60] **For ROCm 6.1.2** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
|
||||
|
||||
@@ -410,7 +410,7 @@ description, refer to the corresponding library data type support page.
|
||||
- ❌/❌
|
||||
- ❌/❌
|
||||
*
|
||||
- rocRAND (:doc:`details <rocrand:data-type-support>`)
|
||||
- rocRAND (:doc:`details <rocrand:api-reference/data-type-support>`)
|
||||
- -/✅
|
||||
- -/✅
|
||||
- -/✅
|
||||
|
||||
@@ -82,6 +82,7 @@ article_pages = [
|
||||
"file": "how-to/llm-fine-tuning-optimization/profiling-and-debugging",
|
||||
"os": ["linux"],
|
||||
},
|
||||
{"file": "how-to/performance-validation/mi300x/vllm-benchmark", "os": ["linux"]},
|
||||
{"file": "how-to/system-optimization/index", "os": ["linux"]},
|
||||
{"file": "how-to/system-optimization/mi300x", "os": ["linux"]},
|
||||
{"file": "how-to/system-optimization/mi200", "os": ["linux"]},
|
||||
|
||||
@@ -1,111 +1,114 @@
|
||||
ROCm Version,6.2.0, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
|
||||
:doc:`Operating Systems <rocm-install-on-linux:reference/system-requirements>`,Ubuntu 24.04,,,,,
|
||||
,"Ubuntu 22.04.5 [#Ubuntu220405-past-60]_, 22.04.4","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3"
|
||||
,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.4, 9.3","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,"RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,"SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
|
||||
,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,,,
|
||||
,".. _architecture-support-compatibility-matrix-past-60:",,,,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
|
||||
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
|
||||
,".. _gpu-support-compatibility-matrix-past-60:",,,,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
|
||||
,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
|
||||
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
|
||||
,,,,,,
|
||||
FRAMEWORK SUPPORT,".. _framework-support-compatibility-matrix-past-60:",,,,,
|
||||
:doc:`PyTorch <rocm-install-on-linux:install/3rd-party/pytorch-install>`,"2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <rocm-install-on-linux:install/3rd-party/tensorflow-install>`,"2.16.1, 2.15.1, 2.14.1","2.15, 2.14, 2.13","2.15, 2.14, 2.13","2.15, 2.14, 2.13","2.14, 2.13, 2.12","2.14, 2.13, 2.12"
|
||||
:doc:`JAX <rocm-install-on-linux:install/3rd-party/jax-install>`,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
|
||||
,,,,,,
|
||||
THIRD PARTY COMMS,".. _thirdpartycomms-support-compatibility-matrix-past-60:",,,,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.2.0,>=1.2.0,>=1.2.0,>=1.2.0,>=1.2.0,>=1.2.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
|
||||
,,,,,,
|
||||
THIRD PARTY ALGORITHM,".. _thirdpartyalgorithm-support-compatibility-matrix-past-60:",,,,,
|
||||
Thrust,2.2.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
CUB,2.2.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
,,,,,,
|
||||
ML & COMPUTER VISION,".. _mllibs-support-compatibility-matrix-past-60:",,,,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.10.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.2.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
|
||||
:doc:`RPP <rpp:index>`,1.8.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.1.0,N/A,N/A,N/A,N/A,N/A
|
||||
,,,,,,
|
||||
COMMUNICATION,".. _commlibs-support-compatibility-matrix-past-60:",,,,,
|
||||
:doc:`RCCL <rccl:index>`,2.20.5,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
|
||||
,,,,,,
|
||||
MATH LIBS,".. _mathlibs-support-compatibility-matrix-past-60:",,,,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.2.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.8.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
|
||||
:doc:`hipFORT <hipfort:index>`,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.2.0,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.1.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.1,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.0,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,4.2.0,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.28,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,3.1.0,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.26.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.2.0,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.5.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
|
||||
`Tensile <https://github.com/ROCm/Tensile>`_,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
|
||||
,,,,,,
|
||||
PRIMITIVES,".. _primitivelibs-support-compatibility-matrix-past-60:",,,,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.2.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.3.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.2.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
,,,,,,
|
||||
SUPPORT LIBS,,,,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.2.41133,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.2.0,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
|
||||
,,,,,,
|
||||
SYSTEM MGMT TOOLS,".. _tools-support-compatibility-matrix-past-60:",,,,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,24.6.2,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,1.0.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.3.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,rocm-6.2.0,rocm-6.1.2,rocm-6.1.1,rocm-6.1.0,rocm-6.0.2,rocm-6.0.0
|
||||
,,,,,,
|
||||
PERFORMANCE TOOLS,,,,,,
|
||||
:doc:`Omniperf <omniperf:index>`,2.0.1,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Omnitrace <omnitrace:index>`,1.11.2,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60200,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.4.0,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60200,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
|
||||
,,,,,,
|
||||
DEVELOPMENT TOOLS,,,,,,
|
||||
:doc:`HIPIFY <hipify:index>`,18.0.0.24232,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.13.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,14.2.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.3.0,0.3.0,0.3.0,N/A,N/A
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
|
||||
,,,,,,
|
||||
COMPILERS,".. _compilers-support-compatibility-matrix-past-60:",,,,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,18.0.0.24232,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`llvm-project <https://github.com/ROCm/llvm-project>`_,18.0.0.24232,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,18.0.0.24232,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
,,,,,,
|
||||
RUNTIMES,".. _runtime-support-compatibility-matrix-past-60:",,,,,
|
||||
:doc:`HIP <hip:index>`,6.2.41133,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCR-Runtime <rocr-runtime:index>`,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
|
||||
ROCm Version,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
|
||||
:doc:`Operating Systems <rocm-install-on-linux:reference/system-requirements>`,Ubuntu 24.04,,,,,,
|
||||
,"Ubuntu 22.04.5 [#Ubuntu220405-past-60]_, 22.04.4","Ubuntu 22.04.5 [#Ubuntu220405-past-60]_,22.04.4, 22.04.3","Ubuntu 22.04.5 [#Ubuntu220405-past-60]_,22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3"
|
||||
,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.4, 9.3","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,"RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,"SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
|
||||
,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,,,
|
||||
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
|
||||
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
|
||||
,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
|
||||
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
|
||||
,,,,,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,
|
||||
:doc:`PyTorch <rocm-install-on-linux:install/3rd-party/pytorch-install>`,"2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <rocm-install-on-linux:install/3rd-party/tensorflow-install>`,"2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`JAX <rocm-install-on-linux:install/3rd-party/jax-install>`,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
|
||||
,,,,,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
|
||||
,,,,,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,
|
||||
Thrust,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
CUB,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
,,,,,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
|
||||
:doc:`RPP <rpp:index>`,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
|
||||
:doc:`rocAL <rocal:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
,,,,,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,
|
||||
:doc:`RCCL <rccl:index>`,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
|
||||
,,,,,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
|
||||
:doc:`hipFORT <hipfort:index>`,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
|
||||
`Tensile <https://github.com/ROCm/Tensile>`_,4.40.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
|
||||
,,,,,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
,,,,,,,
|
||||
SUPPORT LIBS,,,,,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
|
||||
,,,,,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,1.0.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,rocm-6.2.0,rocm-6.1.5,rocm-6.1.2,rocm-6.1.1,rocm-6.1.0,rocm-6.0.2,rocm-6.0.0
|
||||
,,,,,,,
|
||||
PERFORMANCE TOOLS,,,,,,,
|
||||
:doc:`Omniperf <omniperf:index>`,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Omnitrace <omnitrace:index>`,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
|
||||
,,,,,,,
|
||||
DEVELOPMENT TOOLS,,,,,,,
|
||||
:doc:`HIPIFY <hipify:index>`,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
|
||||
,,,,,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
,,,,,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
:doc:`HIP <hip:index>`,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCR-Runtime <rocr-runtime:index>`,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
|
||||
|
||||
|
@@ -16,7 +16,7 @@ This section discusses how to implement `vLLM <https://docs.vllm.ai/en/latest>`_
|
||||
vLLM inference
|
||||
==============
|
||||
|
||||
vLLM is renowned for its paged attention algorithm that can reduce memory consumption and increase throughput thanks to
|
||||
vLLM is renowned for its PagedAttention algorithm that can reduce memory consumption and increase throughput thanks to
|
||||
its paging scheme. Instead of allocating GPU high-bandwidth memory (HBM) for the maximum output token lengths of the
|
||||
models, the paged attention of vLLM allocates GPU HBM dynamically for its actual decoding lengths. This paged attention
|
||||
is also effective when multiple requests share the same key and value contents for a large value of beam search or
|
||||
@@ -137,6 +137,10 @@ Installing vLLM
|
||||
|
||||
Refer to :ref:`mi300x-vllm-optimization` for performance optimization tips.
|
||||
|
||||
ROCm provides a prebuilt optimized Docker image for validating the performance of LLM inference with vLLM
|
||||
on the MI300X accelerator. The Docker image includes ROCm, vLLM, PyTorch, and tuning files in the CSV
|
||||
format. For more information, see :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`.
|
||||
|
||||
.. _fine-tuning-llms-tgi:
|
||||
|
||||
Hugging Face TGI
|
||||
|
||||
@@ -189,6 +189,9 @@ Installing bitsandbytes
|
||||
# Use -DBNB_ROCM_ARCH to specify target GPU arch
|
||||
cmake -DBNB_ROCM_ARCH="gfx942" -DCOMPUTE_BACKEND=hip -S .
|
||||
|
||||
# Compile the project
|
||||
make
|
||||
|
||||
# Install
|
||||
python setup.py install
|
||||
|
||||
@@ -219,12 +222,11 @@ To get started with bitsandbytes primitives, use the following code as reference
|
||||
Using bitsandbytes with Hugging Face Transformers
|
||||
-------------------------------------------------
|
||||
|
||||
To load a Transformers model in 4-bit, set ``load_int_4bt=true`` in ``BitsAndBytesConfig``.
|
||||
To load a Transformers model in 4-bit, set ``load_in_4bit=true`` in ``BitsAndBytesConfig``.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from transformers import AutoModelForCausalLM
|
||||
from bitsandbytes import BitsAndBytesConfig
|
||||
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
|
||||
|
||||
base_model_name = "NousResearch/Llama-2-7b-hf"
|
||||
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
|
||||
@@ -240,8 +242,7 @@ To load a model in 8-bit for inference, use the ``load_in_8bit`` option.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
from bitsandbytes import BitsAndBytesConfig
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
||||
|
||||
base_model_name = "NousResearch/Llama-2-7b-hf"
|
||||
|
||||
|
||||
334
docs/how-to/performance-validation/mi300x/vllm-benchmark.rst
Normal file
334
docs/how-to/performance-validation/mi300x/vllm-benchmark.rst
Normal file
@@ -0,0 +1,334 @@
|
||||
.. meta::
|
||||
:description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the unified
|
||||
ROCm Docker image.
|
||||
:keywords: model, MAD, automation, dashboarding, validate
|
||||
|
||||
***********************************************************
|
||||
LLM inference performance validation on AMD Instinct MI300X
|
||||
***********************************************************
|
||||
|
||||
.. _vllm-benchmark-unified-docker:
|
||||
|
||||
The `ROCm vLLM Docker <https://hub.docker.com/r/rocm/vllm/tags>`_ image offers
|
||||
a prebuilt, optimized environment designed for validating large language model
|
||||
(LLM) inference performance on the AMD Instinct™ MI300X accelerator. This
|
||||
ROCm vLLM Docker image integrates vLLM and PyTorch tailored specifically for the
|
||||
MI300X accelerator and includes the following components:
|
||||
|
||||
* `ROCm 6.2.0 <https://github.com/ROCm/ROCm>`_
|
||||
|
||||
* `vLLM 0.4.3 <https://docs.vllm.ai/en/latest>`_
|
||||
|
||||
* `PyTorch 2.4.0 <https://github.com/pytorch/pytorch>`_
|
||||
|
||||
* Tuning files (in CSV format)
|
||||
|
||||
With this Docker image, you can quickly validate the expected inference
|
||||
performance numbers on the MI300X accelerator. This topic also provides tips on
|
||||
optimizing performance with popular AI models.
|
||||
|
||||
.. _vllm-benchmark-vllm:
|
||||
|
||||
.. note::
|
||||
|
||||
vLLM is a toolkit and library for LLM inference and
|
||||
serving. It deploys the PagedAttention algorithm, which reduces memory
|
||||
consumption and increases throughput by leveraging dynamic key and value
|
||||
allocation in GPU memory. vLLM also incorporates many LLM acceleration
|
||||
and quantization algorithms. In addition, AMD implements high-performance
|
||||
custom kernels and modules in vLLM to enhance performance further. See
|
||||
:ref:`fine-tuning-llms-vllm` and :ref:`mi300x-vllm-optimization` for more
|
||||
information.
|
||||
|
||||
Getting started
|
||||
===============
|
||||
|
||||
Use the following procedures to reproduce the benchmark results on an
|
||||
MI300X accelerator with the prebuilt vLLM Docker image.
|
||||
|
||||
.. _vllm-benchmark-get-started:
|
||||
|
||||
1. Disable NUMA auto-balancing.
|
||||
|
||||
To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
|
||||
might hang until the periodic balancing is finalized. For more information,
|
||||
see :ref:`AMD Instinct MI300X system optimization <mi300x-disable-numa>`.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# disable automatic NUMA balancing
|
||||
sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
|
||||
# check if NUMA balancing is disabled (returns 0 if disabled)
|
||||
cat /proc/sys/kernel/numa_balancing
|
||||
0
|
||||
|
||||
2. Download the :ref:`ROCm vLLM Docker image <vllm-benchmark-unified-docker>`.
|
||||
|
||||
Use the following command to pull the Docker image from Docker Hub.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/vllm:rocm6.2_mi300_ubuntu22.04_py3.9_vllm_7c5fd50
|
||||
|
||||
Once setup is complete, you can choose between two options to reproduce the
|
||||
benchmark results:
|
||||
|
||||
- :ref:`MAD-integrated benchmarking <vllm-benchmark-mad>`
|
||||
|
||||
- :ref:`Standalone benchmarking <vllm-benchmark-standalone>`
|
||||
|
||||
.. _vllm-benchmark-mad:
|
||||
|
||||
MAD-integrated benchmarking
|
||||
===========================
|
||||
|
||||
Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
|
||||
directory and install the required packages on the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD
|
||||
pip install -r requirements.txt
|
||||
|
||||
Use this command to run a performance benchmark test of the Llama 3.1 8B model
|
||||
on one GPU with ``float16`` data type in the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
|
||||
python3 tools/run_models.py --tags pyt_vllm_llama-3.1-8b --keep-model-dir --live-output --timeout 28800
|
||||
|
||||
ROCm MAD launches a Docker container with the name
|
||||
``container_ci-pyt_vllm_llama-3.1-8b``. The latency and throughput reports of the
|
||||
model are collected in the following path: ``~/MAD/reports_float16/``
|
||||
|
||||
Although the following eight models are pre-configured to collect latency and
|
||||
throughput performance data, users can also change the benchmarking parameters.
|
||||
Refer to the :ref:`Standalone benchmarking <vllm-benchmark-standalone>` section.
|
||||
|
||||
Available models
|
||||
----------------
|
||||
|
||||
.. hlist::
|
||||
:columns: 3
|
||||
|
||||
* ``pyt_vllm_llama-3.1-8b``
|
||||
|
||||
* ``pyt_vllm_llama-3.1-70b``
|
||||
|
||||
* ``pyt_vllm_llama-3.1-405b``
|
||||
|
||||
* ``pyt_vllm_llama-2-7b``
|
||||
|
||||
* ``pyt_vllm_mistral-7b``
|
||||
|
||||
* ``pyt_vllm_qwen2-7b``
|
||||
|
||||
* ``pyt_vllm_jais-13b``
|
||||
|
||||
* ``pyt_vllm_jais-30b``
|
||||
|
||||
.. _vllm-benchmark-standalone:
|
||||
|
||||
Standalone benchmarking
|
||||
=======================
|
||||
|
||||
You can run the vLLM benchmark tool independently by starting the
|
||||
:ref:`Docker container <vllm-benchmark-get-started>` as shown in the following
|
||||
snippet.
|
||||
|
||||
.. code-block::
|
||||
|
||||
docker pull rocm/vllm:rocm6.2_mi300_ubuntu22.04_py3.9_vllm_7c5fd50
|
||||
docker run -it --device=/dev/kfd --device=/dev/dri --group-add video --shm-size 128G --security-opt seccomp=unconfined --security-opt apparmor=unconfined --cap-add=SYS_PTRACE -v $(pwd):/workspace --env HUGGINGFACE_HUB_CACHE=/workspace --name unified_docker_vllm rocm/vllm:rocm6.2_mi300_ubuntu22.04_py3.9_vllm_7c5fd50
|
||||
|
||||
In the Docker container, clone the ROCm MAD repository and navigate to the
|
||||
benchmark scripts directory at ``~/MAD/scripts/vllm``.
|
||||
|
||||
.. code-block::
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD/scripts/vllm
|
||||
|
||||
Multiprocessing distributed executor
|
||||
--------------------------------------
|
||||
|
||||
To optimize vLLM performance, add the multiprocessing API server argument ``--distributed-executor-backend mp``.
|
||||
|
||||
Command
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
To start the benchmark, use the following command with the appropriate options.
|
||||
See :ref:`Options <vllm-benchmark-standalone-options>` for the list of
|
||||
options and their descriptions.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./vllm_benchmark_report.sh -s $test_option -m $model_repo -g $num_gpu -d $datatype
|
||||
|
||||
See the :ref:`examples <vllm-benchmark-run-benchmark>` for more information.
|
||||
|
||||
.. note::
|
||||
|
||||
The input sequence length, output sequence length, and tensor parallel (TP) are
|
||||
already configured. You don't need to specify them with this script.
|
||||
|
||||
.. note::
|
||||
|
||||
If you encounter the following error, pass your access-authorized Hugging
|
||||
Face token to the gated models.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
OSError: You are trying to access a gated repo.
|
||||
|
||||
# pass your HF_TOKEN
|
||||
export HF_TOKEN=$your_personal_hf_token
|
||||
|
||||
.. _vllm-benchmark-standalone-options:
|
||||
|
||||
Options
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Name
|
||||
- Options
|
||||
- Description
|
||||
|
||||
* - ``$test_option``
|
||||
- latency
|
||||
- Measure decoding token latency
|
||||
|
||||
* -
|
||||
- throughput
|
||||
- Measure token generation throughput
|
||||
|
||||
* -
|
||||
- all
|
||||
- Measure both throughput and latency
|
||||
|
||||
* - ``$model_repo``
|
||||
- ``meta-llama/Meta-Llama-3.1-8B-Instruct``
|
||||
- Llama 3.1 8B
|
||||
|
||||
* - (``float16``)
|
||||
- ``meta-llama/Meta-Llama-3.1-70B-Instruct``
|
||||
- Llama 3.1 70B
|
||||
|
||||
* -
|
||||
- ``meta-llama/Meta-Llama-3.1-405B-Instruct``
|
||||
- Llama 3.1 405B
|
||||
|
||||
* -
|
||||
- ``meta-llama/Llama-2-7b-chat-hf``
|
||||
- Llama 2 7B
|
||||
|
||||
* -
|
||||
- ``mistralai/Mixtral-8x7B-Instruct-v0.1``
|
||||
- Mixtral 8x7B
|
||||
|
||||
* -
|
||||
- ``mistralai/Mixtral-8x22B-Instruct-v0.1``
|
||||
- Mixtral 8x22B
|
||||
|
||||
* -
|
||||
- ``mistralai/Mistral-7B-Instruct-v0.3``
|
||||
- Mixtral 7B
|
||||
|
||||
* -
|
||||
- ``Qwen/Qwen2-7B-Instruct``
|
||||
- Qwen2 7B
|
||||
|
||||
* -
|
||||
- ``core42/jais-13b-chat``
|
||||
- JAIS 13B
|
||||
|
||||
* -
|
||||
- ``core42/jais-30b-chat-v3``
|
||||
- JAIS 30B
|
||||
|
||||
* - ``$num_gpu``
|
||||
- 1 or 8
|
||||
- Number of GPUs
|
||||
|
||||
* - ``$datatype``
|
||||
- ``float16``
|
||||
- Data type
|
||||
|
||||
.. _vllm-benchmark-run-benchmark:
|
||||
|
||||
Running the benchmark on the MI300X accelerator
|
||||
-----------------------------------------------
|
||||
|
||||
Here are some examples of running the benchmark with various options.
|
||||
See :ref:`Options <vllm-benchmark-standalone-options>` for the list of
|
||||
options and their descriptions.
|
||||
|
||||
Latency benchmark example
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Use this command to benchmark the latency of the Llama 3.1 8B model on one GPU with the ``float16`` data type.
|
||||
|
||||
.. code-block::
|
||||
|
||||
./vllm_benchmark_report.sh -s latency -m meta-llama/Meta-Llama-3.1-8B-Instruct -g 1 -d float16
|
||||
|
||||
Find the latency report at:
|
||||
|
||||
- ``./reports_float16/summary/Meta-Llama-3.1-8B-Instruct_latency_report.csv``
|
||||
|
||||
Throughput benchmark example
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Use this command to benchmark the throughput of the Llama 3.1 8B model on one GPU with the ``float16`` and ``float8`` data types.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./vllm_benchmark_report.sh -s throughput -m meta-llama/Meta-Llama-3.1-8B-Instruct -g 1 -d float16
|
||||
|
||||
Find the throughput reports at:
|
||||
|
||||
- ``./reports_float16/summary/Meta-Llama-3.1-8B-Instruct_throughput_report.csv``
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<style>
|
||||
mjx-container[jax="CHTML"][display="true"] {
|
||||
text-align: left;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
</style>
|
||||
|
||||
.. note::
|
||||
|
||||
Throughput is calculated as:
|
||||
|
||||
- .. math:: throughput\_tot = requests \times (\mathsf{\text{input lengths}} + \mathsf{\text{output lengths}}) / elapsed\_time
|
||||
|
||||
- .. math:: throughput\_gen = requests \times \mathsf{\text{output lengths}} / elapsed\_time
|
||||
|
||||
Further reading
|
||||
===============
|
||||
|
||||
- For application performance optimization strategies for HPC and AI workloads,
|
||||
including inference with vLLM, see :doc:`/how-to/tuning-guides/mi300x/workload`.
|
||||
|
||||
- To learn more about the options for latency and throughput benchmark scripts,
|
||||
see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.
|
||||
|
||||
- To learn more about system settings and management practices to configure your system for
|
||||
MI300X accelerators, see :doc:`/how-to/system-optimization/mi300x`.
|
||||
|
||||
- To learn how to run LLM models from Hugging Face or your own model, see
|
||||
:doc:`Using ROCm for AI </how-to/rocm-for-ai/index>`.
|
||||
|
||||
- To learn how to optimize inference on LLMs, see
|
||||
:doc:`Fine-tuning LLMs and inference optimization </how-to/llm-fine-tuning-optimization/index>`.
|
||||
|
||||
- For a list of other ready-made Docker images for ROCm, see the
|
||||
:doc:`Docker image support matrix <rocm-install-on-linux:reference/docker-image-support-matrix>`.
|
||||
|
||||
@@ -41,6 +41,13 @@ vLLM walkthrough
|
||||
Refer to this developer blog for guidance on serving with vLLM `Inferencing and serving with vLLM on AMD GPUs — ROCm
|
||||
Blogs <https://rocm.blogs.amd.com/artificial-intelligence/vllm/README.html>`_
|
||||
|
||||
Validating vLLM performance
|
||||
---------------------------
|
||||
|
||||
ROCm provides a prebuilt optimized Docker image for validating the performance of LLM inference with vLLM
|
||||
on the MI300X accelerator. The Docker image includes ROCm, vLLM, PyTorch, and tuning files in the CSV
|
||||
format. For more information, see :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`.
|
||||
|
||||
.. _rocm-for-ai-serve-hugging-face-tgi:
|
||||
|
||||
Serving using Hugging Face TGI
|
||||
|
||||
@@ -22,25 +22,25 @@ If you’re new to ROCm, refer to the :doc:`ROCm quick start install guide for L
|
||||
<rocm-install-on-linux:install/quick-start>`.
|
||||
|
||||
If you’re using a Radeon GPU for graphics-accelerated applications, refer to the
|
||||
:doc:`Radeon installation instructions <radeon:docs/install/install-radeon>`.
|
||||
`Radeon installation instructions <https://rocm.docs.amd.com/projects/radeon/en/docs-6.1.3/docs/install/native_linux/install-radeon.html>`_.
|
||||
|
||||
ROCm supports two methods for installation. There is no difference in the final ROCm installation between these two
|
||||
methods. You can also opt for :ref:`single-version or multi-version installation
|
||||
<rocm-install-on-linux:installation-types>`.
|
||||
ROCm supports multiple :doc:`installation methods <rocm-install-on-linux:install/install-overview>`:
|
||||
|
||||
* :doc:`Using your Linux distribution's package manager <rocm-install-on-linux:how-to/native-install/index>`
|
||||
* :doc:`Using your Linux distribution's package manager <rocm-install-on-linux:install/native-install/index>`
|
||||
|
||||
* :doc:`Using the AMDGPU installer <rocm-install-on-linux:how-to/amdgpu-install>`
|
||||
* :doc:`Using the AMDGPU installer <rocm-install-on-linux:install/amdgpu-install>`
|
||||
|
||||
* :ref:`Multi-version installation <rocm-install-on-linux:installation-types>`.
|
||||
|
||||
.. grid:: 1
|
||||
|
||||
.. grid-item-card:: Post-install
|
||||
|
||||
Follow the :doc:`post-installation instructions <rocm-install-on-linux:how-to/native-install/post-install>` to
|
||||
Follow the :doc:`post-installation instructions <rocm-install-on-linux:install/post-install>` to
|
||||
configure your system linker, PATH, and verify the installation.
|
||||
|
||||
If you encounter any issues during installation, refer to the
|
||||
:doc:`Installation troubleshooting <rocm-install-on-linux:how-to/native-install/install-faq>` guide.
|
||||
:doc:`Installation troubleshooting <rocm-install-on-linux:reference/install-faq>` guide.
|
||||
|
||||
Machine learning frameworks
|
||||
===========================
|
||||
|
||||
@@ -365,9 +365,9 @@ installed.
|
||||
## System management
|
||||
|
||||
For a complete guide on how to install/manage/uninstall ROCm on Linux, refer to
|
||||
{doc}`Quick-start (Linux)<rocm-install-on-linux:tutorial/quick-start>`. To verify that the installation was
|
||||
{doc}`Quick-start (Linux)<rocm-install-on-linux:install/quick-start>`. To verify that the installation was
|
||||
successful, refer to the
|
||||
{doc}`post-install instructions<rocm-install-on-linux:how-to/native-install/post-install>` and
|
||||
{doc}`post-install instructions<rocm-install-on-linux:install/post-install>` and
|
||||
[system tools](../../reference/rocm-tools.md). Should verification
|
||||
fail, consult the [System Debugging Guide](../system-debugging.md).
|
||||
|
||||
|
||||
@@ -350,9 +350,9 @@ installed.
|
||||
## System management
|
||||
|
||||
For a complete guide on how to install/manage/uninstall ROCm on Linux, refer to
|
||||
{doc}`Quick-start (Linux)<rocm-install-on-linux:tutorial/quick-start>`. For verifying that the
|
||||
{doc}`Quick-start (Linux)<rocm-install-on-linux:install/quick-start>`. For verifying that the
|
||||
installation was successful, refer to the
|
||||
{doc}`post-install instructions<rocm-install-on-linux:how-to/native-install/post-install>` and
|
||||
{doc}`post-install instructions<rocm-install-on-linux:install/post-install>` and
|
||||
[system tools](../../reference/rocm-tools.md). Should verification
|
||||
fail, consult the [System Debugging Guide](../system-debugging.md).
|
||||
|
||||
|
||||
@@ -122,6 +122,62 @@ This section describes performance-based settings.
|
||||
|
||||
transparent_hugepage=always
|
||||
|
||||
* **Increase the amount of allocatable memory**
|
||||
|
||||
By default, when using a device allocator via HIP, it is only possible to allocate 96 GiB out of
|
||||
a possible 128 GiB of memory on the MI300A. This limitation does not affect host allocations.
|
||||
To increase the available system memory, load the ``amdttm`` module with new values for
|
||||
``pages_limit`` and ``page_pool_size``. These numbers correspond to the number of 4 KiB pages of memory.
|
||||
To make 128 GiB of memory available across all four devices, for a total amount of 512 GiB,
|
||||
set ``pages_limit`` and ``page_pool_size`` to ``134217728``. For a two-socket system, divide these values
|
||||
by two. After setting these values, reload the AMDGPU driver.
|
||||
|
||||
First, review the current settings using this shell command:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
cat /sys/module/amdttm/parameters/pages_limit
|
||||
|
||||
To set the amount of allocatable memory to all available memory on all four APU devices, run these commands:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
sudo modprobe amdttm pages_limit=134217728 page_pool_size=134217728
|
||||
sudo modprobe amdgpu
|
||||
|
||||
These settings can also be hardcoded in the ``/etc/modprobe.d/amdttm.conf`` file or specified as boot
|
||||
parameters.
|
||||
|
||||
To use the hardcoded method,
|
||||
the filesystem must already be set up when the kernel driver is loaded.
|
||||
To hardcode the settings, add the following lines to ``/etc/modprobe.d/amdttm.conf``:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
options amdttm pages_limit=134217728
|
||||
options amdttm page_pool_size=134217728
|
||||
|
||||
If the filesystem is not already set up when the kernel driver is loaded, then the options
|
||||
must be specified as boot parameters. To specify the settings
|
||||
as boot parameters when loading the kernel, use this example as a guideline:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
vmlinux-[...] amdttm.pages_limit=134217728 amdttm.page_pool_size=134217728 [...]
|
||||
|
||||
To verify the new settings and confirm the change, use this command:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
cat /sys/module/amdttm/parameters/pages_limit
|
||||
|
||||
.. note::
|
||||
|
||||
The system settings for ``pages_limit`` and ``page_pool_size`` are calculated by multiplying the
|
||||
per-APU limit of 4 KiB pages, which is ``33554432``, by the number of APUs on the node. The limit for a system with
|
||||
two APUs ``33554432 x 2`` or ``67108864``.
|
||||
This means the ``modprobe`` command for two APUs is ``sudo modprobe amdttm pages_limit=67108864 page_pool_size=67108864``.
|
||||
|
||||
* **Limit the maximum and single memory allocations on the GPU**
|
||||
|
||||
Many AI-related applications were originally developed on discrete GPUs. Some of these applications
|
||||
@@ -195,10 +251,13 @@ This section describes performance-based settings.
|
||||
|
||||
* **Change affinity of ROCm helper threads**
|
||||
|
||||
This change prevents internal ROCm threads from having their CPU core affinity mask
|
||||
Changing the affinity prevents internal ROCm threads from having their CPU core affinity mask
|
||||
set to all CPU cores available. With this setting, the threads inherit their parent's
|
||||
CPU core affinity mask. If you have any questions regarding this setting,
|
||||
contact your MI300A platform vendor. To enable this setting, enter the following command:
|
||||
CPU core affinity mask. Before adjusting this setting, ensure you thoroughly understand
|
||||
your system topology and how the application, runtime environment, and batch system
|
||||
set the thread-to-core affinity. If you have any questions regarding this setting,
|
||||
contact your MI300A platform vendor or the AMD support team.
|
||||
To enable this setting, enter the following command:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
@@ -213,9 +272,9 @@ System management
|
||||
========================================
|
||||
|
||||
For a complete guide on installing, managing, and uninstalling ROCm on Linux, see
|
||||
:doc:`Quick-start (Linux)<rocm-install-on-linux:tutorial/quick-start>`. To verify that the
|
||||
:doc:`Quick-start (Linux)<rocm-install-on-linux:install/quick-start>`. To verify that the
|
||||
installation was successful, see the
|
||||
:doc:`Post-installation instructions<rocm-install-on-linux:install/native-install/post-install>` and
|
||||
:doc:`Post-installation instructions<rocm-install-on-linux:install/post-install>` and
|
||||
:doc:`ROCm tools <../../reference/rocm-tools>` guides. If verification
|
||||
fails, consult the :doc:`System debugging guide <../system-debugging>`.
|
||||
|
||||
|
||||
@@ -534,7 +534,7 @@ optimizing user applications.
|
||||
For a complete guide on how to install, manage, or uninstall ROCm on Linux, refer to
|
||||
:doc:`rocm-install-on-linux:install/quick-start`. For verifying that the
|
||||
installation was successful, refer to the
|
||||
:doc:`rocm-install-on-linux:install/native-install/post-install`.
|
||||
:doc:`rocm-install-on-linux:install/post-install`.
|
||||
Should verification fail, consult :doc:`/how-to/system-debugging`.
|
||||
|
||||
Hardware verification with ROCm
|
||||
|
||||
@@ -8,6 +8,8 @@ accelerators. They include detailed instructions on system settings and
|
||||
application tuning suggestions to help you fully leverage the capabilities of
|
||||
these accelerators, thereby achieving optimal performance.
|
||||
|
||||
* :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`
|
||||
|
||||
* :doc:`/how-to/tuning-guides/mi300x/system`
|
||||
|
||||
* :doc:`/how-to/tuning-guides/mi300x/workload`
|
||||
|
||||
@@ -150,6 +150,10 @@ the workload to validate improvements and ensure that the changes have had the
|
||||
desired effect. Continuous iteration helps refine the performance gains and
|
||||
address any new bottlenecks that may emerge.
|
||||
|
||||
ROCm provides a prebuilt optimized Docker image that has everything required to implement
|
||||
the tips in this section. It includes ROCm, vLLM, PyTorch, and tuning files in the CSV
|
||||
format. For more information, see :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`.
|
||||
|
||||
.. _mi300x-profiling-tools:
|
||||
|
||||
Profiling tools
|
||||
@@ -372,6 +376,11 @@ Refer to `vLLM documentation <https://docs.vllm.ai/en/latest/models/performance.
|
||||
for additional performance tips. :ref:`fine-tuning-llms-vllm` describes vLLM
|
||||
usage with ROCm.
|
||||
|
||||
ROCm provides a prebuilt optimized Docker image for validating the performance
|
||||
of LLM inference with vLLM on the MI300X accelerator. The Docker image includes
|
||||
ROCm, vLLM, PyTorch, and tuning files in the CSV format. For more information,
|
||||
see :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`.
|
||||
|
||||
Maximize throughput
|
||||
-------------------
|
||||
|
||||
|
||||
@@ -52,14 +52,7 @@ ROCm documentation is organized into the following categories:
|
||||
* [Using ROCm for HPC](./how-to/rocm-for-hpc/index.rst)
|
||||
* [Fine-tuning LLMs and inference optimization](./how-to/llm-fine-tuning-optimization/index.rst)
|
||||
* [System optimization](./how-to/system-optimization/index.rst)
|
||||
* [AMD Instinct MI300X](./how-to/system-optimization/mi300x.rst)
|
||||
* [AMD Instinct MI300A](./how-to/system-optimization/mi300a.rst)
|
||||
* [AMD Instinct MI200](./how-to/system-optimization/mi200.md)
|
||||
* [AMD Instinct MI100](./how-to/system-optimization/mi100.md)
|
||||
* [AMD Instinct RDNA2](./how-to/system-optimization/w6000-v620.md)
|
||||
* [AMD Instinct MI300X tuning guides](./how-to/tuning-guides/mi300x/index.rst)
|
||||
* [System tuning](./how-to/tuning-guides/mi300x/system.rst)
|
||||
* [Workload tuning](./how-to/tuning-guides/mi300x/workload.rst)
|
||||
* [AMD Instinct MI300X performance validation and tuning](./how-to/tuning-guides/mi300x/index.rst)
|
||||
* [System debugging](./how-to/system-debugging.md)
|
||||
* [GPU-enabled MPI](./how-to/gpu-enabled-mpi.rst)
|
||||
* [Using advanced compiler features](./conceptual/compiler-topics.md)
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
* {doc}`MIVisionX <mivisionx:index>`
|
||||
* {doc}`rocAL <rocal:index>`
|
||||
* {doc}`rocDecode <rocdecode:index>`
|
||||
* {doc}`rocPyDecode <rocpydecode:index>`
|
||||
* {doc}`ROCm Performance Primitives (RPP) <rpp:index>`
|
||||
:::
|
||||
|
||||
@@ -53,18 +54,6 @@
|
||||
* {doc}`RCCL <rccl:index>`
|
||||
:::
|
||||
|
||||
(hip-runtime)=
|
||||
|
||||
:::{grid-item-card}
|
||||
:class-card: sd-text-black
|
||||
:img-top: ../data/reference/banner-hip.jpg
|
||||
:img-alt: HIP APIs
|
||||
:padding: 2
|
||||
|
||||
* {doc}`HIP runtime <hip:index>`
|
||||
* {doc}`HIPIFY <hipify:index>`
|
||||
:::
|
||||
|
||||
(math-apis)=
|
||||
|
||||
:::{grid-item-card}
|
||||
@@ -78,6 +67,7 @@
|
||||
* {doc}`hipBLASLt <hipblaslt:index>`
|
||||
* {doc}`hipFFT <hipfft:index>` / {doc}`rocFFT <rocfft:index>`
|
||||
* {doc}`hipfort <hipfort:index>`
|
||||
* {doc}`hipRAND <hiprand:index>` / {doc}`rocRAND <rocrand:index>`
|
||||
* {doc}`hipSOLVER <hipsolver:index>` / {doc}`rocSOLVER <rocsolver:index>`
|
||||
* {doc}`hipSPARSE <hipsparse:index>` / {doc}`rocSPARSE <rocsparse:index>`
|
||||
* {doc}`hipSPARSELt <hipsparselt:index>`
|
||||
@@ -86,16 +76,4 @@
|
||||
* [Tensile](https://github.com/ROCm/Tensile)
|
||||
:::
|
||||
|
||||
(random-number-apis)=
|
||||
|
||||
:::{grid-item-card}
|
||||
:class-card: sd-text-black
|
||||
:img-top: ../data/reference/banner-random-number.jpg
|
||||
:img-alt: Random number APIs
|
||||
:padding: 2
|
||||
|
||||
* {doc}`hipRAND <hiprand:index>`
|
||||
* {doc}`rocRAND <rocrand:index>`
|
||||
:::
|
||||
|
||||
::::
|
||||
|
||||
@@ -70,9 +70,11 @@ subtrees:
|
||||
- file: how-to/system-optimization/w6000-v620.md
|
||||
title: AMD RDNA 2
|
||||
- file: how-to/tuning-guides/mi300x/index.rst
|
||||
title: AMD MI300X tuning guides
|
||||
title: AMD MI300X performance validation and tuning
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: how-to/performance-validation/mi300x/vllm-benchmark.rst
|
||||
title: Performance validation
|
||||
- file: how-to/tuning-guides/mi300x/system.rst
|
||||
title: System tuning
|
||||
- file: how-to/tuning-guides/mi300x/workload.rst
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
rocm-docs-core==1.7.0
|
||||
rocm-docs-core==1.8.0
|
||||
sphinx-reredirects
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
#
|
||||
accessible-pygments==0.0.5
|
||||
# via pydata-sphinx-theme
|
||||
alabaster==0.7.16
|
||||
alabaster==1.0.0
|
||||
# via sphinx
|
||||
babel==2.15.0
|
||||
babel==2.16.0
|
||||
# via
|
||||
# pydata-sphinx-theme
|
||||
# sphinx
|
||||
@@ -16,9 +16,9 @@ beautifulsoup4==4.12.3
|
||||
# via pydata-sphinx-theme
|
||||
breathe==4.35.0
|
||||
# via rocm-docs-core
|
||||
certifi==2024.7.4
|
||||
certifi==2024.8.30
|
||||
# via requests
|
||||
cffi==1.16.0
|
||||
cffi==1.17.1
|
||||
# via
|
||||
# cryptography
|
||||
# pynacl
|
||||
@@ -26,7 +26,7 @@ charset-normalizer==3.3.2
|
||||
# via requests
|
||||
click==8.1.7
|
||||
# via sphinx-external-toc
|
||||
cryptography==42.0.8
|
||||
cryptography==43.0.1
|
||||
# via pyjwt
|
||||
deprecated==1.2.14
|
||||
# via pygithub
|
||||
@@ -42,7 +42,7 @@ gitdb==4.0.11
|
||||
# via gitpython
|
||||
gitpython==3.1.43
|
||||
# via rocm-docs-core
|
||||
idna==3.7
|
||||
idna==3.10
|
||||
# via requests
|
||||
imagesize==1.4.1
|
||||
# via sphinx
|
||||
@@ -56,11 +56,11 @@ markdown-it-py==3.0.0
|
||||
# myst-parser
|
||||
markupsafe==2.1.5
|
||||
# via jinja2
|
||||
mdit-py-plugins==0.4.1
|
||||
mdit-py-plugins==0.4.2
|
||||
# via myst-parser
|
||||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
myst-parser==3.0.1
|
||||
myst-parser==4.0.0
|
||||
# via rocm-docs-core
|
||||
packaging==24.1
|
||||
# via
|
||||
@@ -72,18 +72,18 @@ pydata-sphinx-theme==0.15.4
|
||||
# via
|
||||
# rocm-docs-core
|
||||
# sphinx-book-theme
|
||||
pygithub==2.3.0
|
||||
pygithub==2.4.0
|
||||
# via rocm-docs-core
|
||||
pygments==2.18.0
|
||||
# via
|
||||
# accessible-pygments
|
||||
# pydata-sphinx-theme
|
||||
# sphinx
|
||||
pyjwt[crypto]==2.8.0
|
||||
pyjwt[crypto]==2.9.0
|
||||
# via pygithub
|
||||
pynacl==1.5.0
|
||||
# via pygithub
|
||||
pyyaml==6.0.1
|
||||
pyyaml==6.0.2
|
||||
# via
|
||||
# myst-parser
|
||||
# rocm-docs-core
|
||||
@@ -92,15 +92,15 @@ requests==2.32.3
|
||||
# via
|
||||
# pygithub
|
||||
# sphinx
|
||||
rocm-docs-core==1.7.0
|
||||
rocm-docs-core==1.8.0
|
||||
# via -r requirements.in
|
||||
smmap==5.0.1
|
||||
# via gitdb
|
||||
snowballstemmer==2.2.0
|
||||
# via sphinx
|
||||
soupsieve==2.5
|
||||
soupsieve==2.6
|
||||
# via beautifulsoup4
|
||||
sphinx==7.3.7
|
||||
sphinx==8.0.2
|
||||
# via
|
||||
# breathe
|
||||
# myst-parser
|
||||
@@ -116,25 +116,25 @@ sphinx-book-theme==1.1.3
|
||||
# via rocm-docs-core
|
||||
sphinx-copybutton==0.5.2
|
||||
# via rocm-docs-core
|
||||
sphinx-design==0.6.0
|
||||
sphinx-design==0.6.1
|
||||
# via rocm-docs-core
|
||||
sphinx-external-toc==1.0.1
|
||||
# via rocm-docs-core
|
||||
sphinx-notfound-page==1.0.2
|
||||
sphinx-notfound-page==1.0.4
|
||||
# via rocm-docs-core
|
||||
sphinx-reredirects==0.1.5
|
||||
# via -r requirements.in
|
||||
sphinxcontrib-applehelp==1.0.8
|
||||
sphinxcontrib-applehelp==2.0.0
|
||||
# via sphinx
|
||||
sphinxcontrib-devhelp==1.0.6
|
||||
sphinxcontrib-devhelp==2.0.0
|
||||
# via sphinx
|
||||
sphinxcontrib-htmlhelp==2.0.5
|
||||
sphinxcontrib-htmlhelp==2.1.0
|
||||
# via sphinx
|
||||
sphinxcontrib-jsmath==1.0.1
|
||||
# via sphinx
|
||||
sphinxcontrib-qthelp==1.0.7
|
||||
sphinxcontrib-qthelp==2.0.0
|
||||
# via sphinx
|
||||
sphinxcontrib-serializinghtml==1.1.10
|
||||
sphinxcontrib-serializinghtml==2.0.0
|
||||
# via sphinx
|
||||
tomli==2.0.1
|
||||
# via sphinx
|
||||
@@ -142,7 +142,7 @@ typing-extensions==4.12.2
|
||||
# via
|
||||
# pydata-sphinx-theme
|
||||
# pygithub
|
||||
urllib3==2.2.2
|
||||
urllib3==2.2.3
|
||||
# via
|
||||
# pygithub
|
||||
# requests
|
||||
|
||||
58
temp.md
58
temp.md
@@ -1,58 +0,0 @@
|
||||
## Components
|
||||
|
||||
The following table lists ROCm components and their individual versions for ROCm 6.2.0. Find an overview of officially
|
||||
supported versions of ROCm components, third-party libraries, and frameworks in the
|
||||
[Compatibility matrix](https://rocm.docs.amd.com/en/latest/release/docs/6.2.0/compatibility/compatibility-matrix).
|
||||
|
||||
| Category | Group | Name | Version | |
|
||||
|----------|-------|------|---------|:-:|
|
||||
| **Libraries** | **Machine learning and computer vision** | [Composable Kernel](https://rocm.docs.amd.com/projects/composable_kernel/en/docs/6.2.0) | 1.1.0 | [{fab}`github fa-lg`](https://github.com/ROCm/composable_kernel/releases/tag/rocm-6.2.0) |
|
||||
| | | [MIGraphX](https://rocm.docs.amd.com/projects/AMDMIGraphX/en/docs/6.2.0) | 2.9 ⇒ [2.10](migraphx-2-10-0) | [{fab}`github fa-lg`](https://github.com/ROCm/AMDMIGraphX/releases/tag/rocm-6.2.0) |
|
||||
| | | [MIOpen](https://rocm.docs.amd.com/projects/MIOpen/en/docs/6.2.0) | 3.1.0 ⇒ [3.2.0](miopen-3-2-0) | [{fab}`github fa-lg`](https://github.com/ROCm/MIOpen/releases/tag/rocm-6.2.0) |
|
||||
| | | [MIVisionX](https://rocm.docs.amd.com/projects/MIVisionX/en/docs/6.2.0) | 2.5.0 ⇒ [3.0.0](mivisionx-3-0-0) | [{fab}`github fa-lg`](https://github.com/ROCm/MIVisionX/releases/tag/rocm-6.2.0) |
|
||||
| | | [rocAL](https://rocm.docs.amd.com/projects/rocAL/en/docs/6.2.0) | 2.0.0 | [{fab}`github fa-lg`](https://github.com/ROCm/rocAL/releases/tag/rocm-6.2.0) |
|
||||
| | | [rocDecode](https://rocm.docs.amd.com/projects/rocDecode/en/docs/6.2.0) | 0.6.0 | [{fab}`github fa-lg`](https://github.com/ROCm/rocDecode/releases/tag/rocm-6.2.0) |
|
||||
| | | [rocPyDecode](https://rocm.docs.amd.com/projects/rocPyDecode/en/docs/6.2.0) | 0.1.0 | [{fab}`github fa-lg`](https://github.com/ROCm/rocPyDecode/releases/tag/rocm-6.2.0) |
|
||||
| | | [RPP](https://rocm.docs.amd.com/projects/rpp/en/docs/6.2.0) | 1.5.0 ⇒ [1.8.0](rpp-1-8-0) | [{fab}`github fa-lg`](https://github.com/ROCm/rpp/releases/tag/rocm-6.2.0) |
|
||||
| | **Communication** | [rccl](https://rocm.docs.amd.com/projects/rccl/en/docs/6.2.0) | 2.18.6 ⇒ [2.20.5](rccl-2-20-5) | [{fab}`github fa-lg`](https://github.com/ROCm/rccl/releases/tag/rocm-6.2.0) |
|
||||
| | **Math** | [hipBLAS](https://rocm.docs.amd.com/projects/hipBLAS/en/docs/6.2.0) | 2.1.0 ⇒ [2.2.0](hipblas-2-2-0) | [{fab}`github fa-lg`](https://github.com/ROCm/hipBLAS/releases/tag/rocm-6.2.0) |
|
||||
| | | [hipBLASLt](https://rocm.docs.amd.com/projects/hipBLASLt/en/docs/6.2.0) | 0.7.0 ⇒ [0.8.0](hipblaslt-0-8-0) | [{fab}`github fa-lg`](https://github.com/ROCm/hipBLASLt/releases/tag/rocm-6.2.0) |
|
||||
| | | [hipFFT](https://rocm.docs.amd.com/projects/hipFFT/en/docs/6.2.0) | [1.0.14](hipfft-1-0-14) | [{fab}`github fa-lg`](https://github.com/ROCm/hipFFT/releases/tag/rocm-6.2.0) |
|
||||
| | | [hipfort](https://rocm.docs.amd.com/projects/hipfort/en/docs/6.2.0) | 0.4-0 | [{fab}`github fa-lg`](https://github.com/ROCm/hipfort/releases/tag/rocm-6.2.0) |
|
||||
| | | [hipRAND](https://rocm.docs.amd.com/projects/hipRAND/en/docs/6.2.0) | 2.10.17 ⇒ [2.11.0](hiprand-2-11-0) | [{fab}`github fa-lg`](https://github.com/ROCm/hipRAND/releases/tag/rocm-6.2.0) |
|
||||
| | | [hipSOLVER](https://rocm.docs.amd.com/projects/hipSOLVER/en/docs/6.2.0) | 2.1.1 ⇒ [2.2.0](hipsolver-2-2-0) | [{fab}`github fa-lg`](https://github.com/ROCm/hipSOLVER/releases/tag/rocm-6.2.0) |
|
||||
| | | [hipSPARSE](https://rocm.docs.amd.com/projects/hipSPARSE/en/docs/6.2.0) | 3.0.1 ⇒ [3.1.1](hipsparse-3-1-1) | [{fab}`github fa-lg`](https://github.com/ROCm/hipSPARSE/releases/tag/rocm-6.2.0) |
|
||||
| | | [hipSPARSELt](https://rocm.docs.amd.com/projects/hipSPARSELt/en/docs/6.2.0) | 0.2.0 ⇒ [0.2.1](hipsparselt-0-2-1) | [{fab}`github fa-lg`](https://github.com/ROCm/hipSPARSELt/releases/tag/rocm-6.2.0) |
|
||||
| | | [rocALUTION](https://rocm.docs.amd.com/projects/rocALUTION/en/docs/6.2.0) | 3.1.1 ⇒ [3.2.0](rocalution-3-2-0) | [{fab}`github fa-lg`](https://github.com/ROCm/rocALUTION/releases/tag/rocm-6.2.0) |
|
||||
| | | [rocBLAS](https://rocm.docs.amd.com/projects/rocBLAS/en/docs/6.2.0) | 4.1.0 ⇒ [4.2.0](rocblas-4-2-0) | [{fab}`github fa-lg`](https://github.com/ROCm/rocBLAS/releases/tag/rocm-6.2.0) |
|
||||
| | | [rocFFT](https://rocm.docs.amd.com/projects/rocFFT/en/docs/6.2.0) | 1.0.27 ⇒ [1.0.28](rocfft-1-0-28) | [{fab}`github fa-lg`](https://github.com/ROCm/rocFFT/releases/tag/rocm-6.2.0) |
|
||||
| | | [rocRAND](https://rocm.docs.amd.com/projects/rocRAND/en/docs/6.2.0) | 3.0.0 ⇒ [3.1.0](rocrand-3-1-0) | [{fab}`github fa-lg`](https://github.com/ROCm/rocRAND/releases/tag/rocm-6.2.0) |
|
||||
| | | [rocSOLVER](https://rocm.docs.amd.com/projects/rocSOLVER/en/docs/6.2.0) | 3.25.0 ⇒ [3.26.0](rocsolver-3-26-0) | [{fab}`github fa-lg`](https://github.com/ROCm/rocSOLVER/releases/tag/rocm-6.2.0) |
|
||||
| | | [rocSPARSE](https://rocm.docs.amd.com/projects/rocSPARSE/en/docs/6.2.0) | 3.1.1 ⇒ [3.2.0](rocsparse-3-2-0) | [{fab}`github fa-lg`](https://github.com/ROCm/rocSPARSE/releases/tag/rocm-6.2.0) |
|
||||
| | | [rocWMMA](https://rocm.docs.amd.com/projects/rocWMMA/en/docs/6.2.0) | 1.4.0 ⇒ [1.5.0](rocwmma-1-5-0) | [{fab}`github fa-lg`](https://github.com/ROCm/rocWMMA/releases/tag/rocm-6.2.0) |
|
||||
| | | [Tensile](https://rocm.docs.amd.com/projects/tensile/en/docs/6.2.0) | 4.40.0 ⇒ [4.41.0](tensile-4-41-0) | [{fab}`github fa-lg`](https://github.com/ROCm/tensile/releases/tag/rocm-6.2.0) |
|
||||
| | **Primitives** | [hipCUB](https://rocm.docs.amd.com/projects/hipCUB/en/docs/6.2.0) | 3.1.0 ⇒ [3.2.0](hipcub-3-2-0) | [{fab}`github fa-lg`](https://github.com/ROCm/hipCUB/releases/tag/rocm-6.2.0) |
|
||||
| | | [hipTensor](https://rocm.docs.amd.com/projects/hipTensor/en/docs/6.2.0) | 1.2.0 ⇒ [1.3.0](hiptensor-1-3-0) | [{fab}`github fa-lg`](https://github.com/ROCm/hipTensor/releases/tag/rocm-6.2.0) |
|
||||
| | | [rocPRIM](https://rocm.docs.amd.com/projects/rocPRIM/en/docs/6.2.0) | 3.1.0 ⇒ [3.2.0](rocprim-3-2-0) | [{fab}`github fa-lg`](https://github.com/ROCm/rocPRIM/releases/tag/rocm-6.2.0) |
|
||||
| | | [rocThrust](https://rocm.docs.amd.com/projects/rocThrust/en/docs/6.2.0) | 3.0.0 ⇒ [3.1.0](rocthrust-3-1-0) | [{fab}`github fa-lg`](https://github.com/ROCm/rocThrust/releases/tag/rocm-6.2.0) |
|
||||
| **Tools** | **Development** | [HIPIFY](https://rocm.docs.amd.com/projects/HIPIFY/docs/6.2.0) | 17.0.0 ⇒ [18.0.0](hipify-18-0-0) | [{fab}`github fa-lg`](https://github.com/ROCm/HIPIFY/releases/tag/rocm-6.2.0) |
|
||||
| | | [ROCdbgapi](https://rocm.docs.amd.com/projects/ROCdbgapi/en/docs/6.2.0) | 0.71.0 ⇒ [0.76.0](rocdbgapi-0-76-0) | [{fab}`github fa-lg`](https://github.com/ROCm/ROCdbgapi/releases/tag/rocm-6.2.0) |
|
||||
| | | [ROCm CMake](https://rocm.docs.amd.com/projects/rocm-cmake/en/docs/6.2.0) | 0.12.0 ⇒ [0.13.0](rocm-cmake-0-13-0) | [{fab}`github fa-lg`](https://github.com/ROCm/rocm-cmake/releases/tag/rocm-6.2.0) |
|
||||
| | | [ROCm Debugger (ROCgdb)](https://rocm.docs.amd.com/projects/rocm-cmake/en/docs/6.2.0) | 13 ⇒ [15](rocgdb-15) | [{fab}`github fa-lg`](https://github.com/ROCm/ROCgdb/releases/tag/rocm-6.2.0) |
|
||||
| | | [ROCr Debug Agent](https://rocm.docs.amd.com/projects/rocr_debug_agent/en/docs/6.2.0) | 2.0.3 | [{fab}`github fa-lg`](https://github.com/ROCm/rocr_debug_agent/releases/tag/rocm-6.2.0) |
|
||||
| | **Performance** | [Omniperf](https://rocm.docs.amd.com/projects/omniperf/en/docs/6.2.0) | 2.0.1 | [{fab}`github fa-lg`](https://github.com/ROCm/omniperf/releases/tag/rocm-6.2.0) |
|
||||
| | | [Omnitrace](https://rocm.docs.amd.com/projects/omnitrace/en/docs/6.2.0) | 1.11.2 | [{fab}`github fa-lg`](https://github.com/ROCm/omnitrace/releases/tag/rocm-6.2.0) |
|
||||
| | | [ROCm Bandwidth Test](https://rocm.docs.amd.com/projects/rocm_bandwidth_test/en/docs/6.2.0) | 1.4.0 | [{fab}`github fa-lg`](https://github.com/ROCm/rocm_bandwidth_test/releases/tag/rocm-6.2.0) |
|
||||
| | | [ROCProfiler](https://rocm.docs.amd.com/projects/ROCProfiler/en/docs/6.2.0) | 2.0.0 ⇒ [2.0.0](rocprofiler-2-0-0) | [{fab}`github fa-lg`](https://github.com/ROCm/rocm_bandwidth_test/releases/tag/rocm-6.2.0) |
|
||||
| | | [ROCProfiler-SDK](https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/docs/6.2.0) | 0.4.0 | [{fab}`github fa-lg`](https://github.com/ROCm/rocm_bandwidth_test/releases/tag/rocm-6.2.0) |
|
||||
| | | [ROCTracer](https://rocm.docs.amd.com/projects/ROCTracer/en/docs/6.2.0) | 4.1.0 | [{fab}`github fa-lg`](https://github.com/ROCm/rocm_bandwidth_test/releases/tag/rocm-6.2.0) |
|
||||
| | **System** | [AMD SMI](https://rocm.docs.amd.com/projects/amdsmi/en/docs/6.2.0) | 24.5.2 ⇒ [24.6.1](amd-smi-24-6-1) | [{fab}`github fa-lg`](https://github.com/ROCm/rdc/releases/tag/rocm-6.2.0) |
|
||||
| | | [rocminfo](https://rocm.docs.amd.com/projects/rdc/en/docs/6.2.0) | 1.0.0 | [{fab}`github fa-lg`](https://github.com/ROCm/rdc/releases/tag/rocm-6.2.0) |
|
||||
| | | [ROCm Data Center Tool](https://rocm.docs.amd.com/projects/rdc/en/docs/6.2.0) | 0.3.0 ⇒ [1.0.0](rocm-data-center-tool-1-0-0) | [{fab}`github fa-lg`](https://github.com/ROCm/rdc/releases/tag/rocm-6.2.0) |
|
||||
| | | [ROCm SMI](https://rocm.docs.amd.com/projects/rdc/en/docs/6.2.0) | 7.2.0 | [{fab}`github fa-lg`](https://github.com/ROCm/rdc/releases/tag/rocm-6.2.0) |
|
||||
| | | [ROCm Validation Suite](https://rocm.docs.amd.com/projects/rdc/en/docs/6.2.0) | 1.0 | [{fab}`github fa-lg`](https://github.com/ROCm/rdc/releases/tag/rocm-6.2.0) |
|
||||
| | | [TransferBench](https://rocm.docs.amd.com/projects/rdc/en/docs/6.2.0) | 1.5.0 | [{fab}`github fa-lg`](https://github.com/ROCm/rdc/releases/tag/rocm-6.2.0) |
|
||||
| | **Compilers** | [hipCC](https://rocm.docs.amd.com/projects/hipCC/en/docs/6.2.0) | 1.0.0 ⇒ [1.1.1](hipcc-1-1-1) | [{fab}`github fa-lg`](https://github.com/ROCm/llvm-project/releases/tag/rocm-6.2.0) |
|
||||
| | | [llvm-project](https://rocm.docs.amd.com/projects/llvm-project/en/docs/6.2.0) | 17.0.0 ⇒ [18.0.0](llvm-project-18-0-0) | [{fab}`github fa-lg`](https://github.com/ROCm/llvm-project/releases/tag/rocm-6.2.0) |
|
||||
| **Runtimes** | | [HIP](https://rocm.docs.amd.com/projects/HIP/en/docs/6.2.0) | 6.1 ⇒ [6.2](hip-6-2-0) | [{fab}`github fa-lg`](https://github.com/ROCm/HIP/releases/tag/rocm-6.2.0) |
|
||||
| | | [ROCr Runtime](https://rocm.docs.amd.com/projects/ROCr-Runtime/en/docs/6.2.0) | 6.1 ⇒ [6.2](hip-6-2-0) | [{fab}`github fa-lg`](https://github.com/ROCm/ROCR-Runtime/releases/tag/rocm-6.2.0) |
|
||||
Reference in New Issue
Block a user