mirror of
https://github.com/ROCm/ROCm.git
synced 2026-01-10 15:18:11 -05:00
Compare commits
33 Commits
docs/6.2.1
...
rocm-6.2.2
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f93799f9c9 | ||
|
|
a51979a2a5 | ||
|
|
912fab238c | ||
|
|
a34e7a381a | ||
|
|
e691307d60 | ||
|
|
681a98a59d | ||
|
|
15d5040517 | ||
|
|
41db131ada | ||
|
|
17ad66f3d4 | ||
|
|
5d1204ecdc | ||
|
|
c542ee1363 | ||
|
|
ed4ffafec5 | ||
|
|
3fe8e17e56 | ||
|
|
fa31ee35e3 | ||
|
|
4b737a1cda | ||
|
|
ff0d863a0d | ||
|
|
b203a7cb74 | ||
|
|
7d0e1aa06c | ||
|
|
920efcd0b1 | ||
|
|
ff26ae7c18 | ||
|
|
f9f92037cb | ||
|
|
4f9aadd6b6 | ||
|
|
3ab3782a7f | ||
|
|
516b6c8ebc | ||
|
|
508f3e1afb | ||
|
|
8d6d82d65f | ||
|
|
c5d0fb8477 | ||
|
|
3dc6d89297 | ||
|
|
7c0c197c91 | ||
|
|
055511ed33 | ||
|
|
210be113e4 | ||
|
|
7963d27853 | ||
|
|
a493148cbc |
@@ -8,20 +8,21 @@ parameters:
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- git
|
||||
- cmake
|
||||
- ninja-build
|
||||
- git
|
||||
- libdnnl-dev
|
||||
- libdrm-dev
|
||||
- libnuma-dev
|
||||
- python3-pip
|
||||
- python3-venv
|
||||
- libtbb-dev
|
||||
- nlohmann-json3-dev
|
||||
- libmsgpack-dev
|
||||
- libsqlite3-dev
|
||||
- libnuma-dev
|
||||
- libprotobuf-dev
|
||||
- libsqlite3-dev
|
||||
- libtbb-dev
|
||||
- ninja-build
|
||||
- nlohmann-json3-dev
|
||||
- protobuf-compiler
|
||||
- python3-pip
|
||||
- python3-pybind11
|
||||
- python3-venv
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
@@ -95,14 +96,12 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
|
||||
-DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
|
||||
-DMIGRAPHX_USE_COMPOSABLEKERNEL=OFF
|
||||
-DBUILD_TESTING=ON
|
||||
-GNinja
|
||||
# REFERENCE: https://github.com/ROCm/composable_kernel/issues/782
|
||||
|
||||
@@ -8,15 +8,21 @@ parameters:
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- libelf-dev
|
||||
- g++
|
||||
- libdrm-dev
|
||||
- libelf-dev
|
||||
- libnuma-dev
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- llvm-project
|
||||
- rocprofiler-register
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
- llvm-project
|
||||
- rocm_smi_lib
|
||||
- rocprofiler-register
|
||||
|
||||
jobs:
|
||||
- job: ROCR_Runtime
|
||||
@@ -50,3 +56,120 @@ jobs:
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DBUILD_SHARED_LIBS=ON
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
|
||||
- job: ROCR_Runtime_testing
|
||||
dependsOn: ROCR_Runtime
|
||||
condition: succeeded()
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: $(JOB_TEST_POOL)
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
JOB_TEST_POOL: ${{ variables.GFX942_TEST_POOL }}
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- task: Bash@3
|
||||
displayName: Install libhwloc5
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
wget http://ftp.us.debian.org/debian/pool/main/h/hwloc/libhwloc5_1.11.12-3_amd64.deb
|
||||
wget http://ftp.us.debian.org/debian/pool/main/h/hwloc/libhwloc-dev_1.11.12-3_amd64.deb
|
||||
sudo apt install -y --allow-downgrades ./libhwloc5_1.11.12-3_amd64.deb ./libhwloc-dev_1.11.12-3_amd64.deb
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- task: Bash@3
|
||||
displayName: Build kfdtest
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
workingDirectory: $(Build.SourcesDirectory)/libhsakmt/tests/kfdtest
|
||||
script: |
|
||||
mkdir build && cd build
|
||||
cmake -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm ..
|
||||
make
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: kfdtest
|
||||
testExecutable: BIN_DIR=$(Build.SourcesDirectory)/libhsakmt/tests/kfdtest/build ./run_kfdtest.sh
|
||||
testParameters: '-p core --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
testDir: $(Build.SourcesDirectory)/libhsakmt/tests/kfdtest/scripts
|
||||
- task: Bash@3
|
||||
displayName: Build rdmatest app
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
workingDirectory: $(Build.SourcesDirectory)/libhsakmt/tests/rdma/simple/app
|
||||
script: |
|
||||
cmake -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm -DLIBHSAKMT_PATH=$(Agent.BuildDirectory)/rocm -DDRM_AMDGPU_INCLUDE_DIRS=$(Agent.BuildDirectory)/rocm/include .
|
||||
cmake --build .
|
||||
- task: Bash@3
|
||||
displayName: Build rdmatest driver
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
workingDirectory: $(Build.SourcesDirectory)/libhsakmt/tests/rdma/simple/drv
|
||||
script: |
|
||||
sed -i 's/HSAKMT_PAGE_SHIFT/PAGE_SHIFT/g' amdp2ptest.c
|
||||
sed -i 's/"MIT"/"GPL"/' amdp2ptest.c
|
||||
RDMA_HEADER_DIR=/usr/src/amdgpu-*/include make all
|
||||
- task: Bash@3
|
||||
displayName: Install rdmatest driver
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
workingDirectory: $(Build.SourcesDirectory)/libhsakmt/tests/rdma/simple/drv
|
||||
script: |
|
||||
sudo rmmod amdp2ptest.ko
|
||||
sudo insmod amdp2ptest.ko
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rdmatest
|
||||
testExecutable: yes | ./rdma_test
|
||||
testParameters: ''
|
||||
testDir: $(Build.SourcesDirectory)/libhsakmt/tests/rdma/simple/app
|
||||
testPublishResults: false
|
||||
- task: Bash@3
|
||||
displayName: Build rocrtst
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
workingDirectory: $(Build.SourcesDirectory)/rocrtst/suites/test_common
|
||||
script: |
|
||||
sudo rm -rf $(Agent.BuildDirectory)/external/llvm-project
|
||||
mkdir -p $(Agent.BuildDirectory)/external/llvm-project/clang/lib
|
||||
sudo ln -s $(Agent.BuildDirectory)/rocm/llvm/lib/clang/20/include $(Agent.BuildDirectory)/external/llvm-project/clang/lib/Headers
|
||||
mkdir build && cd build
|
||||
cmake .. \
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm \
|
||||
-DTARGET_DEVICES=$(JOB_GPU_TARGET) \
|
||||
-DROCM_DIR=$(Agent.BuildDirectory)/rocm \
|
||||
-DLLVM_DIR=$(Agent.BuildDirectory)/rocm/llvm/bin \
|
||||
-DOPENCL_DIR=$(Agent.BuildDirectory)/rocm/llvm/bin
|
||||
make
|
||||
make rocrtst_kernels
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocrtst
|
||||
testExecutable: ./rocrtst64
|
||||
testParameters: '--gtest_filter="-rocrtstNeg.Memory_Negative_Tests" --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
testDir: $(Build.SourcesDirectory)/rocrtst/suites/test_common/build/$(JOB_GPU_TARGET)
|
||||
|
||||
@@ -43,6 +43,7 @@ parameters:
|
||||
- rocprofiler-register
|
||||
- ROCR-Runtime
|
||||
- rocRAND
|
||||
- roctracer
|
||||
|
||||
jobs:
|
||||
- job: ROCmValidationSuite
|
||||
|
||||
@@ -90,6 +90,8 @@ jobs:
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache
|
||||
-DCMAKE_HIP_FLAGS="-Wno-missing-include-dirs"
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCK_BUILD_JIT_LIB=ON
|
||||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
-GNinja
|
||||
|
||||
@@ -89,7 +89,7 @@ jobs:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
- job: hip_tests_testing
|
||||
timeoutInMinutes: 180
|
||||
timeoutInMinutes: 240
|
||||
dependsOn: hip_tests
|
||||
condition: succeeded()
|
||||
variables:
|
||||
@@ -130,6 +130,7 @@ jobs:
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo rm -rf /opt/rocm
|
||||
sudo mkdir -p /opt/rocm/bin
|
||||
sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rocm_agent_enumerator /opt/rocm/bin/rocm_agent_enumerator
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
|
||||
@@ -37,6 +37,7 @@ parameters:
|
||||
- rocprofiler-register
|
||||
- ROCR-Runtime
|
||||
- rocSPARSE
|
||||
- roctracer
|
||||
|
||||
jobs:
|
||||
- job: hipSPARSE
|
||||
|
||||
141
.azuredevops/components/omnitrace.yml
Normal file
141
.azuredevops/components/omnitrace.yml
Normal file
@@ -0,0 +1,141 @@
|
||||
# largely referenced from: https://github.com/ROCm/omnitrace/blob/main/.github/workflows/ubuntu-jammy.yml
|
||||
parameters:
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- autoconf
|
||||
- autotools-dev
|
||||
- bison
|
||||
- build-essential
|
||||
- bzip2
|
||||
- clang
|
||||
- cmake
|
||||
- environment-modules
|
||||
- g++-12
|
||||
- libdrm-dev
|
||||
- libfabric-dev
|
||||
- libiberty-dev
|
||||
- libpapi-dev
|
||||
- libpfm4-dev
|
||||
- libtool
|
||||
- libopenmpi-dev
|
||||
- m4
|
||||
- openmpi-bin
|
||||
- software-properties-common
|
||||
- python3-pip
|
||||
- texinfo
|
||||
- zlib1g-dev
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
- numpy
|
||||
- perfetto
|
||||
- dataclasses
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- clr
|
||||
- llvm-project
|
||||
- rccl
|
||||
- rocm-core
|
||||
- rocm_smi_lib
|
||||
- rocminfo
|
||||
- ROCR-Runtime
|
||||
- rocprofiler
|
||||
- rocprofiler-register
|
||||
- roctracer
|
||||
|
||||
jobs:
|
||||
- job: omnitrace
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- task: Bash@3
|
||||
displayName: ROCm symbolic link
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo rm -rf /opt/rocm
|
||||
sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
|
||||
- task: Bash@3
|
||||
displayName: Add ROCm binaries to PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
|
||||
- task: Bash@3
|
||||
displayName: Add ROCm compilers to PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
# build flags reference: https://rocm.docs.amd.com/projects/omnitrace/en/latest/install/install.html
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DOMNITRACE_BUILD_TESTING=ON
|
||||
-DOMNITRACE_BUILD_DYNINST=ON
|
||||
-DOMNITRACE_BUILD_LIBUNWIND=ON
|
||||
-DDYNINST_BUILD_TBB=ON
|
||||
-DDYNINST_BUILD_ELFUTILS=ON
|
||||
-DDYNINST_BUILD_LIBIBERTY=ON
|
||||
-DDYNINST_BUILD_BOOST=ON
|
||||
-DOMNITRACE_USE_PAPI=ON
|
||||
-DOMNITRACE_USE_MPI=ON
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
multithreadFlag: -- -j32
|
||||
- task: Bash@3
|
||||
displayName: Set up omnitrace env
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: source share/omnitrace/setup-env.sh
|
||||
workingDirectory: build
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: omnitrace
|
||||
- task: Bash@3
|
||||
displayName: Remove ROCm binaries from PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: echo "##vso[task.setvariable variable=PATH]$(echo $PATH | sed -e 's;:$(Agent.BuildDirectory)/rocm/bin;;' -e 's;^/;;' -e 's;/$;;')"
|
||||
- task: Bash@3
|
||||
displayName: Remove ROCm compilers from PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: echo "##vso[task.setvariable variable=PATH]$(echo $PATH | sed -e 's;:$(Agent.BuildDirectory)/rocm/llvm/bin;;' -e 's;^/;;' -e 's;/$;;')"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
@@ -43,6 +43,7 @@ parameters:
|
||||
- ROCR-Runtime
|
||||
- rocRAND
|
||||
- rocSPARSE
|
||||
- roctracer
|
||||
|
||||
jobs:
|
||||
- job: rocALUTION
|
||||
|
||||
@@ -52,6 +52,7 @@ parameters:
|
||||
- rocprofiler-register
|
||||
- rocm_smi_lib
|
||||
- ROCR-Runtime
|
||||
- roctracer
|
||||
|
||||
jobs:
|
||||
- job: rocBLAS
|
||||
|
||||
@@ -105,7 +105,9 @@ jobs:
|
||||
displayName: ROCm symbolic link
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
|
||||
script: |
|
||||
sudo rm -rf /opt/rocm
|
||||
sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
installEnabled: false
|
||||
|
||||
@@ -73,6 +73,13 @@ jobs:
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- task: Bash@3
|
||||
displayName: 'ROCm symbolic link'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo rm -rf /opt/rocm
|
||||
sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
@@ -85,6 +92,24 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
publish: false
|
||||
- task: Bash@3
|
||||
displayName: Create wheel file
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: python setup.py bdist_wheel
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
|
||||
parameters:
|
||||
sourceDir: $(Build.SourcesDirectory)/dist
|
||||
contentsString: '*.whl'
|
||||
targetDir: $(Build.ArtifactStagingDirectory)
|
||||
clean: false
|
||||
- task: PublishPipelineArtifact@1
|
||||
displayName: 'wheel file Publish'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetPath: $(Build.ArtifactStagingDirectory)
|
||||
|
||||
- job: rocPyDecode_testing
|
||||
dependsOn: rocPyDecode
|
||||
|
||||
@@ -40,6 +40,7 @@ parameters:
|
||||
- rocminfo
|
||||
- rocprofiler-register
|
||||
- ROCR-Runtime
|
||||
- roctracer
|
||||
|
||||
jobs:
|
||||
- job: rocSPARSE
|
||||
|
||||
95
.azuredevops/components/rocprofiler-sdk.yml
Normal file
95
.azuredevops/components/rocprofiler-sdk.yml
Normal file
@@ -0,0 +1,95 @@
|
||||
parameters:
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- python3-pip
|
||||
- libdrm-dev
|
||||
- libdw-dev
|
||||
- libelf-dev
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
- black
|
||||
- clang-format
|
||||
- clang-tidy
|
||||
- cmake
|
||||
- cmake-format
|
||||
- dataclasses
|
||||
- numpy
|
||||
- otf2
|
||||
- pandas
|
||||
- perfetto
|
||||
- pycobertura
|
||||
- pytest
|
||||
- pyyaml
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- clr
|
||||
- llvm-project
|
||||
- rccl
|
||||
- rocm-cmake
|
||||
- rocm-core
|
||||
- rocminfo
|
||||
- ROCR-Runtime
|
||||
- rocprofiler-register
|
||||
- roctracer
|
||||
|
||||
jobs:
|
||||
- job: rocprofilersdk
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
gfx90a:
|
||||
JOB_GPU_TARGET: gfx90a
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
# CI case: download latest default branch build
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
# manual build case: triggered by ROCm/ROCm repo
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCPROFILER_BUILD_TESTS=ON
|
||||
-DROCPROFILER_BUILD_SAMPLES=OFF
|
||||
-DAMDGPU_TARGETS=$(JOB_GPU_TARGET)
|
||||
multithreadFlag: -- -j2
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
@@ -9,8 +9,18 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- libopencv-dev
|
||||
- libsndfile1-dev
|
||||
- libstdc++-12-dev
|
||||
- imagemagick
|
||||
- ninja-build
|
||||
- clang
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
- openpyxl
|
||||
- pandas
|
||||
- sphinx
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -20,6 +30,18 @@ parameters:
|
||||
- llvm-project
|
||||
- rocminfo
|
||||
- ROCR-Runtime
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
- aomp
|
||||
- clr
|
||||
- half
|
||||
- hipTensor
|
||||
- llvm-project
|
||||
- rocm-cmake
|
||||
- rocminfo
|
||||
- rocprofiler-register
|
||||
- ROCR-Runtime
|
||||
|
||||
jobs:
|
||||
- job: rpp
|
||||
@@ -68,3 +90,92 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
|
||||
- job: rpp_testing
|
||||
dependsOn: rpp
|
||||
condition: succeeded()
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: LD_LIBRARY_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm/lib;$(Agent.BuildDirectory)/rocm/llvm/lib
|
||||
pool: $(JOB_TEST_POOL)
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
JOB_TEST_POOL: ${{ variables.GFX942_TEST_POOL }}
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
${{ if eq(parameters.checkoutRef, '') }}:
|
||||
dependencySource: staging
|
||||
${{ elseif ne(parameters.checkoutRef, '') }}:
|
||||
dependencySource: tag-builds
|
||||
# Dependencies from: https://github.com/ROCm/rpp/blob/develop/utilities/test_suite/README.md
|
||||
- task: Bash@3
|
||||
displayName: Build and install Turbo JPEG
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
sudo apt-get install nasm
|
||||
sudo apt-get install wget
|
||||
git clone -b 3.0.2 https://github.com/libjpeg-turbo/libjpeg-turbo.git
|
||||
cd libjpeg-turbo
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DCMAKE_INSTALL_PREFIX=/usr \
|
||||
-DCMAKE_BUILD_TYPE=RELEASE \
|
||||
-DENABLE_STATIC=FALSE \
|
||||
-DCMAKE_INSTALL_DEFAULT_LIBDIR=lib \
|
||||
-DWITH_JPEG8=TRUE \
|
||||
..
|
||||
make -j$nproc
|
||||
sudo make install
|
||||
- task: Bash@3
|
||||
displayName: Build and install Nifti
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
git clone https://github.com/NIFTI-Imaging/nifti_clib.git
|
||||
cd nifti_clib
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
sudo make -j$nproc install
|
||||
- task: Bash@3
|
||||
displayName: Build rpp tests
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
script: |
|
||||
sudo rm -rf /opt/rocm
|
||||
sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
|
||||
mkdir rpp-tests
|
||||
cd rpp-tests
|
||||
cmake /opt/rocm/share/rpp/test \
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++ \
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rpp
|
||||
testExecutable: 'export PATH=$(Agent.BuildDirectory)/rocm/llvm/bin:$PATH; CC=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang CMAKE_VERBOSE_MAKEFILE=ON VERBOSE=1 ctest'
|
||||
testDir: 'rpp-tests'
|
||||
- script: sudo rm /opt/rocm
|
||||
|
||||
@@ -62,6 +62,8 @@ parameters:
|
||||
- ffmpeg
|
||||
- libopenblas-dev
|
||||
- liblapack-dev
|
||||
- libswscale-dev
|
||||
- libavformat-dev
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
@@ -87,6 +89,9 @@ parameters:
|
||||
# list for vision
|
||||
- auditwheel
|
||||
- future
|
||||
- pytest
|
||||
- pytest-azurepipelines
|
||||
- pillow
|
||||
# list from https://github.com/pytorch/builder/blob/main/manywheel/build_rocm.sh
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
@@ -117,6 +122,24 @@ parameters:
|
||||
- hipCUB
|
||||
- rocThrust
|
||||
- hipBLAS-common
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
- rocminfo
|
||||
# Reference on what tests to run for torchvision found in private repo:
|
||||
# https://github.com/ROCm/rocAutomation/blob/jenkins-pipelines/pytorch/pytorch_ci/test_pytorch_test1.sh#L54
|
||||
# Will iterate through this list using pytest
|
||||
- name: torchTestList
|
||||
type: object
|
||||
default:
|
||||
- nn
|
||||
- torch
|
||||
# - cuda seg faults and might need cuda installed on test system
|
||||
# - ops takes too long
|
||||
- unary_ufuncs
|
||||
- binary_ufuncs
|
||||
- autograd
|
||||
# - inductor/torchinductor takes too long
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
@@ -295,12 +318,22 @@ jobs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/pytorch/vision.git --depth=1 --recurse-submodules
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Apply vision patch
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
git apply $(Build.SourcesDirectory)/.azuredevops/patches/torchvision-package-name.patch
|
||||
workingDirectory: $(Build.SourcesDirectory)/vision
|
||||
- task: Bash@3
|
||||
displayName: Build vision
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: >-
|
||||
BUILD_VERSION=$(ROCM_BRANCH).$(JOB_GPU_TARGET)-$(cat $(Build.SourcesDirectory)/pytorch/version.txt | cut -da -f1)$(date -u +%Y%m%d)
|
||||
TORCH_PACKAGE_NAME=torch.$(ROCM_BRANCH).$(JOB_GPU_TARGET)
|
||||
TORCHVISION_PACKAGE_NAME=torchvision.$(ROCM_BRANCH).$(JOB_GPU_TARGET)
|
||||
PYTORCH_VERSION=$(cat $(Build.SourcesDirectory)/pytorch/version.txt | cut -da -f1)post$(date -u +%Y%m%d)
|
||||
BUILD_VERSION=$(cat $(Build.SourcesDirectory)/vision/version.txt | cut -da -f1)post$(date -u +%Y%m%d)
|
||||
python3 setup.py bdist_wheel
|
||||
workingDirectory: $(Build.SourcesDirectory)/vision
|
||||
- task: Bash@3
|
||||
@@ -319,3 +352,192 @@ jobs:
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetPath: $(Build.BinariesDirectory)
|
||||
|
||||
- job: torchvision_testing
|
||||
dependsOn: pytorch
|
||||
condition: succeeded()
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: PYTORCH_TEST_WITH_ROCM
|
||||
value: 1
|
||||
pool: $(JOB_TEST_POOL)
|
||||
workspace:
|
||||
clean: all
|
||||
strategy:
|
||||
matrix:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
JOB_TEST_POOL: ${{ variables.GFX942_TEST_POOL }}
|
||||
steps:
|
||||
- task: Bash@3
|
||||
displayName: 'Register libjpeg-turbo packages'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
|
||||
wget -q -O- https://packagecloud.io/dcommander/libjpeg-turbo/gpgkey | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/libjpeg-turbo.gpg > /dev/null
|
||||
echo "deb [signed-by=/etc/apt/trusted.gpg.d/libjpeg-turbo.gpg] https://packagecloud.io/dcommander/libjpeg-turbo/any/ any main" | sudo tee /etc/apt/sources.list.d/libjpeg-turbo.list
|
||||
sudo apt update
|
||||
apt-cache show libjpeg-turbo-official | grep Version
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: 'Download Pipeline Wheel Files'
|
||||
inputs:
|
||||
itemPattern: '**/*$(JOB_GPU_TARGET)*.whl'
|
||||
targetPath: $(Agent.BuildDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
dependencySource: staging
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
dependencySource: staging
|
||||
skipLlvmSymlink: true
|
||||
# get sources to run test scripts
|
||||
- task: Bash@3
|
||||
displayName: git clone upstream pytorch
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/pytorch/pytorch.git --depth=1 --recurse-submodules
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: git clone pytorch vision
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/pytorch/vision.git --depth=1 --recurse-submodules
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Install Wheel Files
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: find . -name "*.whl" -exec pip install --no-index --find-links=. --no-dependencies -v {} \;
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Show Updated pip List
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: pip list -v
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Add ROCm binaries to PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
|
||||
- task: Bash@3
|
||||
displayName: Add Python site-packages binaries to path
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
USER_BASE=$(python3 -m site --user-base)
|
||||
echo "##vso[task.prependpath]$USER_BASE/bin"
|
||||
- task: Bash@3
|
||||
displayName: Add torch libs to ldconfig
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
echo $(python3 -m site --user-site)/torch/lib | sudo tee /etc/ld.so.conf.d/torch.conf
|
||||
sudo ldconfig -v
|
||||
ldconfig -p
|
||||
# https://pytorch.org/get-started/locally/#linux-verification
|
||||
# https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/3rd-party/pytorch-install.html#testing-the-pytorch-installation
|
||||
- task: Bash@3
|
||||
displayName: Simple Import Torch Tests
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
python3 -c 'import torch' 2> /dev/null && echo 'Success' || echo 'Failure'
|
||||
python3 -c 'import torch; print(torch.cuda.is_available())'
|
||||
python3 -c 'import torch; x = torch.rand(5, 3); print(x)'
|
||||
# Test artifact build script has too many if statements for different environments
|
||||
# Based off the snippet of interest for this environment, with some adjustments
|
||||
# https://github.com/pytorch/pytorch/blob/main/.ci/pytorch/build.sh#L335-L375
|
||||
# Removing in-line comments since it does not fit with the yaml markup
|
||||
- task: Bash@3
|
||||
displayName: Build Pytorch Test Artifacts
|
||||
continueOnError: true
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
CUSTOM_TEST_ARTIFACT_BUILD_DIR="build/custom_test_artifacts"
|
||||
CUSTOM_TEST_USE_ROCM=ON
|
||||
CUSTOM_TEST_MODULE_PATH="${PWD}/cmake/public"
|
||||
mkdir -pv "${CUSTOM_TEST_ARTIFACT_BUILD_DIR}"
|
||||
|
||||
CUSTOM_OP_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-op-build"
|
||||
CUSTOM_OP_TEST="${PWD}/test/custom_operator"
|
||||
python --version
|
||||
SITE_PACKAGES="$(python -c 'import site; print(";".join([x for x in site.getsitepackages()] + [x + "/torch" for x in site.getsitepackages()]))')"
|
||||
|
||||
mkdir -p "$CUSTOM_OP_BUILD"
|
||||
pushd "$CUSTOM_OP_BUILD"
|
||||
cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
|
||||
-DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
|
||||
make VERBOSE=1
|
||||
popd
|
||||
|
||||
JIT_HOOK_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/jit-hook-build"
|
||||
JIT_HOOK_TEST="$PWD/test/jit_hooks"
|
||||
python --version
|
||||
SITE_PACKAGES="$(python -c 'import site; print(";".join([x for x in site.getsitepackages()] + [x + "/torch" for x in site.getsitepackages()]))')"
|
||||
mkdir -p "$JIT_HOOK_BUILD"
|
||||
pushd "$JIT_HOOK_BUILD"
|
||||
cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
|
||||
-DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
|
||||
make VERBOSE=1
|
||||
popd
|
||||
|
||||
CUSTOM_BACKEND_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-backend-build"
|
||||
CUSTOM_BACKEND_TEST="${PWD}/test/custom_backend"
|
||||
python --version
|
||||
mkdir -p "$CUSTOM_BACKEND_BUILD"
|
||||
pushd "$CUSTOM_BACKEND_BUILD"
|
||||
cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
|
||||
-DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
|
||||
make VERBOSE=1
|
||||
popd
|
||||
workingDirectory: $(Build.SourcesDirectory)/pytorch
|
||||
- ${{ each torchTest in parameters.torchTestList }}:
|
||||
- task: Bash@3
|
||||
displayName: Test ${{ torchTest }}
|
||||
continueOnError: true
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: $(Build.SourcesDirectory)/pytorch
|
||||
${{ if contains(torchTest, '/') }}:
|
||||
script: pytest test/${{ split(torchTest, '/')[0] }}/test_${{ split(torchTest, '/')[1] }}.py
|
||||
${{ else }}:
|
||||
script: pytest test/test_${{ torchTest }}.py
|
||||
# Reference on what tests to run for torchvision found in private repo:
|
||||
# https://github.com/ROCm/rocAutomation/blob/jenkins-pipelines/pytorch/pytorch_ci/test_torchvision.sh#L51
|
||||
- task: Bash@3
|
||||
displayName: Test vision/transforms
|
||||
continueOnError: true
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: pytest test/test_transforms.py
|
||||
workingDirectory: $(Build.SourcesDirectory)/vision
|
||||
- task: Bash@3
|
||||
displayName: Uninstall Wheel Files
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: find . -name "*.whl" -exec pip uninstall -y {} \;
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
- task: Bash@3
|
||||
displayName: Remove Python site-packages binaries from path
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
USER_BASE=$(python3 -m site --user-base)
|
||||
echo "##vso[task.setvariable variable=PATH]$(echo $PATH | sed -e 's;:$USER_BASE/bin;;' -e 's;^/;;' -e 's;/$;;')"
|
||||
- task: Bash@3
|
||||
displayName: Remove ROCm binaries from PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: echo "##vso[task.setvariable variable=PATH]$(echo $PATH | sed -e 's;:$(Agent.BuildDirectory)/rocm/bin;;' -e 's;^/;;' -e 's;/$;;')"
|
||||
|
||||
@@ -101,6 +101,7 @@ jobs:
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
dependencySource: staging
|
||||
skipLibraryLinking: true
|
||||
skipLlvmSymlink: true
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
- script: df -h
|
||||
displayName: System disk space after ROCm
|
||||
|
||||
34
.azuredevops/patches/torchvision-package-name.patch
Normal file
34
.azuredevops/patches/torchvision-package-name.patch
Normal file
@@ -0,0 +1,34 @@
|
||||
From 036307033d5c187b3123dae46477feacbd06d0ab Mon Sep 17 00:00:00 2001
|
||||
From: Joseph Macaranas <Joseph.Macaranas@amd.com>
|
||||
Date: Sun, 22 Sep 2024 23:03:48 -0400
|
||||
Subject: [PATCH] Allow custom package name for CI builds of torchvision
|
||||
|
||||
---
|
||||
setup.py | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/setup.py b/setup.py
|
||||
index 4b0525d8e4..2c51ce04f2 100644
|
||||
--- a/setup.py
|
||||
+++ b/setup.py
|
||||
@@ -42,7 +42,7 @@ CSRS_DIR = ROOT_DIR / "torchvision/csrc"
|
||||
IS_ROCM = (torch.version.hip is not None) and (ROCM_HOME is not None)
|
||||
BUILD_CUDA_SOURCES = (torch.cuda.is_available() and ((CUDA_HOME is not None) or IS_ROCM)) or FORCE_CUDA
|
||||
|
||||
-PACKAGE_NAME = "torchvision"
|
||||
+PACKAGE_NAME = os.getenv("TORCHVISION_PACKAGE_NAME", "torchvision")
|
||||
|
||||
print("Torchvision build configuration:")
|
||||
print(f"{FORCE_CUDA = }")
|
||||
@@ -98,7 +98,7 @@ def get_requirements():
|
||||
except DistributionNotFound:
|
||||
return None
|
||||
|
||||
- pytorch_dep = "torch"
|
||||
+ pytorch_dep = os.getenv("TORCH_PACKAGE_NAME", "torch")
|
||||
if os.getenv("PYTORCH_VERSION"):
|
||||
pytorch_dep += "==" + os.getenv("PYTORCH_VERSION")
|
||||
|
||||
--
|
||||
2.44.0.windows.1
|
||||
|
||||
29
.azuredevops/tag-builds/omnitrace.yml
Normal file
29
.azuredevops/tag-builds/omnitrace.yml
Normal file
@@ -0,0 +1,29 @@
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
|
||||
parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: refs/tags/$(LATEST_RELEASE_TAG)
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
- repository: release_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/omnitrace
|
||||
ref: ${{ parameters.checkoutRef }}
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_COMPONENT_PATH }}/omnitrace.yml
|
||||
parameters:
|
||||
checkoutRepo: release_repo
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
29
.azuredevops/tag-builds/rocprofiler-sdk.yml
Normal file
29
.azuredevops/tag-builds/rocprofiler-sdk.yml
Normal file
@@ -0,0 +1,29 @@
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
|
||||
parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: refs/tags/$(LATEST_RELEASE_TAG)
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
- repository: release_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/rocprofiler-sdk
|
||||
ref: ${{ parameters.checkoutRef }}
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_COMPONENT_PATH }}/rocprofiler-sdk.yml
|
||||
parameters:
|
||||
checkoutRepo: release_repo
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
@@ -29,6 +29,7 @@ parameters:
|
||||
composable_kernel: develop
|
||||
half: rocm
|
||||
HIP: amd-staging
|
||||
hip-tests: amd-staging
|
||||
hipBLAS: develop
|
||||
hipBLASLt: develop
|
||||
hipBLAS-common: develop
|
||||
@@ -44,6 +45,7 @@ parameters:
|
||||
llvm-project: amd-staging
|
||||
MIOpen: develop
|
||||
MIVisionX: develop
|
||||
omniperf: amd-staging
|
||||
rccl: develop
|
||||
rdc: develop
|
||||
rocAL: develop
|
||||
@@ -64,6 +66,7 @@ parameters:
|
||||
rocPRIM: develop
|
||||
rocprofiler-register: amd-mainline
|
||||
rocprofiler: amd-staging
|
||||
rocPyDecode: develop
|
||||
ROCR-Runtime: amd-staging
|
||||
rocRAND: develop
|
||||
rocr_debug_agent: amd-staging
|
||||
@@ -79,12 +82,14 @@ parameters:
|
||||
- amdsmi
|
||||
- aomp
|
||||
- HIPIFY
|
||||
- MIVisionX
|
||||
- omniperf
|
||||
- rccl
|
||||
- rdc
|
||||
- rocm-cmake
|
||||
- rocm_smi_lib
|
||||
- rocFFT
|
||||
- MIVisionX
|
||||
- rpp
|
||||
# BELOW REQUIRED IF useDefaultBranch false
|
||||
- name: branchName
|
||||
type: string
|
||||
|
||||
@@ -37,6 +37,7 @@ parameters:
|
||||
composable_kernel: $(COMPOSABLE_KERNEL_PIPELINE_ID)
|
||||
half: $(HALF_PIPELINE_ID)
|
||||
HIP: $(HIP_PIPELINE_ID)
|
||||
hip-tests: $(HIP_TESTS_PIPELINE_ID)
|
||||
hipBLAS: $(HIPBLAS_PIPELINE_ID)
|
||||
hipBLAS-common: $(HIPBLAS_COMMON_PIPELINE_ID)
|
||||
hipBLASLt: $(HIPBLASLT_PIPELINE_ID)
|
||||
@@ -73,6 +74,7 @@ parameters:
|
||||
rocPRIM: $(ROCPRIM_PIPELINE_ID)
|
||||
rocprofiler-register: $(ROCPROFILER_REGISTER_PIPELINE_ID)
|
||||
rocprofiler: $(ROCPROFILER_PIPELINE_ID)
|
||||
rocPyDecode: $(ROCPYDECODE_PIPELINE_ID)
|
||||
ROCR-Runtime: $(ROCR_RUNTIME_PIPELINE_ID)
|
||||
rocRAND: $(ROCRAND_PIPELINE_ID)
|
||||
rocr_debug_agent: $(ROCR_DEBUG_AGENT_PIPELINE_ID)
|
||||
@@ -94,6 +96,7 @@ parameters:
|
||||
composable_kernel: $(COMPOSABLE_KERNEL_TAGGED_PIPELINE_ID)
|
||||
half: $(HALF_TAGGED_PIPELINE_ID)
|
||||
HIP: $(HIP_TAGGED_PIPELINE_ID)
|
||||
hip-tests: $(HIP_TESTS_TAGGED_PIPELINE_ID)
|
||||
hipBLAS: $(HIPBLAS_TAGGED_PIPELINE_ID)
|
||||
hipBLAS-common: $(HIPBLAS_COMMON_TAGGED_PIPELINE_ID)
|
||||
hipBLASLt: $(HIPBLASLT_TAGGED_PIPELINE_ID)
|
||||
@@ -130,6 +133,7 @@ parameters:
|
||||
rocPRIM: $(ROCPRIM_TAGGED_PIPELINE_ID)
|
||||
rocprofiler-register: $(ROCPROFILER_REGISTER_TAGGED_PIPELINE_ID)
|
||||
rocprofiler: $(ROCPROFILER_TAGGED_PIPELINE_ID)
|
||||
rocPyDecode: $(ROCPYDECODE_TAGGED_PIPELINE_ID)
|
||||
ROCR-Runtime: $(ROCR_RUNTIME_TAGGED_PIPELINE_ID)
|
||||
rocRAND: $(ROCRAND_TAGGED_PIPELINE_ID)
|
||||
rocr_debug_agent: $(ROCR_DEBUG_AGENT_TAGGED_PIPELINE_ID)
|
||||
@@ -175,6 +179,7 @@ parameters:
|
||||
- rocm-examples
|
||||
- rocPRIM
|
||||
- rocprofiler
|
||||
- rocPyDecode
|
||||
- rocRAND
|
||||
- rocSOLVER
|
||||
- rocSPARSE
|
||||
|
||||
@@ -79,6 +79,10 @@ variables:
|
||||
value: 93
|
||||
- name: HIP_TAGGED_PIPELINE_ID
|
||||
value: 31
|
||||
- name: HIP_TESTS_PIPELINE_ID
|
||||
value: 233
|
||||
- name: HIP_TESTS_TAGGED_PIPELINE_ID
|
||||
value: 220
|
||||
- name: HIPBLAS_COMMON_PIPELINE_ID
|
||||
value: 223
|
||||
- name: HIPBLAS_COMMON_TAGGED_PIPELINE_ID
|
||||
@@ -223,6 +227,10 @@ variables:
|
||||
value: 22
|
||||
- name: ROCM_EXAMPLES_GFX942_TEST_PIPELINE_ID
|
||||
value: 204
|
||||
- name: ROCM_EXAMPLES_PIPELINE_ID
|
||||
value: 216
|
||||
- name: ROCM_EXAMPLES_TAGGED_PIPELINE_ID
|
||||
value: 245
|
||||
- name: ROCM_SMI_LIB_PIPELINE_ID
|
||||
value: 96
|
||||
- name: ROCM_SMI_LIB_TAGGED_PIPELINE_ID
|
||||
@@ -247,14 +255,18 @@ variables:
|
||||
value: 20
|
||||
- name: ROCPROFILER_GFX942_TEST_PIPELINE_ID
|
||||
value: 190
|
||||
- name: ROCPROFILER_PIPELINE_ID
|
||||
value: 143
|
||||
- name: ROCPROFILER_REGISTER_PIPELINE_ID
|
||||
value: 1
|
||||
- name: ROCPROFILER_REGISTER_TAGGED_PIPELINE_ID
|
||||
value: 25
|
||||
- name: ROCPROFILER_PIPELINE_ID
|
||||
value: 143
|
||||
- name: ROCPROFILER_TAGGED_PIPELINE_ID
|
||||
value: 28
|
||||
- name: ROCPYDECODE_PIPELINE_ID
|
||||
value: 239
|
||||
- name: ROCPYDECODE_TAGGED_PIPELINE_ID
|
||||
value: 232
|
||||
- name: ROCR_DEBUG_AGENT_PIPELINE_ID
|
||||
value: 136
|
||||
- name: ROCR_DEBUG_AGENT_TAGGED_PIPELINE_ID
|
||||
|
||||
@@ -36,7 +36,6 @@ Bluefield
|
||||
Bootloader
|
||||
CCD
|
||||
CDNA
|
||||
CHTML
|
||||
CIFAR
|
||||
CLI
|
||||
CLion
|
||||
@@ -71,7 +70,6 @@ Concretized
|
||||
Conda
|
||||
ConnectX
|
||||
CuPy
|
||||
Dashboarding
|
||||
DDR
|
||||
DF
|
||||
DGEMM
|
||||
@@ -229,7 +227,6 @@ Mellanox's
|
||||
Meta's
|
||||
Miniconda
|
||||
MirroredStrategy
|
||||
Mixtral
|
||||
Multicore
|
||||
Multithreaded
|
||||
MyEnvironment
|
||||
@@ -275,8 +272,6 @@ OpenMPI
|
||||
OpenSSL
|
||||
OpenVX
|
||||
OpenXLA
|
||||
Oversubscription
|
||||
PagedAttention
|
||||
PCC
|
||||
PCI
|
||||
PCIe
|
||||
@@ -298,7 +293,6 @@ PowerShell
|
||||
PyPi
|
||||
PyTorch
|
||||
Qcycles
|
||||
Qwen
|
||||
RAII
|
||||
RAS
|
||||
RCCL
|
||||
@@ -567,7 +561,6 @@ hipfort
|
||||
hipify
|
||||
hipsolver
|
||||
hipsparse
|
||||
hlist
|
||||
hotspotting
|
||||
hpc
|
||||
hpp
|
||||
@@ -591,7 +584,6 @@ intra
|
||||
invariants
|
||||
invocating
|
||||
ipo
|
||||
jax
|
||||
kdb
|
||||
kfd
|
||||
latencies
|
||||
@@ -612,7 +604,6 @@ migraphx
|
||||
miopen
|
||||
miopengemm
|
||||
mivisionx
|
||||
mjx
|
||||
mkdir
|
||||
mlirmiopen
|
||||
mtypes
|
||||
|
||||
@@ -76,7 +76,7 @@ The Build time will reduce significantly if we limit the GPU Architecture/s agai
|
||||
|
||||
mkdir -p ~/WORKSPACE/ # Or any folder name other than WORKSPACE
|
||||
cd ~/WORKSPACE/
|
||||
export ROCM_VERSION=6.2.1
|
||||
export ROCM_VERSION=6.2.2 # Or 6.2.0 or 6.2.1
|
||||
~/bin/repo init -u http://github.com/ROCm/ROCm.git -b roc-6.2.x -m tools/rocm-build/rocm-${ROCM_VERSION}.xml
|
||||
~/bin/repo sync
|
||||
|
||||
|
||||
166
RELEASE.md
166
RELEASE.md
@@ -1,6 +1,43 @@
|
||||
# ROCm 6.2.1 release notes
|
||||
# ROCm 6.2.2 release notes
|
||||
|
||||
The release notes provide a summary of notable changes since the previous ROCm release.
|
||||
These release notes provide a summary of notable changes since the previous ROCm release.
|
||||
|
||||
```{note}
|
||||
As ROCm 6.2.2 was released shortly after 6.2.1, the changes between these versions
|
||||
are minimal. For a comprehensive overview of recent updates, the ROCm 6.2.1 release
|
||||
notes are appended to the end of this document.
|
||||
|
||||
For detailed information about the changes in ROCm 6.2.1, refer to the appended
|
||||
section: [ROCm 6.2.1 release notes](rocm-6-2-1-release-notes).
|
||||
```
|
||||
|
||||
The [Compatibility matrix](https://rocm.docs.amd.com/en/docs-6.2.2/compatibility/compatibility-matrix.html)
|
||||
provides the full list of supported hardware, operating systems, ecosystems, third-party components, and ROCm components
|
||||
for each ROCm release.
|
||||
|
||||
Release notes for previous ROCm releases are available in earlier versions of the documentation.
|
||||
See the [ROCm documentation release history](https://rocm.docs.amd.com/en/latest/release/versions.html).
|
||||
|
||||
## Release highlights
|
||||
|
||||
The following is a significant fix introduced in ROCm 6.2.2.
|
||||
|
||||
### Fixed Instinct MI300X error recovery failure
|
||||
|
||||
Improved the reliability of AMD Instinct MI300X accelerators in scenarios involving
|
||||
uncorrectable errors. Previously, error recovery did not occur as expected,
|
||||
potentially leaving the system in an undefined state. This fix ensures that error
|
||||
recovery functions as expected, maintaining system stability.
|
||||
|
||||
See the [original issue](#instinct-mi300x-gpu-recovery-failure-on-uncorrectable-errors)
|
||||
noted in the ROCm 6.2.1 release notes.
|
||||
|
||||
---
|
||||
|
||||
## ROCm 6.2.1 release notes
|
||||
|
||||
The ROCm 6.2.1 release notes document newly added ecosystem support, ROCm Offline Installer Creator updates,
|
||||
and improvements to several ROCm libraries and tools.
|
||||
|
||||
- [Release highlights](release-highlights)
|
||||
|
||||
@@ -14,31 +51,25 @@ The release notes provide a summary of notable changes since the previous ROCm r
|
||||
|
||||
- [ROCm upcoming changes](rocm-upcoming-changes)
|
||||
|
||||
The [Compatibility matrix](https://rocm.docs.amd.com/en/docs-6.2.1/compatibility/compatibility-matrix.html)
|
||||
provides the full list of supported hardware, operating systems, ecosystems, third-party components, and ROCm components for each ROCm release.
|
||||
|
||||
Release notes for previous ROCm releases are available in earlier versions of the documentation.
|
||||
See the [ROCm documentation release history](https://rocm.docs.amd.com/en/latest/release/versions.html).
|
||||
|
||||
## Release highlights
|
||||
### Release highlights
|
||||
|
||||
The following are notable new features and improvements in ROCm 6.2.1. For changes to individual components, see [Detailed component changes](#detailed-component-changes).
|
||||
|
||||
### rocAL major version change
|
||||
#### rocAL major version change
|
||||
|
||||
The new version of rocAL introduces many new features, but does not modify any of the existing public API functions. However, the version number was incremented from 1.3 to 2.0.
|
||||
Applications linked to version 1.3 must be recompiled to link against version 2.0.
|
||||
|
||||
See [the rocAL detailed changes](#rocal-2-0-0) for more information.
|
||||
|
||||
### New support for FBGEMM (Facebook General Matrix Multiplication)
|
||||
#### New support for FBGEMM (Facebook General Matrix Multiplication)
|
||||
|
||||
As of ROCm 6.2.1, ROCm supports Facebook General Matrix Multiplication (FBGEMM) and the related FBGEMM_GPU library.
|
||||
|
||||
FBGEMM is a low-precision, high-performance CPU kernel library for convolution and matrix multiplication. It is used for server-side inference and as a back end for PyTorch quantized operators. FBGEMM_GPU includes a collection of PyTorch GPU operator libraries for training and inference. For more information, see the ROCm [Model acceleration libraries guide](https://rocm.docs.amd.com/en/docs-6.2.1/how-to/llm-fine-tuning-optimization/model-acceleration-libraries.html)
|
||||
and [PyTorch's FBGEMM GitHub repository](https://github.com/pytorch/FBGEMM).
|
||||
|
||||
### ROCm Offline Installer Creator changes
|
||||
#### ROCm Offline Installer Creator changes
|
||||
|
||||
The [ROCm Offline Installer Creator 6.2.1](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.1/install/rocm-offline-installer.html) introduces several new features and improvements including:
|
||||
|
||||
@@ -47,27 +78,25 @@ The [ROCm Offline Installer Creator 6.2.1](https://rocm.docs.amd.com/projects/in
|
||||
* Updated prerequisite repositories
|
||||
* Fixed CTest issues
|
||||
|
||||
### ROCm documentation changes
|
||||
#### ROCm documentation changes
|
||||
|
||||
The HIP documentation has been updated with several new topics aimed at improving usability
|
||||
and providing more detailed information.
|
||||
There have been no changes to supported hardware or operating systems from ROCm 6.2.0 to ROCm 6.2.1.
|
||||
|
||||
* The Programming Model Reference and Understanding the Programming Model topics in HIP have been consolidated into one topic,
|
||||
[HIP programming model (conceptual)](https://rocm.docs.amd.com/projects/HIP/en/docs-6.2.1/understand/programming_model.html).
|
||||
|
||||
* The [HIP virtual memory management](https://rocm.docs.amd.com/projects/HIP/en/docs-6.2.1/how-to/virtual_memory.html) and [HIP virtual memory management API](https://rocm.docs.amd.com/projects/HIP/en/docs-6.2.1/reference/virtual_memory_reference.html) topics have been added.
|
||||
|
||||
```{note}
|
||||
To contribute to ROCm documentation, see the [ROCm documentation contribution guidelines](https://rocm.docs.amd.com/en/latest/contribute/contributing.html).
|
||||
The ROCm documentation, like all ROCm projects, is open source and available on GitHub. To contribute to ROCm documentation, see the [ROCm documentation contribution guidelines](https://rocm.docs.amd.com/en/latest/contribute/contributing.html).
|
||||
```
|
||||
|
||||
## Operating system and hardware support changes
|
||||
### Operating system and hardware support changes
|
||||
|
||||
ROCm 6.2.1 adds support for Ubuntu 24.04.1 (kernel: 6.8 [GA]).
|
||||
|
||||
See the [Compatibility matrix](https://rocm.docs.amd.com/en/docs-6.2.1/compatibility/compatibility-matrix.html) for the full list of supported operating systems and hardware architectures.
|
||||
|
||||
## ROCm components
|
||||
### ROCm components
|
||||
|
||||
The following table lists the versions of ROCm components for ROCm 6.2.1, including any version changes from 6.2.0 to 6.2.1.
|
||||
|
||||
@@ -295,7 +324,7 @@ Click the component's updated version to go to a detailed list of its changes. C
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/rdc/en/docs-6.2.1">ROCm Data Center Tool</a></td>
|
||||
<td>0.3.0</td>
|
||||
<td>1.0.0</td>
|
||||
<td><a href="https://github.com/ROCm/rdc/releases/tag/rocm-6.2.1"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
@@ -317,7 +346,7 @@ Click the component's updated version to go to a detailed list of its changes. C
|
||||
<th rowspan="6"></th>
|
||||
<th rowspan="6">Performance</th>
|
||||
<td><a href="https://rocm.docs.amd.com/projects/omniperf/en/docs-6.2.1">Omniperf</a></td>
|
||||
<td>2.0.1 ⇒ <a href="#omniperf-2-0-1">2.0.1</a></td>
|
||||
<td>2.0.1</td>
|
||||
<td><a href="https://github.com/ROCm/omniperf/releases/tag/rocm-6.2.1"><i
|
||||
class="fab fa-github fa-lg"></i></a></td>
|
||||
</tr>
|
||||
@@ -422,76 +451,55 @@ Click the component's updated version to go to a detailed list of its changes. C
|
||||
</table>
|
||||
</div>
|
||||
|
||||
## Detailed component changes
|
||||
### Detailed component changes
|
||||
|
||||
The following sections describe key changes to ROCm components.
|
||||
|
||||
### **AMD SMI** (24.6.3)
|
||||
#### **AMD SMI** (24.6.3)
|
||||
|
||||
#### Changes
|
||||
##### Changes
|
||||
|
||||
* Added `amd-smi static --ras` on Guest VMs. Guest VMs can view enabled/disabled RAS features on Host cards.
|
||||
|
||||
#### Removals
|
||||
##### Removals
|
||||
|
||||
* Removed `amd-smi metric --ecc` & `amd-smi metric --ecc-blocks` on Guest VMs. Guest VMs do not support getting current ECC counts from the Host cards.
|
||||
|
||||
#### Resolved issues
|
||||
##### Resolved issues
|
||||
|
||||
* Fixed TypeError in `amd-smi process -G`.
|
||||
* Updated CLI error strings to handle empty and invalid GPU/CPU inputs.
|
||||
* Fixed Guest VM showing passthrough options.
|
||||
* Fixed firmware formatting where leading 0s were missing.
|
||||
|
||||
### **HIP** (6.2.1)
|
||||
#### **HIP** (6.2.1)
|
||||
|
||||
#### Resolved issues
|
||||
##### Resolved issues
|
||||
|
||||
* Soft hang when using `AMD_SERIALIZE_KERNEL`
|
||||
* Memory leak in `hipIpcCloseMemHandle`
|
||||
|
||||
### **HIPIFY** (18.0.0)
|
||||
#### **HIPIFY** (18.0.0)
|
||||
|
||||
#### Changes
|
||||
##### Changes
|
||||
|
||||
* Added CUDA 12.5.1 support.
|
||||
* Added cuDNN 9.2.1 support.
|
||||
* Added LLVM 18.1.8 support.
|
||||
* Added `hipBLAS` 64-bit APIs support.
|
||||
* Added Support for math constants `math_constants.h`.
|
||||
* Added CUDA 12.5.1 support
|
||||
* Added cuDNN 9.2.1 support
|
||||
* Added LLVM 18.1.8 support
|
||||
* Added `hipBLAS` 64-bit APIs support
|
||||
* Added Support for math constants `math_constants.h`
|
||||
|
||||
### **Omniperf** (2.0.1)
|
||||
#### **Omnitrace** (1.11.2)
|
||||
|
||||
#### Changes
|
||||
|
||||
* Enabled rocprofv1 for MI300 hardware.
|
||||
* Added dependency checks on application launch.
|
||||
* Updated Omniperf packaging.
|
||||
* Rolled back Grafana version in Dockerfile for Angular plugin compatibility.
|
||||
* Added GPU model distinction for MI300 systems.
|
||||
* Refactored and updated documemtation.
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Fixed an issue with analysis output.
|
||||
* Fixed issues with profiling multi-process and multi-GPU applications.
|
||||
|
||||
#### Optimizations
|
||||
|
||||
* Reduced running time of Omniperf when profiling.
|
||||
* Improved console logging.
|
||||
|
||||
### **Omnitrace** (1.11.2)
|
||||
|
||||
#### Known issues
|
||||
##### Known issues
|
||||
|
||||
Perfetto can no longer open Omnitrace proto files. Loading Perfetto trace output `.proto` files in the latest version of `ui.perfetto.dev` can result in a dialog with the message, "Oops, something went wrong! Please file a bug." The information in the dialog will refer to an "Unknown field type." The workaround is to open the files with the previous version of the Perfetto UI found at [https://ui.perfetto.dev/v46.0-35b3d9845/#!/](https://ui.perfetto.dev/v46.0-35b3d9845/#!/).
|
||||
|
||||
See [issue #3767](https://github.com/ROCm/ROCm/issues/3767) on GitHub.
|
||||
|
||||
### **RCCL** (2.20.5)
|
||||
#### **RCCL** (2.20.5)
|
||||
|
||||
#### Known issues
|
||||
##### Known issues
|
||||
|
||||
On systems running Linux kernel 6.8.0, such as Ubuntu 24.04, Direct Memory Access (DMA) transfers between the GPU and NIC are disabled and impacts multi-node RCCL performance.
|
||||
This issue was reproduced with RCCL 2.20.5 (ROCm 6.2.0 and 6.2.1) on systems with Broadcom Thor-2 NICs and affects other systems with RoCE networks using Linux 6.8.0 or newer.
|
||||
@@ -501,9 +509,9 @@ This issue will be addressed in a future ROCm release.
|
||||
|
||||
See [issue #3772](https://github.com/ROCm/ROCm/issues/3772) on GitHub.
|
||||
|
||||
### **rocAL** (2.0.0)
|
||||
#### **rocAL** (2.0.0)
|
||||
|
||||
#### Changes
|
||||
##### Changes
|
||||
|
||||
* The new version of rocAL introduces many new features, but does not modify any of the existing public API functions.However, the version number was incremented from 1.3 to 2.0.
|
||||
Applications linked to version 1.3 must be recompiled to link against version 2.0.
|
||||
@@ -524,37 +532,37 @@ See [issue #3772](https://github.com/ROCm/ROCm/issues/3772) on GitHub.
|
||||
* Image to tensor updates
|
||||
* ROCm install - use case graphics removed
|
||||
|
||||
#### Known issues
|
||||
##### Known issues
|
||||
|
||||
* Dependencies are not installed with the rocAL package installer. Dependencies must be installed with the prerequisite setup script provided. See the [rocAL README on GitHub](https://github.com/ROCm/rocAL/blob/docs/6.2.1/README.md#prerequisites-setup-script) for details.
|
||||
|
||||
### **rocBLAS** (4.2.1)
|
||||
#### **rocBLAS** (4.2.1)
|
||||
|
||||
#### Removals
|
||||
##### Removals
|
||||
|
||||
* Removed Device_Memory_Allocation.pdf link in documentation.
|
||||
|
||||
#### Resolved issues
|
||||
##### Resolved issues
|
||||
|
||||
* Fixed error/warning message during `rocblas_set_stream()` call.
|
||||
|
||||
### **rocFFT** (1.0.29)
|
||||
#### **rocFFT** (1.0.29)
|
||||
|
||||
#### Optimizations
|
||||
##### Optimizations
|
||||
|
||||
* Implemented 1D kernels for factorizable sizes less than 1024.
|
||||
|
||||
### **ROCm SMI** (7.3.0)
|
||||
#### **ROCm SMI** (7.3.0)
|
||||
|
||||
#### Optimizations
|
||||
##### Optimizations
|
||||
|
||||
* Improved handling of UnicodeEncodeErrors with non UTF-8 locales. Non UTF-8 locales were causing crashes on UTF-8 special characters.
|
||||
|
||||
#### Resolved issues
|
||||
##### Resolved issues
|
||||
|
||||
* Fixed an issue where the Compute Partition tests segfaulted when AMDGPU was loaded with optional parameters.
|
||||
|
||||
#### Known issues
|
||||
##### Known issues
|
||||
|
||||
* When setting CPX as a partition mode, there is a DRM node limit of 64. This is a known limitation when multiple drivers are using the DRM nodes. The `ls /sys/class/drm` command can be used to see the number of DRM nodes, and the following steps can be used to remove unnecessary drivers:
|
||||
|
||||
@@ -562,18 +570,18 @@ See [issue #3772](https://github.com/ROCm/ROCm/issues/3772) on GitHub.
|
||||
2. Remove any unnecessary drivers using `rmmod`. For example, to remove an AST driver, run `sudo rmmod ast`.
|
||||
3. Reload AMDGPU using `modprobe`: `sudo modprobe amdgpu`.
|
||||
|
||||
### **rocPRIM** (3.2.1)
|
||||
#### **rocPRIM** (3.2.1)
|
||||
|
||||
#### Optimizations
|
||||
##### Optimizations
|
||||
|
||||
* Improved performance of `block_reduce_warp_reduce` when warp size equals block size.
|
||||
|
||||
## ROCm known issues
|
||||
### ROCm known issues
|
||||
|
||||
ROCm known issues are tracked on [GitHub](https://github.com/ROCm/ROCm/labels/Verified%20Issue). Known issues related to
|
||||
individual components are listed in the [Detailed component changes](detailed-component-changes) section.
|
||||
|
||||
### Instinct MI300X GPU recovery failure on uncorrectable errors
|
||||
#### Instinct MI300X GPU recovery failure on uncorrectable errors
|
||||
|
||||
For the AMD Instinct MI300X accelerator, GPU recovery resets triggered by uncorrectable errors (UE) might not complete
|
||||
successfully, which can result in the system being left in an undefined state. A system reboot is needed to recover from
|
||||
@@ -583,14 +591,14 @@ This issue is under investigation and will be resolved in a future ROCm release.
|
||||
|
||||
See [issue #3766](https://github.com/ROCm/ROCm/issues/3766) on GitHub.
|
||||
|
||||
## ROCm upcoming changes
|
||||
### ROCm upcoming changes
|
||||
|
||||
The following changes to the ROCm software stack are anticipated for future releases.
|
||||
|
||||
### rocm-llvm-alt
|
||||
#### rocm-llvm-alt
|
||||
|
||||
The `rocm-llvm-alt` package will be removed in an upcoming release. Users relying on the functionality provided by the closed-source compiler should transition to the open-source compiler. Once the `rocm-llvm-alt` package is removed, any compilation requesting functionality provided by the closed-source compiler will result in a Clang warning: "*[AMD] proprietary optimization compiler has been removed*".
|
||||
|
||||
### rccl-rdma-sharp-plugins
|
||||
#### rccl-rdma-sharp-plugins
|
||||
|
||||
The RCCL plugin package, `rccl-rdma-sharp-plugins`, will be removed in an upcoming ROCm release.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<manifest>
|
||||
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
|
||||
<default revision="refs/tags/rocm-6.2.1"
|
||||
<default revision="refs/tags/rocm-6.2.2"
|
||||
remote="rocm-org"
|
||||
sync-c="true"
|
||||
sync-j="4" />
|
||||
|
||||
@@ -91,8 +91,7 @@ additional licenses. Please review individual repositories for more information.
|
||||
|
||||
Open sourced ROCm components are released via public GitHub
|
||||
repositories, packages on [https://repo.radeon.com](https://repo.radeon.com) and other distribution channels.
|
||||
Proprietary products are only available on [https://repo.radeon.com](https://repo.radeon.com). Currently, only
|
||||
one component of ROCm, `rocm-llvm-alt` is governed by a proprietary license.
|
||||
Proprietary products are only available on [https://repo.radeon.com](https://repo.radeon.com).
|
||||
Proprietary components are organized in a proprietary subdirectory in the package
|
||||
repositories to distinguish from open sourced packages.
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
ROCm Version,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
|
||||
:ref:`Operating Systems & kernels <OS-kernel-versions>`,"Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
|
||||
,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
|
||||
,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
ROCm Version,6.2.2,6.2.1,6.2.0, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,
|
||||
,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
|
||||
,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
|
||||
,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,,,
|
||||
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,
|
||||
@@ -16,102 +16,102 @@ ROCm Version,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
|
||||
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
|
||||
,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
|
||||
,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
|
||||
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
|
||||
,,,,,,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,
|
||||
:doc:`PyTorch <rocm-install-on-linux:install/3rd-party/pytorch-install>`,"2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <rocm-install-on-linux:install/3rd-party/tensorflow-install>`,"2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`PyTorch <rocm-install-on-linux:install/3rd-party/pytorch-install>`,"2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <rocm-install-on-linux:install/3rd-party/tensorflow-install>`,"2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`JAX <rocm-install-on-linux:install/3rd-party/jax-install>`,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
|
||||
,,,,,,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
|
||||
,,,,,,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,
|
||||
Thrust,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
CUB,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
Thrust,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
CUB,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
,,,,,,,,
|
||||
KFD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,
|
||||
Tested user space versions,"6.1.x, 6.0.x","6.1.x, 6.0.x","6.2.x, 6.0.x, 5.7.x","6.2.x, 6.0.x, 5.7.x","6.2.x, 6.0.x, 5.7.x","6.2.x, 6.0.x, 5.7.x","6.2.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.0.x, 5.7.x, 5.6.x"
|
||||
Tested user space versions,"6.1.x, 6.0.x","6.1.x, 6.0.x","6.1.x, 6.0.x","6.2.x, 6.0.x, 5.7.x","6.2.x, 6.0.x, 5.7.x","6.2.x, 6.0.x, 5.7.x","6.2.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.0.x, 5.7.x, 5.6.x"
|
||||
,,,,,,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
|
||||
:doc:`rocAL <rocal:index>`,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
|
||||
:doc:`rocAL <rocal:index>`,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`RPP <rpp:index>`,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`RPP <rpp:index>`,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
|
||||
,,,,,,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,
|
||||
:doc:`RCCL <rccl:index>`,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
|
||||
:doc:`RCCL <rccl:index>`,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
|
||||
,,,,,,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
|
||||
:doc:`hipBLAS <hipblas:index>`,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
|
||||
:doc:`hipFORT <hipfort:index>`,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.1,0.2.1,0.2.1,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,4.2.1,4.2.1,4.2.0,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
|
||||
`Tensile <https://github.com/ROCm/Tensile>`_,4.40.0,4.40.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
|
||||
,,,,,,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
:doc:`hipCUB <hipcub:index>`,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
,,,,,,,,
|
||||
SUPPORT LIBS,,,,,,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.2.2,6.2.1,6.2.0,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,20240607.4.05,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
|
||||
,,,,,,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
|
||||
:doc:`AMD SMI <amdsmi:index>`,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,1.0.0,1.0.0,1.0.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,rocm-6.2.1,rocm-6.2.0,rocm-6.1.5,rocm-6.1.2,rocm-6.1.1,rocm-6.1.0,rocm-6.0.2,rocm-6.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.3.0,7.3.0,7.3.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,rocm-6.2.2,rocm-6.2.1,rocm-6.2.0,rocm-6.1.2,rocm-6.1.1,rocm-6.1.0,rocm-6.0.2,rocm-6.0.0
|
||||
,,,,,,,,
|
||||
PERFORMANCE TOOLS,,,,,,,,
|
||||
:doc:`Omniperf <omniperf:index>`,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Omnitrace <omnitrace:index>`,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Omniperf <omniperf:index>`,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Omnitrace <omnitrace:index>`,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60202,2.0.60201,2.0.60200,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60202,4.1.60201,4.1.60200,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
|
||||
,,,,,,,,
|
||||
DEVELOPMENT TOOLS,,,,,,,,
|
||||
:doc:`HIPIFY <hipify:index>`,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
|
||||
:doc:`HIPIFY <hipify:index>`,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,N/A,N/A
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
|
||||
,,,,,,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
,,,,,,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
:doc:`HIP <hip:index>`,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
:doc:`HIP <hip:index>`,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCR-Runtime <rocr-runtime:index>`,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
|
||||
:doc:`ROCR-Runtime <rocr-runtime:index>`,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
|
||||
|
@@ -17,10 +17,10 @@ You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-
|
||||
.. container:: format-big-table
|
||||
|
||||
.. csv-table::
|
||||
:header: "ROCm Version", "6.2.1", "6.2.0", "6.1.0"
|
||||
:header: "ROCm Version", "6.2.2", "6.2.1", "6.1.0"
|
||||
:stub-columns: 1
|
||||
|
||||
:ref:`Operating Systems & kernels <OS-kernel-versions>`,"Ubuntu 24.04.1, 24.04",Ubuntu 24.04,
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04
|
||||
,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.4, 22.04.3"
|
||||
,,,"Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4 [#red-hat94]_, 9.3, 9.2"
|
||||
@@ -37,7 +37,7 @@ You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-
|
||||
,.. _gpu-support-compatibility-matrix:,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030
|
||||
,gfx942 [#mi300_621]_,gfx942 [#mi300_620]_, gfx942 [#mi300_610]_
|
||||
,gfx942 [#mi300_622]_,gfx942 [#mi300_621]_, gfx942 [#mi300_610]_
|
||||
,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908
|
||||
,,,
|
||||
@@ -63,7 +63,7 @@ You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.10.0,2.10.0,2.9.0
|
||||
:doc:`MIOpen <miopen:index>`,3.2.0,3.2.0,3.1.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.0.0,3.0.0,2.5.0
|
||||
:doc:`rocAL <rocal:index>`,2.0.0,1.0.0,1.0.0
|
||||
:doc:`rocAL <rocal:index>`,2.0.0,2.0.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.6.0,0.6.0,0.5.0
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.1.0,0.1.0,N/A
|
||||
:doc:`RPP <rpp:index>`,1.8.0,1.8.0,1.5.0
|
||||
@@ -75,15 +75,15 @@ You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.2.0,2.2.0,2.1.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.8.0,0.8.0,0.7.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.15,1.0.14,1.0.14
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.15,1.0.15,1.0.14
|
||||
:doc:`hipFORT <hipfort:index>`,0.4.0,0.4.0,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.11.0,2.11.0,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.2.0,2.2.0,2.1.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.1.1,3.1.1,3.0.1
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.1,0.2.1,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.0,3.2.0,3.1.1
|
||||
:doc:`rocBLAS <rocblas:index>`,4.2.1,4.2.0,4.1.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.29,1.0.28,1.0.26
|
||||
:doc:`rocBLAS <rocblas:index>`,4.2.1,4.2.1,4.1.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.29,1.0.29,1.0.26
|
||||
:doc:`rocRAND <rocrand:index>`,3.1.0,3.1.0,3.0.1
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.26.0,3.26.0,3.25.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.2.0,3.2.0,3.1.2
|
||||
@@ -94,30 +94,30 @@ You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-
|
||||
:doc:`hipCUB <hipcub:index>`,3.2.0,3.2.0,3.1.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.3.0,1.3.0,1.2.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.2.0,3.2.0,3.1.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.1.0,3.0.1,3.0.1
|
||||
:doc:`rocThrust <rocthrust:index>`,3.1.0,3.1.0,3.0.1
|
||||
,,,
|
||||
SUPPORT LIBS,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.2.41134,6.2.41133,6.1.40091
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.2.1,6.2.0,6.1.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,20240607.4.05,20240607.1.4246,20240125.3.30
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.2.41134,6.2.41134,6.1.40091
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.2.2,6.2.1,6.1.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,20240607.4.05,20240607.4.05,20240125.3.30
|
||||
,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix:,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,24.6.3,24.6.2,24.4.1
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0
|
||||
:doc:`AMD SMI <amdsmi:index>`,24.6.3,24.6.3,24.4.1
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,1.0.0,1.0.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.3.0,7.3.0,7.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,rocm-6.2.1,rocm-6.2.0,rocm-6.1.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,rocm-6.2.2,rocm-6.2.1,rocm-6.1.0
|
||||
,,,
|
||||
PERFORMANCE TOOLS,,,
|
||||
:doc:`Omniperf <omniperf:index>`,2.0.1,2.0.1,N/A
|
||||
:doc:`Omnitrace <omnitrace:index>`,1.11.2,1.11.2,N/A
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60201,2.0.60200,2.0.60100
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60202,2.0.60201,2.0.60100
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.4.0,0.4.0,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60201,4.1.60200,4.1.60100
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60202,4.1.60201,4.1.60100
|
||||
,,,
|
||||
DEVELOPMENT TOOLS,,,
|
||||
:doc:`HIPIFY <hipify:index>`,18.0.0.24355,18.0.0.24232,17.0.0.24103
|
||||
:doc:`HIPIFY <hipify:index>`,18.0.0.24355,18.0.0.24355,17.0.0.24103
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.13.0,0.13.0,0.12.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.76.0,0.76.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,14.2.0,14.2.0,14.1.0
|
||||
@@ -127,25 +127,25 @@ You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix:,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,18.0.0.24355,18.0.0.24232,17.0.0.24103
|
||||
:doc:`llvm-project <llvm-project:index>`,18.0.0.24355,18.0.0.24232,17.0.0.24103
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,18.0.0.24355,18.0.0.24232,17.0.0.24103
|
||||
`Flang <https://github.com/ROCm/flang>`_,18.0.0.24355,18.0.0.24355,17.0.0.24103
|
||||
:doc:`llvm-project <llvm-project:index>`,18.0.0.24355,18.0.0.24355,17.0.0.24103
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,18.0.0.24355,18.0.0.24355,17.0.0.24103
|
||||
,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix:,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.2.41134,6.2.41133,6.1.40091
|
||||
:doc:`HIP <hip:index>`,6.2.41134,6.2.41133,6.1.40091
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.2.41134,6.2.41134,6.1.40091
|
||||
:doc:`HIP <hip:index>`,6.2.41134,6.2.41134,6.1.40091
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCR-Runtime <rocr-runtime:index>`,1.14.0,1.13.0,1.13.0
|
||||
:doc:`ROCR-Runtime <rocr-runtime:index>`,1.14.0,1.14.0,1.13.0
|
||||
|
||||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#red-hat94] **For ROCm 6.1** - RHEL 9.4 is supported only on AMD Instinct MI300A.
|
||||
.. [#red-hat94] RHEL 9.4 is supported only on AMD Instinct MI300A.
|
||||
.. [#oracle89] Oracle Linux is supported only on AMD Instinct MI300X.
|
||||
.. [#mi300_622] **For ROCm 6.2.2** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_621] **For ROCm 6.2.1** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_620] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_610] **For ROCm 6.1.0** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
|
||||
.. [#kfd_support] ROCm provides forward and backward compatibility between the Kernel Fusion Driver (KFD) and its user space software for +/- 2 releases. The tested user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
|
||||
.. [#kfd_support] ROCm provides forward and backward compatibility between the Kernel Fusion Driver (KFD) and its user space software for +/- 2 releases. These are the compatibility combinations that are currently supported.
|
||||
|
||||
|
||||
.. _OS-kernel-versions:
|
||||
@@ -207,7 +207,6 @@ Expand for full historical view of:
|
||||
|
||||
.. csv-table::
|
||||
:file: compatibility-matrix-historical-6.0.csv
|
||||
:widths: 20,10,10,10,10,10,10,10,10
|
||||
:header-rows: 1
|
||||
:stub-columns: 1
|
||||
|
||||
@@ -215,6 +214,7 @@ Expand for full historical view of:
|
||||
|
||||
.. [#red-hat94-past-60] RHEL 9.4 is supported only on AMD Instinct MI300A.
|
||||
.. [#oracle89-past-60] Oracle Linux is supported only on AMD Instinct MI300X.
|
||||
.. [#mi300_622-past-60] **For ROCm 6.2.2** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_621-past-60] **For ROCm 6.2.1** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_620-past-60] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_612-past-60] **For ROCm 6.1.2** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
|
||||
@@ -222,4 +222,4 @@ Expand for full historical view of:
|
||||
.. [#mi300_610-past-60] **For ROCm 6.1.0** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
|
||||
.. [#mi300_602-past-60] **For ROCm 6.0.2** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
|
||||
.. [#mi300_600-past-60] **For ROCm 6.0.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
|
||||
.. [#kfd_support-past-60] ROCm provides forward and backward compatibility between the Kernel Fusion Driver (KFD) and its user space software for +/- 2 releases. The tested user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
|
||||
.. [#kfd_support-past-60] ROCm provides forward and backward compatibility between the Kernel Fusion Driver (KFD) and its user space software for +/- 2 releases. These are the compatibility combinations that are currently supported.
|
||||
|
||||
@@ -30,16 +30,15 @@ if os.environ.get("READTHEDOCS", "") == "True":
|
||||
project = "ROCm Documentation"
|
||||
author = "Advanced Micro Devices, Inc."
|
||||
copyright = "Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved."
|
||||
version = "6.2.1"
|
||||
release = "6.2.1"
|
||||
version = "6.2.2"
|
||||
release = "6.2.2"
|
||||
setting_all_article_info = True
|
||||
all_article_info_os = ["linux", "windows"]
|
||||
all_article_info_author = ""
|
||||
|
||||
# pages with specific settings
|
||||
article_pages = [
|
||||
{"file": "about/release-notes", "os": ["linux", "windows"], "date": "2024-09-20"},
|
||||
{"file": "about/changelog", "os": ["linux", "windows"], "date": "2024-09-20"},
|
||||
{"file": "about/release-notes", "os": ["linux", "windows"], "date": "2024-09-27"},
|
||||
{"file": "how-to/deep-learning-rocm", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/index", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/install", "os": ["linux"]},
|
||||
@@ -82,7 +81,6 @@ article_pages = [
|
||||
"file": "how-to/llm-fine-tuning-optimization/profiling-and-debugging",
|
||||
"os": ["linux"],
|
||||
},
|
||||
{"file": "how-to/performance-validation/mi300x/vllm-benchmark", "os": ["linux"]},
|
||||
{"file": "how-to/system-optimization/index", "os": ["linux"]},
|
||||
{"file": "how-to/system-optimization/mi300x", "os": ["linux"]},
|
||||
{"file": "how-to/system-optimization/mi200", "os": ["linux"]},
|
||||
|
||||
@@ -16,7 +16,7 @@ This section discusses how to implement `vLLM <https://docs.vllm.ai/en/latest>`_
|
||||
vLLM inference
|
||||
==============
|
||||
|
||||
vLLM is renowned for its PagedAttention algorithm that can reduce memory consumption and increase throughput thanks to
|
||||
vLLM is renowned for its paged attention algorithm that can reduce memory consumption and increase throughput thanks to
|
||||
its paging scheme. Instead of allocating GPU high-bandwidth memory (HBM) for the maximum output token lengths of the
|
||||
models, the paged attention of vLLM allocates GPU HBM dynamically for its actual decoding lengths. This paged attention
|
||||
is also effective when multiple requests share the same key and value contents for a large value of beam search or
|
||||
@@ -139,7 +139,9 @@ Refer to :ref:`mi300x-vllm-optimization` for performance optimization tips.
|
||||
|
||||
ROCm provides a prebuilt optimized Docker image for validating the performance of LLM inference with vLLM
|
||||
on the MI300X accelerator. The Docker image includes ROCm, vLLM, PyTorch, and tuning files in the CSV
|
||||
format. For more information, see :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`.
|
||||
format. For more information, see the guide to
|
||||
`LLM inference performance validation with vLLM on the AMD Instinct™ MI300X accelerator <https://github.com/ROCm/MAD/blob/develop/benchmark/vllm/README.md>`_
|
||||
on the ROCm GitHub repository.
|
||||
|
||||
.. _fine-tuning-llms-tgi:
|
||||
|
||||
|
||||
@@ -1,407 +0,0 @@
|
||||
.. meta::
|
||||
:description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the unified
|
||||
ROCm Docker image.
|
||||
:keywords: model, MAD, automation, dashboarding, validate
|
||||
|
||||
***********************************************************
|
||||
LLM inference performance validation on AMD Instinct MI300X
|
||||
***********************************************************
|
||||
|
||||
.. _vllm-benchmark-unified-docker:
|
||||
|
||||
The `ROCm vLLM Docker <https://hub.docker.com/r/rocm/vllm/tags>`_ image offers
|
||||
a prebuilt, optimized environment designed for validating large language model
|
||||
(LLM) inference performance on the AMD Instinct™ MI300X accelerator. This
|
||||
ROCm vLLM Docker image integrates vLLM and PyTorch tailored specifically for the
|
||||
MI300X accelerator and includes the following components:
|
||||
|
||||
* `ROCm 6.2.1 <https://github.com/ROCm/ROCm>`_
|
||||
|
||||
* `vLLM 0.6.4 <https://docs.vllm.ai/en/latest>`_
|
||||
|
||||
* `PyTorch 2.5.0 <https://github.com/pytorch/pytorch>`_
|
||||
|
||||
* Tuning files (in CSV format)
|
||||
|
||||
With this Docker image, you can quickly validate the expected inference
|
||||
performance numbers on the MI300X accelerator. This topic also provides tips on
|
||||
optimizing performance with popular AI models.
|
||||
|
||||
.. hlist::
|
||||
:columns: 6
|
||||
|
||||
* Llama 3.1 8B
|
||||
|
||||
* Llama 3.1 70B
|
||||
|
||||
* Llama 3.1 405B
|
||||
|
||||
* Llama 2 7B
|
||||
|
||||
* Llama 2 70B
|
||||
|
||||
* Mixtral 8x7B
|
||||
|
||||
* Mixtral 8x22B
|
||||
|
||||
* Mixtral 7B
|
||||
|
||||
* Qwen2 7B
|
||||
|
||||
* Qwen2 72B
|
||||
|
||||
* JAIS 13B
|
||||
|
||||
* JAIS 30B
|
||||
|
||||
.. _vllm-benchmark-vllm:
|
||||
|
||||
.. note::
|
||||
|
||||
vLLM is a toolkit and library for LLM inference and serving. AMD implements
|
||||
high-performance custom kernels and modules in vLLM to enhance performance.
|
||||
See :ref:`fine-tuning-llms-vllm` and :ref:`mi300x-vllm-optimization` for
|
||||
more information.
|
||||
|
||||
Getting started
|
||||
===============
|
||||
|
||||
Use the following procedures to reproduce the benchmark results on an
|
||||
MI300X accelerator with the prebuilt vLLM Docker image.
|
||||
|
||||
.. _vllm-benchmark-get-started:
|
||||
|
||||
1. Disable NUMA auto-balancing.
|
||||
|
||||
To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
|
||||
might hang until the periodic balancing is finalized. For more information,
|
||||
see :ref:`AMD Instinct MI300X system optimization <mi300x-disable-numa>`.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# disable automatic NUMA balancing
|
||||
sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
|
||||
# check if NUMA balancing is disabled (returns 0 if disabled)
|
||||
cat /proc/sys/kernel/numa_balancing
|
||||
0
|
||||
|
||||
2. Download the :ref:`ROCm vLLM Docker image <vllm-benchmark-unified-docker>`.
|
||||
|
||||
Use the following command to pull the Docker image from Docker Hub.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4
|
||||
|
||||
Once setup is complete, you can choose between two options to reproduce the
|
||||
benchmark results:
|
||||
|
||||
- :ref:`MAD-integrated benchmarking <vllm-benchmark-mad>`
|
||||
|
||||
- :ref:`Standalone benchmarking <vllm-benchmark-standalone>`
|
||||
|
||||
.. _vllm-benchmark-mad:
|
||||
|
||||
MAD-integrated benchmarking
|
||||
===========================
|
||||
|
||||
Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
|
||||
directory and install the required packages on the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD
|
||||
pip install -r requirements.txt
|
||||
|
||||
Use this command to run a performance benchmark test of the Llama 3.1 8B model
|
||||
on one GPU with ``float16`` data type in the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
|
||||
python3 tools/run_models.py --tags pyt_vllm_llama-3.1-8b --keep-model-dir --live-output --timeout 28800
|
||||
|
||||
ROCm MAD launches a Docker container with the name
|
||||
``container_ci-pyt_vllm_llama-3.1-8b``. The latency and throughput reports of the
|
||||
model are collected in the following path: ``~/MAD/reports_float16/``.
|
||||
|
||||
Although the following models are preconfigured to collect latency and
|
||||
throughput performance data, you can also change the benchmarking parameters.
|
||||
Refer to the :ref:`Standalone benchmarking <vllm-benchmark-standalone>` section.
|
||||
|
||||
Available models
|
||||
----------------
|
||||
|
||||
.. hlist::
|
||||
:columns: 3
|
||||
|
||||
* ``pyt_vllm_llama-3.1-8b``
|
||||
|
||||
* ``pyt_vllm_llama-3.1-70b``
|
||||
|
||||
* ``pyt_vllm_llama-3.1-405b``
|
||||
|
||||
* ``pyt_vllm_llama-2-7b``
|
||||
|
||||
* ``pyt_vllm_llama-2-70b``
|
||||
|
||||
* ``pyt_vllm_mixtral-8x7b``
|
||||
|
||||
* ``pyt_vllm_mixtral-8x22b``
|
||||
|
||||
* ``pyt_vllm_mistral-7b``
|
||||
|
||||
* ``pyt_vllm_qwen2-7b``
|
||||
|
||||
* ``pyt_vllm_qwen2-72b``
|
||||
|
||||
* ``pyt_vllm_jais-13b``
|
||||
|
||||
* ``pyt_vllm_jais-30b``
|
||||
|
||||
* ``pyt_vllm_llama-3.1-8b_fp8``
|
||||
|
||||
* ``pyt_vllm_llama-3.1-70b_fp8``
|
||||
|
||||
* ``pyt_vllm_llama-3.1-405b_fp8``
|
||||
|
||||
* ``pyt_vllm_mixtral-8x7b_fp8``
|
||||
|
||||
* ``pyt_vllm_mixtral-8x22b_fp8``
|
||||
|
||||
.. _vllm-benchmark-standalone:
|
||||
|
||||
Standalone benchmarking
|
||||
=======================
|
||||
|
||||
You can run the vLLM benchmark tool independently by starting the
|
||||
:ref:`Docker container <vllm-benchmark-get-started>` as shown in the following
|
||||
snippet.
|
||||
|
||||
.. code-block::
|
||||
|
||||
docker pull rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4
|
||||
docker run -it --device=/dev/kfd --device=/dev/dri --group-add video --shm-size 128G --security-opt seccomp=unconfined --security-opt apparmor=unconfined --cap-add=SYS_PTRACE -v $(pwd):/workspace --env HUGGINGFACE_HUB_CACHE=/workspace --name vllm_v0.6.4 rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4
|
||||
|
||||
In the Docker container, clone the ROCm MAD repository and navigate to the
|
||||
benchmark scripts directory at ``~/MAD/scripts/vllm``.
|
||||
|
||||
.. code-block::
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD/scripts/vllm
|
||||
|
||||
Command
|
||||
-------
|
||||
|
||||
To start the benchmark, use the following command with the appropriate options.
|
||||
See :ref:`Options <vllm-benchmark-standalone-options>` for the list of
|
||||
options and their descriptions.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./vllm_benchmark_report.sh -s $test_option -m $model_repo -g $num_gpu -d $datatype
|
||||
|
||||
See the :ref:`examples <vllm-benchmark-run-benchmark>` for more information.
|
||||
|
||||
.. note::
|
||||
|
||||
The input sequence length, output sequence length, and tensor parallel (TP) are
|
||||
already configured. You don't need to specify them with this script.
|
||||
|
||||
.. note::
|
||||
|
||||
If you encounter the following error, pass your access-authorized Hugging
|
||||
Face token to the gated models.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
OSError: You are trying to access a gated repo.
|
||||
|
||||
# pass your HF_TOKEN
|
||||
export HF_TOKEN=$your_personal_hf_token
|
||||
|
||||
.. _vllm-benchmark-standalone-options:
|
||||
|
||||
Options
|
||||
-------
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:align: center
|
||||
|
||||
* - Name
|
||||
- Options
|
||||
- Description
|
||||
|
||||
* - ``$test_option``
|
||||
- latency
|
||||
- Measure decoding token latency
|
||||
|
||||
* -
|
||||
- throughput
|
||||
- Measure token generation throughput
|
||||
|
||||
* -
|
||||
- all
|
||||
- Measure both throughput and latency
|
||||
|
||||
* - ``$model_repo``
|
||||
- ``meta-llama/Meta-Llama-3.1-8B-Instruct``
|
||||
- Llama 3.1 8B
|
||||
|
||||
* - (``float16``)
|
||||
- ``meta-llama/Meta-Llama-3.1-70B-Instruct``
|
||||
- Llama 3.1 70B
|
||||
|
||||
* -
|
||||
- ``meta-llama/Meta-Llama-3.1-405B-Instruct``
|
||||
- Llama 3.1 405B
|
||||
|
||||
* -
|
||||
- ``meta-llama/Llama-2-7b-chat-hf``
|
||||
- Llama 2 7B
|
||||
|
||||
* -
|
||||
- ``meta-llama/Llama-2-70b-chat-hf``
|
||||
- Llama 2 70B
|
||||
|
||||
* -
|
||||
- ``mistralai/Mixtral-8x7B-Instruct-v0.1``
|
||||
- Mixtral 8x7B
|
||||
|
||||
* -
|
||||
- ``mistralai/Mixtral-8x22B-Instruct-v0.1``
|
||||
- Mixtral 8x22B
|
||||
|
||||
* -
|
||||
- ``mistralai/Mistral-7B-Instruct-v0.3``
|
||||
- Mixtral 7B
|
||||
|
||||
* -
|
||||
- ``Qwen/Qwen2-7B-Instruct``
|
||||
- Qwen2 7B
|
||||
|
||||
* -
|
||||
- ``Qwen/Qwen2-72B-Instruct``
|
||||
- Qwen2 72B
|
||||
|
||||
* -
|
||||
- ``core42/jais-13b-chat``
|
||||
- JAIS 13B
|
||||
|
||||
* -
|
||||
- ``core42/jais-30b-chat-v3``
|
||||
- JAIS 30B
|
||||
|
||||
* - ``$model_repo``
|
||||
- ``amd/Meta-Llama-3.1-8B-Instruct-FP8-KV``
|
||||
- Llama 3.1 8B
|
||||
|
||||
* - (``float8``)
|
||||
- ``amd/Meta-Llama-3.1-70B-Instruct-FP8-KV``
|
||||
- Llama 3.1 70B
|
||||
|
||||
* -
|
||||
- ``amd/Meta-Llama-3.1-405B-Instruct-FP8-KV``
|
||||
- Llama 3.1 405B
|
||||
|
||||
* -
|
||||
- ``amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV``
|
||||
- Mixtral 8x7B
|
||||
|
||||
* -
|
||||
- ``amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV``
|
||||
- Mixtral 8x22B
|
||||
|
||||
* - ``$num_gpu``
|
||||
- 1 or 8
|
||||
- Number of GPUs
|
||||
|
||||
* - ``$datatype``
|
||||
- ``float16`` or ``float8``
|
||||
- Data type
|
||||
|
||||
.. _vllm-benchmark-run-benchmark:
|
||||
|
||||
Running the benchmark on the MI300X accelerator
|
||||
-----------------------------------------------
|
||||
|
||||
Here are some examples of running the benchmark with various options.
|
||||
See :ref:`Options <vllm-benchmark-standalone-options>` for the list of
|
||||
options and their descriptions.
|
||||
|
||||
Example 1: latency benchmark
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Use this command to benchmark the latency of the Llama 3.1 8B model on one GPU with the ``float16`` and ``float8`` data types.
|
||||
|
||||
.. code-block::
|
||||
|
||||
./vllm_benchmark_report.sh -s latency -m meta-llama/Meta-Llama-3.1-8B-Instruct -g 1 -d float16
|
||||
./vllm_benchmark_report.sh -s latency -m amd/Meta-Llama-3.1-8B-Instruct-FP8-KV -g 1 -d float8
|
||||
|
||||
Find the latency reports at:
|
||||
|
||||
- ``./reports_float16/summary/Meta-Llama-3.1-8B-Instruct_latency_report.csv``
|
||||
|
||||
- ``./reports_float8/summary/Meta-Llama-3.1-8B-Instruct-FP8-KV_latency_report.csv``
|
||||
|
||||
Example 2: throughput benchmark
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Use this command to benchmark the throughput of the Llama 3.1 8B model on one GPU with the ``float16`` and ``float8`` data types.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./vllm_benchmark_report.sh -s throughput -m meta-llama/Meta-Llama-3.1-8B-Instruct -g 1 -d float16
|
||||
./vllm_benchmark_report.sh -s throughput -m amd/Meta-Llama-3.1-8B-Instruct-FP8-KV -g 1 -d float8
|
||||
|
||||
Find the throughput reports at:
|
||||
|
||||
- ``./reports_float16/summary/Meta-Llama-3.1-8B-Instruct_throughput_report.csv``
|
||||
|
||||
- ``./reports_float8/summary/Meta-Llama-3.1-8B-Instruct-FP8-KV_throughput_report.csv``
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<style>
|
||||
mjx-container[jax="CHTML"][display="true"] {
|
||||
text-align: left;
|
||||
margin: 0;
|
||||
}
|
||||
</style>
|
||||
|
||||
.. note::
|
||||
|
||||
Throughput is calculated as:
|
||||
|
||||
- .. math:: throughput\_tot = requests \times (\mathsf{\text{input lengths}} + \mathsf{\text{output lengths}}) / elapsed\_time
|
||||
|
||||
- .. math:: throughput\_gen = requests \times \mathsf{\text{output lengths}} / elapsed\_time
|
||||
|
||||
Further reading
|
||||
===============
|
||||
|
||||
- For application performance optimization strategies for HPC and AI workloads,
|
||||
including inference with vLLM, see :doc:`/how-to/tuning-guides/mi300x/workload`.
|
||||
|
||||
- To learn more about the options for latency and throughput benchmark scripts,
|
||||
see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.
|
||||
|
||||
- To learn more about system settings and management practices to configure your system for
|
||||
MI300X accelerators, see :doc:`/how-to/system-optimization/mi300x`.
|
||||
|
||||
- To learn how to run LLM models from Hugging Face or your own model, see
|
||||
:doc:`Using ROCm for AI </how-to/rocm-for-ai/index>`.
|
||||
|
||||
- To learn how to optimize inference on LLMs, see
|
||||
:doc:`Fine-tuning LLMs and inference optimization </how-to/llm-fine-tuning-optimization/index>`.
|
||||
|
||||
- For a list of other ready-made Docker images for ROCm, see the
|
||||
:doc:`Docker image support matrix <rocm-install-on-linux:reference/docker-image-support-matrix>`.
|
||||
|
||||
- To compare with the previous version of the ROCm vLLM Docker image for performance validation, refer to
|
||||
`LLM inference performance validation on AMD Instinct MI300X (ROCm 6.2.0) <https://rocm.docs.amd.com/en/docs-6.2.0/how-to/performance-validation/mi300x/vllm-benchmark.html>`_.
|
||||
|
||||
@@ -46,7 +46,9 @@ Validating vLLM performance
|
||||
|
||||
ROCm provides a prebuilt optimized Docker image for validating the performance of LLM inference with vLLM
|
||||
on the MI300X accelerator. The Docker image includes ROCm, vLLM, PyTorch, and tuning files in the CSV
|
||||
format. For more information, see :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`.
|
||||
format. For more information, see the guide to
|
||||
`LLM inference performance validation with vLLM on the AMD Instinct™ MI300X accelerator <https://github.com/ROCm/MAD/blob/develop/benchmark/vllm/README.md>`_
|
||||
on the ROCm GitHub repository.
|
||||
|
||||
.. _rocm-for-ai-serve-hugging-face-tgi:
|
||||
|
||||
|
||||
@@ -342,8 +342,8 @@ If SMT is enabled by setting "CCD/Core/Thread Enablement > SMT Control" to
|
||||
(logical) cores of the system:
|
||||
|
||||
* In the server BIOS, set IOMMU to "Enabled".
|
||||
* When configuring the Grub boot loader, add the following argument for the
|
||||
Linux kernel: `iommu=pt`
|
||||
* When configuring the Grub boot loader, add the following arguments for the
|
||||
Linux kernel: `amd_iommu=on iommu=pt`
|
||||
* Update Grub to use the modified configuration:
|
||||
|
||||
```shell
|
||||
@@ -355,7 +355,7 @@ If SMT is enabled by setting "CCD/Core/Thread Enablement > SMT Control" to
|
||||
|
||||
```none
|
||||
[...]
|
||||
[ 0.000000] Kernel command line: [...] iommu=pt
|
||||
[ 0.000000] Kernel command line: [...] amd_iommu=on iommu=pt
|
||||
[...]
|
||||
```
|
||||
|
||||
|
||||
@@ -327,8 +327,8 @@ If SMT is enabled by setting "CCD/Core/Thread Enablement > SMT Control" to
|
||||
(logical) cores of the system:
|
||||
|
||||
* In the server BIOS, set IOMMU to "Enabled".
|
||||
* When configuring the Grub boot loader, add the following argument for the
|
||||
Linux kernel: `iommu=pt`
|
||||
* When configuring the Grub boot loader, add the following arguments for the
|
||||
Linux kernel: `amd_iommu=on iommu=pt`
|
||||
* Update Grub to use the modified configuration:
|
||||
|
||||
```shell
|
||||
@@ -340,7 +340,7 @@ If SMT is enabled by setting "CCD/Core/Thread Enablement > SMT Control" to
|
||||
|
||||
```none
|
||||
[...]
|
||||
[ 0.000000] Kernel command line: [...] iommu=pt
|
||||
[ 0.000000] Kernel command line: [...] amd_iommu=on iommu=pt
|
||||
[...]
|
||||
```
|
||||
|
||||
|
||||
@@ -299,7 +299,7 @@ For a system that has AMD host CPUs add this to ``GRUB_CMDLINE_LINUX``:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
iommu=pt
|
||||
amd_iommu=on iommu=pt
|
||||
|
||||
Otherwise, if the system has Intel host CPUs add this instead to
|
||||
``GRUB_CMDLINE_LINUX``:
|
||||
@@ -500,7 +500,7 @@ If SMT is enabled by setting ``CCD/Core/Thread Enablement > SMT Control`` to
|
||||
|
||||
#. In the server BIOS, set IOMMU to ``Enabled``.
|
||||
|
||||
#. When configuring the GRUB boot loader, add the following argument for the Linux kernel: ``iommu=pt``.
|
||||
#. When configuring the GRUB boot loader, add the following arguments for the Linux kernel: ``amd_iommu=on iommu=pt``.
|
||||
|
||||
#. Update GRUB.
|
||||
|
||||
@@ -515,7 +515,7 @@ If SMT is enabled by setting ``CCD/Core/Thread Enablement > SMT Control`` to
|
||||
.. code-block:: shell
|
||||
|
||||
[...]
|
||||
[ 0.000000] Kernel command line: [...] iommu=pt
|
||||
[ 0.000000] Kernel command line: [...] amd_iommu=on iommu=pt
|
||||
[...]
|
||||
|
||||
Once the system is properly configured, ROCm software can be
|
||||
|
||||
@@ -111,7 +111,7 @@ sudo virsh net-start default /*to enable Virtual network by default
|
||||
Enable input-output memory management unit (IOMMU) in GRUB settings by adding the following line to `/etc/default/grub`:
|
||||
|
||||
```none
|
||||
GRUB_CMDLINE_LINUX_DEFAULT="quiet splash" for AMD CPU
|
||||
GRUB_CMDLINE_LINUX_DEFAULT="quiet splash amd_iommu=on" for AMD CPU
|
||||
```
|
||||
|
||||
Update grub and reboot
|
||||
|
||||
@@ -8,8 +8,6 @@ accelerators. They include detailed instructions on system settings and
|
||||
application tuning suggestions to help you fully leverage the capabilities of
|
||||
these accelerators, thereby achieving optimal performance.
|
||||
|
||||
* :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`
|
||||
|
||||
* :doc:`/how-to/tuning-guides/mi300x/system`
|
||||
|
||||
* :doc:`/how-to/tuning-guides/mi300x/workload`
|
||||
|
||||
@@ -152,7 +152,9 @@ address any new bottlenecks that may emerge.
|
||||
|
||||
ROCm provides a prebuilt optimized Docker image that has everything required to implement
|
||||
the tips in this section. It includes ROCm, vLLM, PyTorch, and tuning files in the CSV
|
||||
format. For more information, see :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`.
|
||||
format. For more information, see the guide to
|
||||
`LLM inference performance validation with vLLM on the AMD Instinct™ MI300X accelerator <https://github.com/ROCm/MAD/blob/develop/benchmark/vllm/README.md>`_
|
||||
on the ROCm GitHub repository.
|
||||
|
||||
.. _mi300x-profiling-tools:
|
||||
|
||||
@@ -376,10 +378,11 @@ Refer to `vLLM documentation <https://docs.vllm.ai/en/latest/models/performance.
|
||||
for additional performance tips. :ref:`fine-tuning-llms-vllm` describes vLLM
|
||||
usage with ROCm.
|
||||
|
||||
ROCm provides a prebuilt optimized Docker image for validating the performance
|
||||
of LLM inference with vLLM on the MI300X accelerator. The Docker image includes
|
||||
ROCm, vLLM, PyTorch, and tuning files in the CSV format. For more information,
|
||||
see :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`.
|
||||
ROCm provides a prebuilt optimized Docker image for validating the performance of LLM inference with vLLM
|
||||
on the MI300X accelerator. The Docker image includes ROCm, vLLM, PyTorch, and tuning files in the CSV
|
||||
format. For more information, see the guide to
|
||||
`LLM inference performance validation with vLLM on the AMD Instinct™ MI300X accelerator <https://github.com/ROCm/MAD/blob/develop/benchmark/vllm/README.md>`_
|
||||
on the ROCm GitHub repository.
|
||||
|
||||
Maximize throughput
|
||||
-------------------
|
||||
|
||||
@@ -45,8 +45,7 @@ ROCm documentation is organized into the following categories:
|
||||
* [Using ROCm for HPC](./how-to/rocm-for-hpc/index.rst)
|
||||
* [Fine-tuning LLMs and inference optimization](./how-to/llm-fine-tuning-optimization/index.rst)
|
||||
* [System optimization](./how-to/system-optimization/index.rst)
|
||||
* [AMD Instinct MI300X performance validation and tuning](./how-to/tuning-guides/mi300x/index.rst)
|
||||
* [GPU cluster networking](https://dcgpu.docs.amd.com/projects/gpu-cluster-networking/en/latest/index.html)
|
||||
* [AMD Instinct MI300X tuning guides](./how-to/tuning-guides/mi300x/index.rst)
|
||||
* [System debugging](./how-to/system-debugging.md)
|
||||
* [Using MPI](./how-to/gpu-enabled-mpi.rst)
|
||||
* [Using advanced compiler features](./conceptual/compiler-topics.md)
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
|
||||
| Version | Release date |
|
||||
| ------- | ------------ |
|
||||
| [6.2.2](https://rocm.docs.amd.com/en/docs-6.2.2/) | September 27, 2024 |
|
||||
| [6.2.1](https://rocm.docs.amd.com/en/docs-6.2.1/) | September 20, 2024 |
|
||||
| [6.2.0](https://rocm.docs.amd.com/en/docs-6.2.0/) | August 2, 2024 |
|
||||
| [6.1.2](https://rocm.docs.amd.com/en/docs-6.1.2/) | June 4, 2024 |
|
||||
|
||||
@@ -70,11 +70,9 @@ subtrees:
|
||||
- file: how-to/system-optimization/w6000-v620.md
|
||||
title: AMD RDNA 2
|
||||
- file: how-to/tuning-guides/mi300x/index.rst
|
||||
title: AMD MI300X performance validation and tuning
|
||||
title: AMD MI300X tuning guides
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: how-to/performance-validation/mi300x/vllm-benchmark.rst
|
||||
title: Performance validation
|
||||
- file: how-to/tuning-guides/mi300x/system.rst
|
||||
title: System tuning
|
||||
- file: how-to/tuning-guides/mi300x/workload.rst
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<manifest>
|
||||
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
|
||||
<default revision="refs/tags/rocm-6.1.1"
|
||||
<default revision="refs/tags/rocm-6.2.2"
|
||||
remote="rocm-org"
|
||||
sync-c="true"
|
||||
sync-j="4" />
|
||||
|
||||
@@ -184,5 +184,5 @@ clinfo, and HelloWord.cl and cause a system crash.
|
||||
* IRQ remapping does not support X2APIC mode
|
||||
* NMI error
|
||||
|
||||
Workaround: To avoid the system crash, add `iommu=pt` as the kernel bootparam, as
|
||||
Workaround: To avoid the system crash, add `amd_iommu=on iommu=pt` as the kernel bootparam, as
|
||||
indicated in the warning message.
|
||||
|
||||
@@ -1,3 +1,28 @@
|
||||
The release notes provide a comprehensive summary of changes since the previous ROCm release.
|
||||
|
||||
- [Release highlights](release-highlights)
|
||||
|
||||
- [Operating system and hardware support changes](operating-system-and-hardware-support-changes)
|
||||
|
||||
- [ROCm components versioning](rocm-components)
|
||||
|
||||
- [Detailed component changes](detailed-component-changes)
|
||||
|
||||
- [ROCm known issues](rocm-known-issues)
|
||||
|
||||
- [ROCm upcoming changes](rocm-upcoming-changes)
|
||||
|
||||
The [Compatibility matrix](https://rocm.docs.amd.com/en/docs-6.2.0/compatibility/compatibility-matrix.html)
|
||||
provides an overview of operating system, hardware, ecosystem, and ROCm component support across ROCm releases.
|
||||
|
||||
Release notes for previous ROCm releases are available in earlier versions of the documentation.
|
||||
See the [ROCm documentation release history](https://rocm.docs.amd.com/en/latest/release/versions.html).
|
||||
|
||||
## Release highlights
|
||||
|
||||
This section introduces notable new features and improvements in ROCm 6.2. See the
|
||||
[Detailed component changes](#detailed-component-changes) for individual component changes.
|
||||
|
||||
### New components
|
||||
|
||||
ROCm 6.2.0 introduces the following new components to the ROCm software stack.
|
||||
|
||||
@@ -1,6 +1,30 @@
|
||||
The release notes provide a summary of notable changes since the previous ROCm release.
|
||||
|
||||
- [Release highlights](release-highlights)
|
||||
|
||||
- [Operating system and hardware support changes](operating-system-and-hardware-support-changes)
|
||||
|
||||
- [ROCm components versioning](rocm-components)
|
||||
|
||||
- [Detailed component changes](detailed-component-changes)
|
||||
|
||||
- [ROCm known issues](rocm-known-issues)
|
||||
|
||||
- [ROCm upcoming changes](rocm-upcoming-changes)
|
||||
|
||||
The [Compatibility matrix](https://rocm.docs.amd.com/en/docs-6.2.1/compatibility/compatibility-matrix.html)
|
||||
provides the full list of supported hardware, operating systems, ecosystems, third-party components, and ROCm components for each ROCm release.
|
||||
|
||||
Release notes for previous ROCm releases are available in earlier versions of the documentation.
|
||||
See the [ROCm documentation release history](https://rocm.docs.amd.com/en/latest/release/versions.html).
|
||||
|
||||
## Release highlights
|
||||
|
||||
The following are notable new features and improvements in ROCm 6.2.1. For changes to individual components, see [Detailed component changes](#detailed-component-changes).
|
||||
|
||||
### rocAL major version change
|
||||
|
||||
The new version of rocAL introduces many new features, but does not modify any of the existing public API functions.However, the version number was incremented from 1.3 to 2.0.
|
||||
The new version of rocAL introduces many new features, but does not modify any of the existing public API functions. However, the version number was incremented from 1.3 to 2.0.
|
||||
Applications linked to version 1.3 must be recompiled to link against version 2.0.
|
||||
|
||||
See [the rocAL detailed changes](#rocal-2-0-0) for more information.
|
||||
@@ -9,12 +33,12 @@ See [the rocAL detailed changes](#rocal-2-0-0) for more information.
|
||||
|
||||
As of ROCm 6.2.1, ROCm supports Facebook General Matrix Multiplication (FBGEMM) and the related FBGEMM_GPU library.
|
||||
|
||||
FBGEMM is a low-precision, high-performance CPU kernel library for convolution and matrix multiplication. It is used for server-side inference and as a back end for PyTorch quantized operators. FBGEMM_GPU includes a collection of PyTorch GPU operator libraries for training and inference. For more information, see the ROCm [Model acceleration libraries guide](https://rocm.docs.amd.com/en/6.2.1/how-to/llm-fine-tuning-optimization/model-acceleration-libraries.html)
|
||||
FBGEMM is a low-precision, high-performance CPU kernel library for convolution and matrix multiplication. It is used for server-side inference and as a back end for PyTorch quantized operators. FBGEMM_GPU includes a collection of PyTorch GPU operator libraries for training and inference. For more information, see the ROCm [Model acceleration libraries guide](https://rocm.docs.amd.com/en/docs-6.2.1/how-to/llm-fine-tuning-optimization/model-acceleration-libraries.html)
|
||||
and [PyTorch's FBGEMM GitHub repository](https://github.com/pytorch/FBGEMM).
|
||||
|
||||
### ROCm Offline Installer Creator changes
|
||||
|
||||
The [ROCm Offline Installer Creator 6.2.1](https://rocm.docs.amd.com/projects/install-on-linux/en/6.2.1/install/rocm-offline-installer.html) introduces several new features and improvements including:
|
||||
The [ROCm Offline Installer Creator 6.2.1](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.1/install/rocm-offline-installer.html) introduces several new features and improvements including:
|
||||
|
||||
* Logging support for create and install logs
|
||||
* More stringent checks for Linux versions and distributions
|
||||
@@ -26,8 +50,8 @@ The [ROCm Offline Installer Creator 6.2.1](https://rocm.docs.amd.com/projects/in
|
||||
There have been no changes to supported hardware or operating systems from ROCm 6.2.0 to ROCm 6.2.1.
|
||||
|
||||
* The Programming Model Reference and Understanding the Programming Model topics in HIP have been consolidated into one topic,
|
||||
[HIP programming model (conceptual)](https://rocm.docs.amd.com/projects/HIP/en/6.2.1/understand/programming_model.html).
|
||||
* The [HIP virtual memory management](https://rocm.docs.amd.com/projects/HIP/en/6.2.1/how-to/virtual_memory.html) and [HIP virtual memory management API](https://rocm.docs.amd.com/projects/HIP/en/6.2.1/reference/virtual_memory_reference.html) topics have been added.
|
||||
[HIP programming model (conceptual)](https://rocm.docs.amd.com/projects/HIP/en/docs-6.2.1/understand/programming_model.html).
|
||||
* The [HIP virtual memory management](https://rocm.docs.amd.com/projects/HIP/en/docs-6.2.1/how-to/virtual_memory.html) and [HIP virtual memory management API](https://rocm.docs.amd.com/projects/HIP/en/docs-6.2.1/reference/virtual_memory_reference.html) topics have been added.
|
||||
|
||||
```{note}
|
||||
The ROCm documentation, like all ROCm projects, is open source and available on GitHub. To contribute to ROCm documentation, see the [ROCm documentation contribution guidelines](https://rocm.docs.amd.com/en/latest/contribute/contributing.html).
|
||||
|
||||
31
tools/autotag/templates/highlights/6.2.2.md
Normal file
31
tools/autotag/templates/highlights/6.2.2.md
Normal file
@@ -0,0 +1,31 @@
|
||||
These release notes provide a summary of notable changes since the previous ROCm release.
|
||||
|
||||
```{note}
|
||||
As ROCm 6.2.2 was released shortly after 6.2.1, the changes between these versions
|
||||
are minimal. For a comprehensive overview of recent updates, the ROCm 6.2.1 release
|
||||
notes are appended to the end of this document.
|
||||
|
||||
For detailed information about the changes in ROCm 6.2.1, refer to the appended
|
||||
section: [ROCm 6.2.1 release notes](rocm-6-2-1-release-notes).
|
||||
```
|
||||
|
||||
The [Compatibility matrix](https://rocm.docs.amd.com/en/docs-6.2.2/compatibility/compatibility-matrix.html)
|
||||
provides the full list of supported hardware, operating systems, ecosystems, third-party components, and ROCm components
|
||||
for each ROCm release.
|
||||
|
||||
Release notes for previous ROCm releases are available in earlier versions of the documentation.
|
||||
See the [ROCm documentation release history](https://rocm.docs.amd.com/en/latest/release/versions.html).
|
||||
|
||||
## Release highlights
|
||||
|
||||
The following is a significant fix introduced in ROCm 6.2.2.
|
||||
|
||||
### Fixed Instinct MI300X error recovery failure
|
||||
|
||||
Improved the reliability of AMD Instinct MI300X accelerators in scenarios involving
|
||||
uncorrectable errors. Previously, error recovery did not occur as expected,
|
||||
potentially leaving the system in an undefined state. This fix ensures that error
|
||||
recovery functions as expected, maintaining system stability.
|
||||
|
||||
See the [original issue](#instinct-mi300x-gpu-recovery-failure-on-uncorrectable-errors)
|
||||
noted in the ROCm 6.2.1 release notes.
|
||||
86
tools/autotag/templates/known_issues/6.2.0.md
Normal file
86
tools/autotag/templates/known_issues/6.2.0.md
Normal file
@@ -0,0 +1,86 @@
|
||||
## ROCm known issues
|
||||
|
||||
ROCm known issues are noted on {fab}`github` [GitHub](https://github.com/ROCm/ROCm/labels/Verified%20Issue). For known
|
||||
issues related to individual components, review the [Detailed component changes](detailed-component-changes).
|
||||
|
||||
### Default processor affinity behavior for helper threads
|
||||
|
||||
Processor affinity is a critical setting to ensure that ROCm helper threads run on the correct cores. By default, ROCm
|
||||
helper threads are spawned on all available cores, ignoring the parent thread’s processor affinity. This can lead to
|
||||
threads competing for available cores, which may result in suboptimal performance. This behavior occurs by default if
|
||||
the environment variable `HSA_OVERRIDE_CPU_AFFINITY_DEBUG` is not set or is set to `1`. If
|
||||
`HSA_OVERRIDE_CPU_AFFINITY_DEBUG` is set to `0`, the ROCr runtime uses the parent process's core affinity mask when
|
||||
creating helper threads. The parent’s affinity mask should then be set to account for the presence of additional threads
|
||||
by ensuring the affinity mask contains enough cores. Depending on the affinity settings of the software environment,
|
||||
batch system, launch commands like `numactl`/`taskset`, or explicit mask manipulation by the application itself, changing
|
||||
the setting may be advantageous to performance.
|
||||
|
||||
To ensure the parent's core affinity mask is honored by the ROCm helper threads, set the
|
||||
`HSA_OVERRIDE_CPU_AFFINITY_DEBUG` environment variable as follows:
|
||||
|
||||
```{code} shell
|
||||
export HSA_OVERRIDE_CPU_AFFINITY_DEBUG=0
|
||||
```
|
||||
|
||||
To ensure ROCm helper threads run on all available cores, set the `HSA_OVERRIDE_CPU_AFFINITY_DEBUG` environment variable
|
||||
as follows:
|
||||
|
||||
``` shell
|
||||
export HSA_OVERRIDE_CPU_AFFINITY_DEBUG=1
|
||||
```
|
||||
|
||||
Or the default:
|
||||
|
||||
``` shell
|
||||
|
||||
unset HSA_OVERRIDE_CPU_AFFINITY_DEBUG
|
||||
```
|
||||
|
||||
If unsure of the default processor affinity settings for your environment, run the following command from the shell:
|
||||
|
||||
``` shell
|
||||
|
||||
bash -c "echo taskset -p \$\$"
|
||||
```
|
||||
|
||||
See [issue #3493](https://github.com/ROCm/ROCm/issues/3493) on GitHub.
|
||||
|
||||
### Display issues on servers with Instinct MI300-series accelerators when loading AMDGPU driver
|
||||
|
||||
AMD Instinct MI300-series accelerators and third-party GPUs such as the Matrox G200 have an issue impacting video
|
||||
output. The issue was reproduced on a Dell server model PowerEdge XE9680. Servers from other vendors utilizing Matrox
|
||||
G200 cards may be impacted as well. This issue was found with ROCm 6.2.0 but is present in older ROCm versions.
|
||||
|
||||
The AMDGPU driver shipped with ROCm interferes with the operation of the display card video output. On Dell systems,
|
||||
this includes both the local video output and remote access via iDRAC. The display appears blank (black) after loading
|
||||
the `amdgpu` driver modules. Video output impacts both terminal access when running in `runlevel 3` and GUI access when
|
||||
running in `runlevel 5`. Server functionality can still be accessed via SSH or other remote connection methods.
|
||||
|
||||
See [issue #3494](https://github.com/ROCm/ROCm/issues/3494) on GitHub.
|
||||
|
||||
### KFDTest failure on Instinct MI300X with Oracle Linux 8.9
|
||||
|
||||
The `KFDEvictTest.QueueTest` is failing on the MI300X platform during KFD (Kernel Fusion Driver) tests, causing the full
|
||||
suite to not execute properly. This issue is suspected to be hardware-related.
|
||||
|
||||
See [issue #3495](https://github.com/ROCm/ROCm/issues/3495) on GitHub.
|
||||
|
||||
### Bandwidth limitation in gang and non-gang modes on Instinct MI300A
|
||||
|
||||
Expected target peak non-gang performance (~60GB/s) and target peak gang performance (~90GB/s) are not achieved. Both gang
|
||||
and non-gang performance are observed to be limited at 45GB/s.
|
||||
|
||||
This issue will be addressed in a future ROCm release.
|
||||
|
||||
See [issue #3496](https://github.com/ROCm/ROCm/issues/3496) on GitHub.
|
||||
|
||||
### rocm-llvm-alt
|
||||
|
||||
ROCm provides an optional package -- `rocm-llvm-alt` -- that provides a closed-source compiler for
|
||||
users interested in additional closed-source CPU optimizations. This feature is not functional in
|
||||
the ROCm 6.2.0 release. Users who attempt to invoke the closed-source compiler will experience an
|
||||
LLVM consumer-producer mismatch and the compilation will fail. There is no workaround that allows
|
||||
use of the closed-source compiler. It is recommended to compile using the default open-source
|
||||
compiler, which generates high-quality AMD CPU and AMD GPU code.
|
||||
|
||||
See [issue #3492](https://github.com/ROCm/ROCm/issues/3492) on GitHub.
|
||||
@@ -1,3 +1,8 @@
|
||||
## ROCm known issues
|
||||
|
||||
ROCm known issues are tracked on [GitHub](https://github.com/ROCm/ROCm/labels/Verified%20Issue). Known issues related to
|
||||
individual components are listed in the [Detailed component changes](detailed-component-changes) section.
|
||||
|
||||
### Instinct MI300X GPU recovery failure on uncorrectable errors
|
||||
|
||||
For the AMD Instinct MI300X accelerator, GPU recovery resets triggered by uncorrectable errors (UE) might not complete
|
||||
|
||||
@@ -1,85 +1,3 @@
|
||||
### Default processor affinity behavior for helper threads
|
||||
|
||||
Processor affinity is a critical setting to ensure that ROCm helper threads run on the correct cores. By default, ROCm
|
||||
helper threads are spawned on all available cores, ignoring the parent thread’s processor affinity. This can lead to
|
||||
threads competing for available cores, which may result in suboptimal performance. This behavior occurs by default if
|
||||
the environment variable `HSA_OVERRIDE_CPU_AFFINITY_DEBUG` is not set or is set to `1`. If
|
||||
`HSA_OVERRIDE_CPU_AFFINITY_DEBUG` is set to `0`, the ROCr runtime uses the parent process's core affinity mask when
|
||||
creating helper threads. The parent’s affinity mask should then be set to account for the presence of additional threads
|
||||
by ensuring the affinity mask contains enough cores. Depending on the affinity settings of the software environment,
|
||||
batch system, launch commands like `numactl`/`taskset`, or explicit mask manipulation by the application itself, changing
|
||||
the setting may be advantageous to performance.
|
||||
|
||||
To ensure the parent's core affinity mask is honored by the ROCm helper threads, set the
|
||||
`HSA_OVERRIDE_CPU_AFFINITY_DEBUG` environment variable as follows:
|
||||
|
||||
```{code} shell
|
||||
export HSA_OVERRIDE_CPU_AFFINITY_DEBUG=0
|
||||
```
|
||||
|
||||
To ensure ROCm helper threads run on all available cores, set the `HSA_OVERRIDE_CPU_AFFINITY_DEBUG` environment variable
|
||||
as follows:
|
||||
|
||||
``` shell
|
||||
export HSA_OVERRIDE_CPU_AFFINITY_DEBUG=1
|
||||
```
|
||||
|
||||
Or the default:
|
||||
|
||||
``` shell
|
||||
|
||||
unset HSA_OVERRIDE_CPU_AFFINITY_DEBUG
|
||||
```
|
||||
|
||||
If unsure of the default processor affinity settings for your environment, run the following command from the shell:
|
||||
|
||||
``` shell
|
||||
|
||||
bash -c "echo taskset -p \$\$"
|
||||
```
|
||||
|
||||
See [issue #3493](https://github.com/ROCm/ROCm/issues/3493) on GitHub.
|
||||
|
||||
### Display issues on servers with Instinct MI300-series accelerators when loading AMDGPU driver
|
||||
|
||||
AMD Instinct MI300-series accelerators and third-party GPUs such as the Matrox G200 have an issue impacting video
|
||||
output. The issue was reproduced on a Dell server model PowerEdge XE9680. Servers from other vendors utilizing Matrox
|
||||
G200 cards may be impacted as well. This issue was found with ROCm 6.2.0 but is present in older ROCm versions.
|
||||
|
||||
The AMDGPU driver shipped with ROCm interferes with the operation of the display card video output. On Dell systems,
|
||||
this includes both the local video output and remote access via iDRAC. The display appears blank (black) after loading
|
||||
the `amdgpu` driver modules. Video output impacts both terminal access when running in `runlevel 3` and GUI access when
|
||||
running in `runlevel 5`. Server functionality can still be accessed via SSH or other remote connection methods.
|
||||
|
||||
See [issue #3494](https://github.com/ROCm/ROCm/issues/3494) on GitHub.
|
||||
|
||||
### KFDTest failure on Instinct MI300X with Oracle Linux 8.9
|
||||
|
||||
The `KFDEvictTest.QueueTest` is failing on the MI300X platform during KFD (Kernel Fusion Driver) tests, causing the full
|
||||
suite to not execute properly. This issue is suspected to be hardware-related.
|
||||
|
||||
See [issue #3495](https://github.com/ROCm/ROCm/issues/3495) on GitHub.
|
||||
|
||||
### Bandwidth limitation in gang and non-gang modes on Instinct MI300A
|
||||
|
||||
Expected target peak non-gang performance (~60GB/s) and target peak gang performance (~90GB/s) are not achieved. Both gang
|
||||
and non-gang performance are observed to be limited at 45GB/s.
|
||||
|
||||
This issue will be addressed in a future ROCm release.
|
||||
|
||||
See [issue #3496](https://github.com/ROCm/ROCm/issues/3496) on GitHub.
|
||||
|
||||
### rocm-llvm-alt
|
||||
|
||||
ROCm provides an optional package -- `rocm-llvm-alt` -- that provides a closed-source compiler for
|
||||
users interested in additional closed-source CPU optimizations. This feature is not functional in
|
||||
the ROCm 6.2.0 release. Users who attempt to invoke the closed-source compiler will experience an
|
||||
LLVM consumer-producer mismatch and the compilation will fail. There is no workaround that allows
|
||||
use of the closed-source compiler. It is recommended to compile using the default open-source
|
||||
compiler, which generates high-quality AMD CPU and AMD GPU code.
|
||||
|
||||
See [issue #3492](https://github.com/ROCm/ROCm/issues/3492) on GitHub.
|
||||
|
||||
## ROCm upcoming changes
|
||||
|
||||
The section notes upcoming changes to the ROCm software stack. For upcoming changes related to individual components, review
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
## ROCm upcoming changes
|
||||
|
||||
The following changes to the ROCm software stack are anticipated for future releases.
|
||||
|
||||
### rocm-llvm-alt
|
||||
|
||||
The `rocm-llvm-alt` package will be removed in an upcoming release. Users relying on the functionality provided by the closed-source compiler should transition to the open-source compiler. Once the `rocm-llvm-alt` package is removed, any compilation requesting functionality provided by the closed-source compiler will result in a Clang warning: "*[AMD] proprietary optimization compiler has been removed*".
|
||||
|
||||
@@ -36,6 +36,7 @@ build_mivisionx() {
|
||||
"$COMPONENT_SRC"
|
||||
|
||||
cmake --build "$BUILD_DIR" -- -j${PROC}
|
||||
cmake --build "$BUILD_DIR" -- install
|
||||
cpack -G ${PKGTYPE^^}
|
||||
|
||||
rm -rf _CPack_Packages/ && find -name '*.o' -delete
|
||||
|
||||
@@ -20,9 +20,27 @@ build_rocal() {
|
||||
set_address_sanitizer_on
|
||||
fi
|
||||
|
||||
mkdir -p $BUILD_DIR && cd $BUILD_DIR
|
||||
# python3 ${COMPONENT_SRC}/rocAL-setup.py
|
||||
pushd /tmp
|
||||
# PyBind11
|
||||
git clone -b v2.11.1 https://github.com/pybind/pybind11
|
||||
cd pybind11 && mkdir build && cd build
|
||||
cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../
|
||||
make -j$(nproc) && sudo make install
|
||||
cd ../..
|
||||
# Turbo JPEG
|
||||
git clone -b 3.0.2 https://github.com/libjpeg-turbo/libjpeg-turbo.git
|
||||
cd libjpeg-turbo && mkdir build && cd build
|
||||
cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib -DWITH_JPEG8=TRUE ..
|
||||
make -j$(nproc) && sudo make install
|
||||
cd ../..
|
||||
# RapidJSON
|
||||
git clone https://github.com/Tencent/rapidjson.git
|
||||
cd rapidjson && mkdir build && cd build
|
||||
cmake .. && make -j$(nproc) && sudo make install
|
||||
popd
|
||||
|
||||
python3 ${COMPONENT_SRC}/rocAL-setup.py
|
||||
mkdir -p $BUILD_DIR && cd $BUILD_DIR
|
||||
|
||||
cmake -DAMDRPP_PATH=$ROCM_PATH ${COMPONENT_SRC}
|
||||
make -j${PROC}
|
||||
|
||||
74
tools/rocm-build/rocm-6.2.1.xml
Normal file
74
tools/rocm-build/rocm-6.2.1.xml
Normal file
@@ -0,0 +1,74 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<manifest>
|
||||
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
|
||||
<default revision="refs/tags/rocm-6.2.1"
|
||||
remote="rocm-org"
|
||||
sync-c="true"
|
||||
sync-j="4" />
|
||||
<!--list of projects for ROCm-->
|
||||
<project name="ROCm" revision="roc-6.2.x" />
|
||||
<project name="ROCK-Kernel-Driver" />
|
||||
<project name="ROCR-Runtime" />
|
||||
<project name="ROCT-Thunk-Interface" />
|
||||
<project name="amdsmi" />
|
||||
<project name="omniperf" />
|
||||
<project name="omnitrace" />
|
||||
<project name="rdc" />
|
||||
<project name="rocm_bandwidth_test" />
|
||||
<project name="rocm_smi_lib" />
|
||||
<project name="rocm-core" />
|
||||
<project name="rocminfo" />
|
||||
<project name="rocprofiler" />
|
||||
<project name="rocprofiler-register" />
|
||||
<project name="rocprofiler-sdk" />
|
||||
<project name="roctracer" />
|
||||
<!--HIP Projects-->
|
||||
<project name="HIP" />
|
||||
<project name="hip-tests" />
|
||||
<project name="HIP-Examples" />
|
||||
<project name="HIPIFY" />
|
||||
<project name="clr" />
|
||||
<project name="hipother" />
|
||||
<!-- The following projects are all associated with the AMDGPU LLVM compiler -->
|
||||
<project name="half" />
|
||||
<project name="llvm-project" />
|
||||
<!-- gdb projects -->
|
||||
<project name="ROCdbgapi" />
|
||||
<project name="ROCgdb" />
|
||||
<project name="rocr_debug_agent" />
|
||||
<!-- ROCm Libraries -->
|
||||
<project groups="mathlibs" name="AMDMIGraphX" />
|
||||
<project groups="mathlibs" name="MIOpen" />
|
||||
<project groups="mathlibs" name="MIVisionX" />
|
||||
<project groups="mathlibs" name="ROCmValidationSuite" />
|
||||
<project groups="mathlibs" name="Tensile" />
|
||||
<project groups="mathlibs" name="composable_kernel" />
|
||||
<project groups="mathlibs" name="hipBLAS" />
|
||||
<project groups="mathlibs" name="hipBLASLt" />
|
||||
<project groups="mathlibs" name="hipCUB" />
|
||||
<project groups="mathlibs" name="hipFFT" />
|
||||
<project groups="mathlibs" name="hipRAND" />
|
||||
<project groups="mathlibs" name="hipSOLVER" />
|
||||
<project groups="mathlibs" name="hipSPARSE" />
|
||||
<project groups="mathlibs" name="hipSPARSELt" />
|
||||
<project groups="mathlibs" name="hipTensor" />
|
||||
<project groups="mathlibs" name="hipfort" />
|
||||
<project groups="mathlibs" name="rccl" />
|
||||
<project groups="mathlibs" name="rocAL" />
|
||||
<project groups="mathlibs" name="rocALUTION" />
|
||||
<project groups="mathlibs" name="rocBLAS" />
|
||||
<project groups="mathlibs" name="rocDecode" />
|
||||
<project groups="mathlibs" name="rocFFT" />
|
||||
<project groups="mathlibs" name="rocPRIM" />
|
||||
<project groups="mathlibs" name="rocRAND" />
|
||||
<project groups="mathlibs" name="rocSOLVER" />
|
||||
<project groups="mathlibs" name="rocSPARSE" />
|
||||
<project groups="mathlibs" name="rocThrust" />
|
||||
<project groups="mathlibs" name="rocWMMA" />
|
||||
<project groups="mathlibs" name="rocm-cmake" />
|
||||
<project groups="mathlibs" name="rpp" />
|
||||
<!-- Projects for OpenMP-Extras -->
|
||||
<project name="aomp" path="openmp-extras/aomp" />
|
||||
<project name="aomp-extras" path="openmp-extras/aomp-extras" />
|
||||
<project name="flang" path="openmp-extras/flang" />
|
||||
</manifest>
|
||||
Reference in New Issue
Block a user