Compare commits

..

2 Commits

Author SHA1 Message Date
chiranjeevipattigidi
3337b4fd2a Update packages - remove broken packages 2025-05-21 08:10:47 -04:00
cpattigi
a069100d47 Update envsetup.sh HIP_ON_ROCclr_ROOT path to hip and remove
aqlprofiletest
2025-05-20 08:13:44 -04:00
79 changed files with 2874 additions and 3283 deletions

View File

@@ -32,12 +32,12 @@ parameters:
type: object
default:
- https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
- "onnx>=1.14.1"
- "numpy>=1.21.6"
- "typing>=3.7.4"
- "pytest>=6.0.1"
- "packaging>=23.0"
- "protobuf>=3.20.2"
- onnx>=1.14.1
- numpy>=1.21.6
- typing>=3.7.4
- pytest>=6.0.1
- packaging>=23.0
- protobuf>=3.20.2
- name: rocmDependencies
type: object
default:

View File

@@ -77,8 +77,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
componentName: clr
cmakeBuildDir: '$(Build.SourcesDirectory)/clr/build'
cmakeSourceDir: '$(Build.SourcesDirectory)/clr'
cmakeBuildDir: 'clr/build'
extraBuildFlags: >-
-DHIP_COMMON_DIR=$(Build.SourcesDirectory)/HIP
-DHIP_PLATFORM=amd
@@ -139,8 +138,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
componentName: clr
cmakeBuildDir: '$(Build.SourcesDirectory)/clr/build'
cmakeSourceDir: '$(Build.SourcesDirectory)/clr'
cmakeBuildDir: 'clr/build'
extraBuildFlags: >-
-DHIP_COMMON_DIR=$(Build.SourcesDirectory)/HIP
-DHIP_PLATFORM=nvidia

View File

@@ -73,7 +73,6 @@ jobs:
parameters:
componentName: upstream-llvm
cmakeBuildDir: $(Pipeline.Workspace)/llvm-project/llvm/build
cmakeSourceDir: $(Pipeline.Workspace)/llvm-project/llvm
installDir: $(Pipeline.Workspace)/llvm
extraBuildFlags: >-
-DCMAKE_BUILD_TYPE=Release

View File

@@ -113,13 +113,13 @@ jobs:
mkdir -p $(Agent.BuildDirectory)/miopen-deps
export CXX=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
export CC=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
cmake -P install_deps.cmake --prefix $(Agent.BuildDirectory)/miopen-deps --generator Ninja
cmake -P install_deps.cmake --prefix $(Agent.BuildDirectory)/miopen-deps
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
extraBuildFlags: >-
-DMIOPEN_BACKEND=HIP
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/miopen-deps
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/miopen-deps --generator Ninja
-DGPU_TARGETS=${{ job.target }}
-DMIOPEN_ENABLE_AI_KERNEL_TUNING=OFF
-DMIOPEN_ENABLE_AI_IMMED_MODE_FALLBACK=OFF
@@ -214,7 +214,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: MIOpen
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex "test_rnn_seq_api|GPU_Conv2dTuningAsm_FP32"'
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex test_rnn_seq_api'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}

View File

@@ -15,7 +15,6 @@ parameters:
type: object
default:
- bison
- cmake
- dejagnu
- flex
- libbabeltrace-dev
@@ -40,69 +39,17 @@ parameters:
- name: jobMatrix
type: object
default:
testJobs:
buildTestJobs:
- gfx942:
target: gfx942
- gfx90a:
target: gfx90a
jobs:
- job: ROCgdb
variables:
- group: common
- template: /.azuredevops/variables-global.yml
- name: PKG_CONFIG_PATH
value: $(Agent.BuildDirectory)/rocm/share/pkgconfig
pool:
vmImage: ${{ variables.BASE_BUILD_POOL }}
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-autotools.yml
parameters:
configureFlags: >-
--program-prefix=roc
--enable-64-bit-bfd
--enable-targets="x86_64-linux-gnu,amdgcn-amd-amdhsa"
--disable-ld
--disable-gas
--disable-gdbserver
--disable-sim
--enable-tui
--disable-gdbtk
--disable-shared
--disable-gprofng
--with-expat
--with-system-zlib
--without-guile
--with-babeltrace
--with-lzma
--with-python=python3
--with-rocm-dbgapi=$(Agent.BuildDirectory)/rocm
LDFLAGS="-Wl,--enable-new-dtags,-rpath=$(Agent.BuildDirectory)/rocm/lib"
makeCallPrefix: LD_RUN_PATH='${ORIGIN}/../lib'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ROCgdb_test_${{ job.target }}
dependsOn: ROCgdb
- ${{ each job in parameters.jobMatrix.buildTestJobs }}:
- job: ROCgdb_build_test_${{ job.target }}
condition:
and(succeeded(),
and(
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
eq(${{ parameters.aggregatePipeline }}, False)
@@ -152,6 +99,8 @@ jobs:
--with-rocm-dbgapi=$(Agent.BuildDirectory)/rocm
LDFLAGS="-Wl,--enable-new-dtags,-rpath=$(Agent.BuildDirectory)/rocm/lib"
makeCallPrefix: LD_RUN_PATH='${ORIGIN}/../lib'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
- task: Bash@3
displayName: Setup test environment
inputs:
@@ -160,6 +109,7 @@ jobs:
# Assuming that /opt is no longer persistent across runs, test environments are fully ephemeral
sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm
echo "##vso[task.prependpath]/opt/rocm/bin"
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- task: Bash@3
displayName: check-gdb

View File

@@ -27,7 +27,6 @@ parameters:
type: object
default:
- amdsmi
- aomp
- clr
- hipBLAS-common
- hipBLASLt
@@ -44,7 +43,6 @@ parameters:
type: object
default:
- amdsmi
- aomp
- clr
- hipBLAS-common
- hipBLASLt
@@ -110,7 +108,6 @@ jobs:
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/llvm/include
-DCPACK_PACKAGING_INSTALL_PREFIX=$(Build.BinariesDirectory)
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml

View File

@@ -118,7 +118,6 @@ jobs:
parameters:
componentName: extras
cmakeBuildDir: '$(Build.SourcesDirectory)/aomp-extras/build'
cmakeSourceDir: '$(Build.SourcesDirectory)/aomp-extras'
installDir: '$(Build.BinariesDirectory)/llvm'
extraBuildFlags: >-
-DLLVM_DIR=$(Agent.BuildDirectory)/rocm/llvm
@@ -130,7 +129,6 @@ jobs:
parameters:
componentName: openmp
cmakeBuildDir: '$(Build.SourcesDirectory)/llvm-project/openmp/build'
cmakeSourceDir: '$(Build.SourcesDirectory)/llvm-project/openmp'
installDir: '$(Build.BinariesDirectory)/llvm'
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Build.BinariesDirectory)"
@@ -157,7 +155,6 @@ jobs:
parameters:
componentName: offload
cmakeBuildDir: '$(Build.SourcesDirectory)/llvm-project/offload/build'
cmakeSourceDir: '$(Build.SourcesDirectory)/llvm-project/offload'
installDir: '$(Build.BinariesDirectory)/llvm'
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Build.BinariesDirectory)"

View File

@@ -26,11 +26,9 @@ jobs:
parameters:
componentName: HIP
pipelineId: $(HIP_PIPELINE_ID)
- task: Bash@3
displayName: Copy HIP artifacts
inputs:
targetType: inline
script: cp -a $(Agent.BuildDirectory)/rocm/* $(Build.BinariesDirectory)/
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
parameters:
sourceDir: $(Agent.BuildDirectory)/rocm
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml

View File

@@ -133,6 +133,8 @@ jobs:
variables:
- group: common
- template: /.azuredevops/variables-global.yml
- name: AMD_COMGR_CACHE
value: 0
pool: ${{ job.target }}_test_pool
workspace:
clean: all

View File

@@ -27,7 +27,7 @@ parameters:
type: object
default:
- joblib
- "packaging>=22.0"
- packaging>=22.0
- --upgrade
- name: rocmDependencies
type: object
@@ -193,6 +193,8 @@ jobs:
- template: /.azuredevops/variables-global.yml
- name: ROCM_PATH
value: $(Agent.BuildDirectory)/rocm
- name: AMD_COMGR_CACHE
value: 0
pool: ${{ job.target }}_test_pool
workspace:
clean: all

View File

@@ -92,8 +92,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
componentName: external
cmakeBuildDir: '$(Build.SourcesDirectory)/deps/build'
cmakeSourceDir: '$(Build.SourcesDirectory)/deps'
cmakeBuildDir: 'deps/build'
installDir: '$(Pipeline.Workspace)/deps-install'
extraBuildFlags: >-
-DBUILD_BOOST=OFF
@@ -135,6 +134,8 @@ jobs:
variables:
- group: common
- template: /.azuredevops/variables-global.yml
- name: AMD_COMGR_CACHE
value: 0
pool: ${{ job.target }}_test_pool
workspace:
clean: all

View File

@@ -128,6 +128,8 @@ jobs:
variables:
- group: common
- template: /.azuredevops/variables-global.yml
- name: AMD_COMGR_CACHE
value: 0
pool: ${{ job.target }}_test_pool
workspace:
clean: all

View File

@@ -107,6 +107,8 @@ jobs:
variables:
- group: common
- template: /.azuredevops/variables-global.yml
- name: AMD_COMGR_CACHE
value: 0
pool: ${{ job.target }}_test_pool
workspace:
clean: all

View File

@@ -83,8 +83,7 @@ jobs:
-DROCM_LLVM_BACKWARD_COMPAT_LINK=$(Build.BinariesDirectory)/llvm
-DROCM_LLVM_BACKWARD_COMPAT_LINK_TARGET=./lib/llvm
-GNinja
cmakeBuildDir: '$(Build.SourcesDirectory)/llvm/build'
cmakeSourceDir: '$(Build.SourcesDirectory)/llvm'
cmakeBuildDir: 'llvm/build'
installDir: '$(Build.BinariesDirectory)/llvm'
# use llvm-lit to run unit tests for llvm, clang, and lld
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
@@ -122,8 +121,7 @@ jobs:
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH="$(Build.SourcesDirectory)/llvm/build"
-DCMAKE_BUILD_TYPE=Release
cmakeBuildDir: '$(Build.SourcesDirectory)/amd/device-libs/build'
cmakeSourceDir: '$(Build.SourcesDirectory)/amd/device-libs'
cmakeBuildDir: 'amd/device-libs/build'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
componentName: comgr
@@ -131,8 +129,7 @@ jobs:
-DCMAKE_PREFIX_PATH="$(Build.SourcesDirectory)/llvm/build;$(Build.SourcesDirectory)/amd/device-libs/build"
-DCOMGR_DISABLE_SPIRV=1
-DCMAKE_BUILD_TYPE=Release
cmakeBuildDir: '$(Build.SourcesDirectory)/amd/comgr/build'
cmakeSourceDir: '$(Build.SourcesDirectory)/amd/comgr'
cmakeBuildDir: 'amd/comgr/build'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: comgr
@@ -145,8 +142,7 @@ jobs:
extraBuildFlags: >-
-DCMAKE_BUILD_TYPE=Release
-DHIPCC_BACKWARD_COMPATIBILITY=OFF
cmakeBuildDir: '$(Build.SourcesDirectory)/amd/hipcc/build'
cmakeSourceDir: '$(Build.SourcesDirectory)/amd/hipcc'
cmakeBuildDir: 'amd/hipcc/build'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml

View File

@@ -105,7 +105,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
cmakeBuildDir: $(Build.SourcesDirectory)/grpc/build
cmakeSourceDir: $(Build.SourcesDirectory)/grpc
installDir: $(Build.SourcesDirectory)/bin
extraBuildFlags: >-
-DgRPC_INSTALL=ON

View File

@@ -125,7 +125,6 @@ jobs:
parameters:
componentName: PyBind11
cmakeBuildDir: '$(Build.SourcesDirectory)/pybind11/build'
cmakeSourceDir: '$(Build.SourcesDirectory)/pybind11'
customInstallPath: false
installEnabled: false
extraBuildFlags: >-
@@ -142,7 +141,6 @@ jobs:
parameters:
componentName: RapidJSON
cmakeBuildDir: '$(Build.SourcesDirectory)/rapidjson/build'
cmakeSourceDir: '$(Build.SourcesDirectory)/rapidjson'
customInstallPath: false
installEnabled: false
extraBuildFlags: >-
@@ -202,6 +200,7 @@ jobs:
value: $(Agent.BuildDirectory)/rocm/include/rocal
pool:
name: ${{ job.target }}_test_pool
demands: firstRenderDeviceAccess
workspace:
clean: all
steps:

View File

@@ -157,6 +157,8 @@ jobs:
variables:
- group: common
- template: /.azuredevops/variables-global.yml
- name: AMD_COMGR_CACHE
value: 0
pool: ${{ job.target }}_test_pool
workspace:
clean: all

View File

@@ -108,6 +108,7 @@ jobs:
value: $(Agent.BuildDirectory)/rocm
pool:
name: ${{ job.target }}_test_pool
demands: firstRenderDeviceAccess
workspace:
clean: all
steps:

View File

@@ -89,8 +89,6 @@ jobs:
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
# parameters:
@@ -114,6 +112,7 @@ jobs:
value: $(Agent.BuildDirectory)/rocm
pool:
name: ${{ job.target }}_test_pool
demands: firstRenderDeviceAccess
workspace:
clean: all
steps:
@@ -123,8 +122,6 @@ jobs:
registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
@@ -150,3 +147,4 @@ jobs:
environment: test
gpuTarget: ${{ job.target }}
registerROCmPackages: true
optSymLink: true

View File

@@ -5,12 +5,6 @@ parameters:
- name: checkoutRef
type: string
default: ''
- name: sparseCheckout
type: boolean
default: false
- name: sparseCheckoutDir
type: string
default: ''
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
@@ -72,8 +66,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckout: ${{ parameters.sparseCheckout }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}

View File

@@ -168,6 +168,7 @@ jobs:
value: $(Agent.BuildDirectory)/rocm
pool:
name: ${{ job.target }}_test_pool
demands: firstRenderDeviceAccess
workspace:
clean: all
steps:

View File

@@ -105,7 +105,6 @@ jobs:
-DLAPACKE=OFF
-GNinja
cmakeBuildDir: '$(Build.SourcesDirectory)/lapack/build'
cmakeSourceDir: '$(Build.SourcesDirectory)/lapack'
installDir: '$(Pipeline.Workspace)/deps-install'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
@@ -144,6 +143,8 @@ jobs:
variables:
- group: common
- template: /.azuredevops/variables-global.yml
- name: AMD_COMGR_CACHE
value: 0
pool: ${{ job.target }}_test_pool
workspace:
clean: all

View File

@@ -34,7 +34,6 @@ parameters:
- rocminfo
- rocPRIM
- rocprofiler-register
- roctracer
- name: rocmTestDependencies
type: object
default:
@@ -139,6 +138,8 @@ jobs:
variables:
- group: common
- template: /.azuredevops/variables-global.yml
- name: AMD_COMGR_CACHE
value: 0
pool: ${{ job.target }}_test_pool
workspace:
clean: all

View File

@@ -125,6 +125,8 @@ jobs:
variables:
- group: common
- template: /.azuredevops/variables-global.yml
- name: AMD_COMGR_CACHE
value: 0
pool: ${{ job.target }}_test_pool
workspace:
clean: all

View File

@@ -183,7 +183,6 @@ jobs:
parameters:
componentName: rocm-examples
testDir: $(Build.SourcesDirectory)/build
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex "rocfft_callback"'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}

View File

@@ -14,8 +14,6 @@ parameters:
type: object
default:
- cmake
- libdw-dev
- libtbb-dev
- locales
- ninja-build
- python3-pip
@@ -24,10 +22,10 @@ parameters:
default:
- astunparse==1.6.2
- colorlover
- "dash>=1.12.0"
- dash>=1.12.0
- matplotlib
- "numpy>=1.17.5"
- "pandas>=1.4.3"
- numpy>=1.17.5
- pandas>=1.4.3
- pymongo
- pyyaml
- tabulate
@@ -191,9 +189,12 @@ jobs:
displayName: Add ROCm binaries to PATH
inputs:
targetType: inline
script: |
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
script: echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
- task: Bash@3
displayName: Add ROCm compilers to PATH
inputs:
targetType: inline
script: echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
extraBuildFlags: >-
@@ -212,6 +213,18 @@ jobs:
componentName: rocprofiler-compute
testDir: $(Build.BinariesDirectory)/libexec/rocprofiler-compute
testExecutable: ROCM_PATH=$(Agent.BuildDirectory)/rocm ctest
- task: Bash@3
displayName: Remove ROCm binaries from PATH
condition: always()
inputs:
targetType: inline
script: echo "##vso[task.setvariable variable=PATH]$(echo $PATH | sed -e 's;:$(Agent.BuildDirectory)/rocm/bin;;' -e 's;^/;;' -e 's;/$;;')"
- task: Bash@3
displayName: Remove ROCm compilers from PATH
condition: always()
inputs:
targetType: inline
script: echo "##vso[task.setvariable variable=PATH]$(echo $PATH | sed -e 's;:$(Agent.BuildDirectory)/rocm/llvm/bin;;' -e 's;^/;;' -e 's;/$;;')"
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}

View File

@@ -38,13 +38,19 @@ jobs:
parameters:
componentName: rocprofiler-register
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Build.BinariesDirectory)
-DROCPROFILER_REGISTER_BUILD_TESTS=ON
-DROCPROFILER_REGISTER_BUILD_SAMPLES=ON
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
componentName: rocprofiler-register-tests
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Build.BinariesDirectory)
-GNinja
cmakeBuildDir: 'tests/build'
installEnabled: false
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: rocprofiler-register
testDir: 'tests/build'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml

View File

@@ -36,7 +36,7 @@ parameters:
- pandas
- perfetto
- pycobertura
- "pytest>=6.2.5"
- pytest>=6.2.5
- pyyaml
- name: rocmDependencies
type: object

View File

@@ -21,7 +21,6 @@ parameters:
- bzip2
- clang
- cmake
- chrpath
- environment-modules
- ffmpeg
- g++-12
@@ -67,7 +66,6 @@ parameters:
- rocprofiler-register
- rocprofiler-sdk
- ROCR-Runtime
- roctracer
- name: jobMatrix
type: object
@@ -132,7 +130,6 @@ jobs:
-DDYNINST_BUILD_BOOST=ON
-DROCPROFSYS_USE_PAPI=ON
-DROCPROFSYS_USE_MPI=ON
-DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/include/rocjpeg
-DGPU_TARGETS=${{ job.target }}
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
@@ -167,6 +164,7 @@ jobs:
value: $(Agent.BuildDirectory)/rocm
pool:
name: ${{ job.target }}_test_pool
demands: firstRenderDeviceAccess
workspace:
clean: all
steps:
@@ -206,7 +204,6 @@ jobs:
-DDYNINST_BUILD_BOOST=ON
-DROCPROFSYS_USE_PAPI=ON
-DROCPROFSYS_USE_MPI=ON
-DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/include/rocjpeg
-DGPU_TARGETS=${{ job.target }}
-GNinja
- task: Bash@3

View File

@@ -38,7 +38,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
cmakeBuildDir: $(Agent.BuildDirectory)/grpc/build
cmakeSourceDir: $(Agent.BuildDirectory)/grpc
extraBuildFlags: >-
-DgRPC_INSTALL=ON
-DgRPC_BUILD_TESTS=OFF

View File

@@ -38,7 +38,6 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
cmakeBuildDir: $(Agent.BuildDirectory)/googletest/build
cmakeSourceDir: $(Agent.BuildDirectory)/googletest
extraBuildFlags: >-
-DGTEST_FORCE_SHARED_CRT=ON
-DCMAKE_DEBUG_POSTFIX=d

View File

@@ -68,7 +68,7 @@ parameters:
default:
- cmake
- astunparse
- "expecttest>=0.2.1"
- expecttest>=0.2.1
- hypothesis
- numpy
- psutil
@@ -76,8 +76,8 @@ parameters:
- requests
- setuptools==75.8.0
- types-dataclasses
- "typing-extensions>=4.8.0"
- "sympy>=1.13.0"
- typing-extensions>=4.8.0
- sympy>=1.13.0
- filelock
- networkx
- jinja2
@@ -85,8 +85,8 @@ parameters:
- lintrunner
- ninja
- packaging
- "optree>=0.13.0"
- "click>=8.0.3"
- optree>=0.13.0
- click>=8.0.3
# list for vision
- auditwheel
- future

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env python3
import json
import requests
import argparse
from pathlib import Path
def get_builds(entries, gpu_target, output):
already_downloaded = {}
for entry in entries:
already_downloaded = _get_builds(entry, gpu_target, already_downloaded, output)
def _get_builds(entry, gpu_target, already_downloaded, output):
print()
print(f"{entry['buildNumber']} - {entry['buildId']} - {entry['repoName']}")
if already_downloaded.get(entry['buildId']):
print('Skipping, already downloaded from build ' + entry['buildId'])
return already_downloaded
artifacts_url = f"https://dev.azure.com/ROCm-CI/ROCm-CI/_apis/build/builds/{entry['buildId']}/artifacts?api-version=7.1"
artifacts = requests.get(artifacts_url).json()
for artifact in artifacts['value']:
if 'gfx' in artifact['name'] and gpu_target not in artifact['name']:
continue
print('Artifact name: ' + artifact['name'])
print('File size: ~' +
str(round(int(artifact['resource']['properties']['artifactsize'])/1000000, 2)) + ' MB')
download_url = f"{artifact['resource']['downloadUrl']}"
download = requests.get(download_url)
zip_file = Path(output) / f"{artifact['name']}.zip"
with open(zip_file, 'wb') as f:
f.write(download.content)
already_downloaded[entry['buildId']] = True
return already_downloaded
def main():
parser = argparse.ArgumentParser(description="Command line tool for downloading external ci artifacts")
parser.add_argument('--target', type=str, dest="target", choices=["gfx90a", "gfx942"], help="Target gfx")
parser.add_argument('--manifest', type=str, dest="manifest", help='JSON manifest url or path to local manifest')
parser.add_argument('--output_dir', type=str, dest="output", help='Path to download directory')
args = parser.parse_args()
manifest = args.manifest
gpu_target = args.target
if not gpu_target:
print("Enter the GPU target (gfx942, gfx90a)")
gpu_target = input()
if not manifest:
print("Enter the manifest file (URL or local path)")
manifest = input()
if 'http' in manifest:
data = requests.get(manifest).json()
else:
with open(manifest, 'r') as f:
data = json.load(f)
entries = [e for e in data['current']]
entries.extend([e for e in data['dependencies']])
get_builds(entries, gpu_target, args.output)
if __name__ == "__main__":
main()

View File

@@ -10,10 +10,10 @@ parameters:
default: ''
- name: cmakeBuildDir
type: string
default: $(Agent.BuildDirectory)/s/build
default: 'build'
- name: cmakeSourceDir
type: string
default: $(Agent.BuildDirectory)/s
default: '..'
- name: customBuildTarget
type: string
default: ''
@@ -46,7 +46,7 @@ steps:
${{ if eq(parameters.customInstallPath, true) }}:
cmakeArgs: -DCMAKE_INSTALL_PREFIX=${{ parameters.installDir }} ${{ parameters.extraBuildFlags }} ${{ parameters.cmakeSourceDir }}
${{ else }}:
cmakeArgs: ${{ parameters.extraBuildFlags }} ${{ parameters.cmakeSourceDir }}
cmakeArgs: ${{ parameters.extraBuildFlags }} ..
- ${{ if parameters.printDiskSpace }}:
- script: df -h
displayName: Disk space before build

View File

@@ -4,12 +4,6 @@ parameters:
- name: checkoutRepo
type: string
default: 'self'
- name: sparseCheckout
type: boolean
default: false
- name: sparseCheckoutDir
type: string
default: ''
# submodule download behaviour
# change to 'recursive' for repos with submodules
- name: submoduleBehaviour
@@ -21,13 +15,3 @@ steps:
clean: true
submodules: ${{ parameters.submoduleBehaviour }}
retryCountOnTaskFailure: 3
fetchFilter: blob:none
${{ if eq(parameters.sparseCheckout, true) }}:
sparseCheckoutDirectories: ${{ parameters.sparseCheckoutDir }}
path: sparse
- ${{ if eq(parameters.sparseCheckout, true) }}:
- task: Bash@3
displayName: Symlink sparse checkout
inputs:
targetType: inline
script: ln -s $(Agent.BuildDirectory)/sparse/${{ parameters.sparseCheckoutDir }} $(Agent.BuildDirectory)/s

View File

@@ -463,7 +463,7 @@ steps:
displayName: 'List downloaded ROCm files'
inputs:
targetType: inline
script: ls -la1R $(Agent.BuildDirectory)/rocm
script: ls -1R $(Agent.BuildDirectory)/rocm
- ${{ if eq(parameters.skipLibraryLinking, false) }}:
- task: Bash@3
displayName: 'Link ROCm shared libraries'

View File

@@ -1,82 +0,0 @@
# This template creates and uploads a Docker image from the current environment
# It uses `docker commit` to do so, which can provide more accurate images than the standard template
# It requires the following conditions:
# - Job must be run inside a Docker container
# - The container's external name must be the same as the container's internal hostname
# - Docker must be installed inside said container and given sufficient permissions
# Currently, it is only usable for test jobs run on our self-managed systems
# Jobs run on Azure VMs will not be able to use this template (most if not all build jobs)
parameters:
- name: gpuTarget
type: string
default: ''
- name: environment
type: string
default: build
values:
- build
- test
- combined
- amd
- nvidia
- name: extraPaths
type: string
default: ''
- name: extraEnvVars
type: object
default: []
- name: forceDockerCreation
type: boolean
default: false
steps:
- task: Bash@3
displayName: Commit container and initialize Dockerfile
condition: or(and(failed(), not(contains(variables['DOCKER_SKIP_GFX'], variables['JOB_GPU_TARGET']))), ${{ eq(parameters.forceDockerCreation, true) }})
inputs:
workingDirectory: $(Pipeline.Workspace)
targetType: inline
script: |
docker commit $(hostname) az-ci-temp-image:latest
echo "FROM az-ci-temp-image:latest" > Dockerfile
echo "RUN sudo groupmod -g $(getent group render | awk -F: '{print $3}') render" >> Dockerfile
echo "RUN sudo groupmod -g $(getent group docker | awk -F: '{print $3}') docker" >> Dockerfile
echo "ENV PATH='$PATH:${{ parameters.extraPaths }}'" >> Dockerfile
echo "ENTRYPOINT [\"/bin/bash\"]" >> Dockerfile
- ${{ each extraEnvVar in parameters.extraEnvVars }}:
- task: Bash@3
displayName: Add extra environment variables
condition: or(and(failed(), not(contains(variables['DOCKER_SKIP_GFX'], variables['JOB_GPU_TARGET']))), ${{ eq(parameters.forceDockerCreation, true) }})
inputs:
workingDirectory: $(Pipeline.Workspace)
targetType: inline
script: echo "ENV ${{ split(extraEnvVar, ':::')[0] }}='${{ split(extraEnvVar, ':::')[1] }}'" >> Dockerfile
- task: Bash@3
displayName: Print Dockerfile
condition: or(and(failed(), not(contains(variables['DOCKER_SKIP_GFX'], variables['JOB_GPU_TARGET']))), ${{ eq(parameters.forceDockerCreation, true) }})
inputs:
workingDirectory: $(Pipeline.Workspace)
targetType: inline
script: cat Dockerfile
- task: Docker@2
displayName: Build and upload Docker image
condition: or(and(failed(), not(contains(variables['DOCKER_SKIP_GFX'], variables['JOB_GPU_TARGET']))), ${{ eq(parameters.forceDockerCreation, true) }})
inputs:
containerRegistry: 'ContainerService3'
${{ if ne(parameters.gpuTarget, '') }}:
repository: '$(Build.DefinitionName)-${{ parameters.environment }}-${{ parameters.gpuTarget }}'
${{ else }}:
repository: '$(Build.DefinitionName)-${{ parameters.environment }}'
Dockerfile: '$(Pipeline.Workspace)/Dockerfile'
buildContext: '$(Pipeline.Workspace)'
- task: Bash@3
condition: or(and(failed(), not(contains(variables['DOCKER_SKIP_GFX'], variables['JOB_GPU_TARGET']))), ${{ eq(parameters.forceDockerCreation, true) }})
displayName: "!! Docker Image URL !!"
inputs:
workingDirectory: $(Pipeline.Workspace)
targetType: inline
${{ if ne(parameters.gpuTarget, '') }}:
script: echo "rocmexternalcicd.azurecr.io/$(Build.DefinitionName)-${{ parameters.environment }}-${{ parameters.gpuTarget }}:$(Build.BuildId)" | tr '[:upper:]' '[:lower:]'
${{ else }}:
script: echo "rocmexternalcicd.azurecr.io/$(Build.DefinitionName)-${{ parameters.environment }}:$(Build.BuildId)" | tr '[:upper:]' '[:lower:]'

View File

@@ -106,7 +106,6 @@ parameters:
type: object
default:
- gfx90a
- gfx942
steps:
# these steps should only be run if there was a failure or warning
@@ -335,7 +334,7 @@ steps:
- task: Docker@2
condition: or(and(failed(), ${{ not(containsValue(parameters.dockerSkipGfx, parameters.gpuTarget)) }}), ${{ eq(parameters.forceDockerCreation, true) }})
inputs:
containerRegistry: 'ContainerService3'
containerRegistry: 'ContainerService'
${{ if ne(parameters.gpuTarget, '') }}:
repository: '$(Build.DefinitionName)-${{ parameters.environment }}-${{ parameters.gpuTarget }}'
${{ else }}:
@@ -349,6 +348,6 @@ steps:
workingDirectory: $(Pipeline.Workspace)
targetType: inline
${{ if ne(parameters.gpuTarget, '') }}:
script: echo "rocmexternalcicd.azurecr.io/$(Build.DefinitionName)-${{ parameters.environment }}-${{ parameters.gpuTarget }}:$(Build.BuildId)" | tr '[:upper:]' '[:lower:]'
script: echo "rocmexternalcicd.azurecr.io/$(Build.DefinitionName)-${{ parameters.environment }}-${{ parameters.gpuTarget }}:$(Build.BuildId)"
${{ else }}:
script: echo "rocmexternalcicd.azurecr.io/$(Build.DefinitionName)-${{ parameters.environment }}:$(Build.BuildId)" | tr '[:upper:]' '[:lower:]'
script: echo "rocmexternalcicd.azurecr.io/$(Build.DefinitionName)-${{ parameters.environment }}:$(Build.BuildId)"

View File

@@ -34,7 +34,6 @@ Autocast
BARs
BLAS
BMC
BabelStream
Blit
Blockwise
Bluefield
@@ -77,7 +76,6 @@ Concretized
Conda
ConnectX
CuPy
da
Dashboarding
DBRX
DDR
@@ -139,7 +137,6 @@ GDR
GDS
GEMM
GEMMs
GFLOPS
GFortran
GFXIP
Gemma
@@ -228,7 +225,6 @@ LM
LSAN
LSan
LTS
LanguageCrossEntropy
LoRA
MEM
MERCHANTABILITY
@@ -246,7 +242,6 @@ MMIOH
MMU
MNIST
MPI
MPT
MSVC
MVAPICH
MVFFR
@@ -263,7 +258,6 @@ Meta's
Miniconda
MirroredStrategy
Mixtral
MosaicML
Multicore
Multithreaded
MyEnvironment
@@ -272,7 +266,6 @@ NBIO
NBIOs
NCCL
NCF
NFS
NIC
NICs
NLI
@@ -335,7 +328,6 @@ PipelineParallel
PnP
PowerEdge
PowerShell
Pretrained
Pretraining
Profiler's
PyPi
@@ -501,7 +493,6 @@ ZenDNN
accuracies
activations
addr
ade
ai
alloc
allocatable
@@ -517,7 +508,6 @@ avx
awk
backend
backends
bb
benchmarked
benchmarking
bfloat
@@ -541,7 +531,6 @@ cd
centos
centric
changelog
checkpointing
chiplet
cmake
cmd
@@ -582,7 +571,6 @@ de
deallocation
debuggability
debian
deepseek
denoise
denoised
denoises
@@ -606,7 +594,6 @@ embeddings
enablement
encodings
endfor
endif
endpgm
enqueue
env
@@ -649,7 +636,6 @@ hipSPARSELt
hipTensor
hipamd
hipblas
hipcc
hipcub
hipfft
hipfort
@@ -708,7 +694,6 @@ migratable
miopen
miopengemm
mivisionx
mixtral
mjx
mkdir
mlirmiopen
@@ -766,7 +751,6 @@ profilers
protobuf
pseudorandom
py
pytorch
recommender
recommenders
quantile
@@ -850,7 +834,6 @@ subfolder
subfolders
submodule
submodules
subnet
supercomputing
symlink
symlinks

View File

@@ -4,142 +4,9 @@ This page is a historical overview of changes made to ROCm components. This
consolidated changelog documents key modifications and improvements across
different versions of the ROCm software stack and its components.
## ROCm 6.4.1
See the [ROCm 6.4.1 release notes](https://rocm-stg.amd.com/en/latest/about/release-notes.html)
for a complete overview of this release.
### **AMD SMI** (25.4.2)
#### Added
* Dumping CPER entries from RAS tool `amdsmi_get_gpu_cper_entries()` to Python and C APIs.
- Dumping CPER entries consist of `amdsmi_cper_hdr_t`.
- Dumping CPER entries is also enabled in the CLI interface through `sudo amd-smi ras --cper`.
* `amdsmi_get_gpu_busy_percent` to the C API.
#### Changed
* Modified VRAM display for `amd-smi monitor -v`.
#### Optimized
* Improved load times for CLI commands when the GPU has multiple parititons.
#### Resolved issues
* Fixed partition enumeration in `amd-smi list -e`, `amdsmi_get_gpu_enumeration_info()`, `amdsmi_enumeration_info_t`, `drm_card`, and `drm_render` fields.
#### Known issues
* When using the `--follow` flag with `amd-smi ras --cper`, CPER entries are not streamed continuously as intended. This will be fixed in an upcoming ROCm release.
```{note}
See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples, and in-depth descriptions.
```
### **HIP** (6.4.1)
#### Added
* New log mask enumeration `LOG_COMGR` enables logging precise code object information.
#### Changed
* HIP runtime uses device bitcode before SPIRV.
* The implementation of preventing `hipLaunchKernel` latency degradation with number of idle streams is reverted or disabled by default.
#### Optimized
* Improved kernel logging includes de-mangling shader names.
* Refined implementation in HIP APIs `hipEventRecords` and `hipStreamWaitEvent` for performance improvement.
#### Resolved issues
* Stale state during the graph capture. The return error was fixed, HIP runtime now always uses the latest dependent nodes during `hipEventRecord` capture.
* Segmentation fault during kernel execution. HIP runtime now allows maximum stack size as per ISA on the GPU device.
### **hipBLASLt** (0.12.1)
#### Resolved issues
* Fixed an accuracy issue for some solutions using an `FP32` or `TF32` data type with a TT transpose.
### **RCCL** (2.22.3)
#### Changed
* MSCCL++ is now disabled by default. To enable it, set `RCCL_MSCCLPP_ENABLE=1`.
#### Resolved issues
* Fixed an issue where early termination, in rare circumstances, could cause the application to stop responding by adding synchronization before destroying a proxy thread.
* Fixed the accuracy issue for the MSCCLPP `allreduce7` kernel in graph mode.
#### Known issues
* When splitting a communicator using `ncclCommSplit` in some GPU configurations, MSCCL initialization can cause a segmentation fault. The recommended workaround is to disable MSCCL with `export RCCL_MSCCL_ENABLE=0`.
This issue will be fixed in a future ROCm release.
* Within the RCCL-UnitTests test suite, failures occur in tests ending with the
`.ManagedMem` and `.ManagedMemGraph` suffixes. These failures only affect the
test results and do not affect the RCCL component itself. This issue will be
resolved in a future ROCm release.
### **rocALUTION** (3.2.3)
#### Added
* The `-a` option has been added to the `rmake.py` build script. This option allows you to select specific architectures when building on Microsoft Windows.
#### Resolved issues
* Fixed an issue where the `HIP_PATH` environment variable was being ignored when compiling on Microsoft Windows.
### **ROCm Data Center Tool** (0.3.0)
#### Added
- Support for GPU partitions.
- `RDC_FI_GPU_BUSY_PERCENT` metric.
#### Changed
- Updated `rdc_field` to align with `rdc_bootstrap` for current metrics.
#### Resolved issues
- Fixed [ROCProfiler](https://rocm.docs.amd.com/projects/rocprofiler/en/docs-6.4.0/index.html) eval metrics and memory leaks.
### **ROCm SMI** (7.5.0)
#### Resolved issues
- Fixed partition enumeration. It now refers to the correct DRM Render and Card paths.
```{note}
See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples, and in-depth descriptions.
```
### **ROCm Systems Profiler** (1.0.1)
#### Added
* How-to document for [network performance profiling](https://rocm.docs.amd.com/projects/rocprofiler-systems/en/latest/how-to/nic-profiling.html) for standard Network Interface Cards (NICs).
#### Resolved issues
* Fixed a build issue with Dyninst on GCC 13.
### **ROCr Runtime** (1.15.0)
#### Resolved issues
* Fixed a rare occurrence issue on AMD Instinct MI25, MI50, and MI100 GPUs, where the `SDMA` copies might start before the dependent Kernel finishes and could cause memory corruption.
## ROCm 6.4.0
See the [ROCm 6.4.0 release notes](https://rocm.docs.amd.com/en/docs-6.4.0/about/release-notes.html)
See the [ROCm 6.4.0 release notes](https://rocm-stg.amd.com/en/latest/about/release-notes.html)
for a complete overview of this release.
### **AMD SMI** (25.3.0)
@@ -258,7 +125,8 @@ Some workaround options are as follows:
- The `pasid` field in struct `amdsmi_process_info_t` will be deprecated in a future ROCm release.
```{note}
See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples, and in-depth descriptions.
See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/rocm-6.4.x/CHANGELOG.md) for details, examples,
and in-depth descriptions.
```
### **AMDMIGraphX** (2.12.0)
@@ -810,6 +678,7 @@ The following lists the backward incompatible changes planned for upcoming major
* Roofline support for Ubuntu 24.04.
* Experimental support `rocprofv3` (not enabled as default).
* Experimental feature: Spatial multiplexing.
#### Resolved issues
@@ -868,7 +737,8 @@ The following lists the backward incompatible changes planned for upcoming major
- Fixed `rsmi_dev_target_graphics_version_get`, `rocm-smi --showhw`, and `rocm-smi --showprod` not displaying graphics version correctly for Instinct MI200 series, MI100 series, and RDNA3-based GPUs.
```{note}
See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples, and in-depth descriptions.
See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/rocm-6.4.x/CHANGELOG.md) for details, examples,
and in-depth descriptions.
```
### **ROCm Systems Profiler** (1.0.0)
@@ -876,10 +746,6 @@ See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/rele
#### Added
- Support for VA-API and rocDecode tracing.
- Aggregation of MPI data collected across distributed nodes and ranks. The data is concatenated into a single proto file.
#### Changed
- Backend refactored to use [ROCprofiler-SDK](https://github.com/ROCm/rocprofiler-sdk) rather than [ROCProfiler](https://github.com/ROCm/rocprofiler) and [ROCTracer](https://github.com/ROCm/ROCTracer).
#### Resolved issues
@@ -890,21 +756,9 @@ See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/rele
- Fixed interruption in config file generation.
- Fixed segmentation fault while running rocprof-sys-instrument.
- Fixed an issue where running `rocprof-sys-causal` or using the `-I all` option with `rocprof-sys-sample` caused the system to become non-responsive.
- Fixed an issue where sampling multi-GPU Python workloads caused the system to stop responding.
### **ROCm Validation Suite** (1.1.0)
#### Added
* Configuration files for MI210.
* Support for OCP fp8 data type.
* GPU index-based CLI execution.
#### Changed
* JSON logging with updated schema.
- Backend refactored to use [ROCprofiler-SDK](https://github.com/ROCm/rocprofiler-sdk) rather than [ROCProfiler](https://github.com/ROCm/rocprofiler) and [ROCTracer](https://github.com/ROCm/ROCTracer).
### **rocPRIM** (3.4.0)
@@ -3602,7 +3456,7 @@ See [issue #3499](https://github.com/ROCm/ROCm/issues/3499) on GitHub.
- Error when running Omniperf with an application with command line arguments. As a workaround, create an
intermediary script to call the application with the necessary arguments, then call the script with Omniperf. This
issue is fixed in a future release of Omniperf. See [#347](https://github.com/ROCm/rocprofiler-compute/issues/347).
issue is fixed in a future release of Omniperf. See [#347](https://github.com/ROCm/omniperf/issues/347).
- Omniperf might not work with AMD Instinct MI300 accelerators out of the box, resulting in the following error:
"*ERROR gfx942 is not enabled rocprofv1. Available profilers include: ['rocprofv2']*". As a workaround, add the
@@ -4479,7 +4333,7 @@ for a complete overview of this release.
* New multiple node and GPU support.
Unsmoothed and smoothed aggregations and Ruge-Stueben AMG now work with multiple nodes
and GPUs. For more information, refer to the
[API documentation](https://rocm.docs.amd.com/projects/rocALUTION/en/docs-6.1.0/usermanual/solvers.html#unsmoothed-aggregation-amg).
[API documentation](https://rocm.docs.amd.com/projects/rocALUTION/en/latest/usermanual/solvers.html#unsmoothed-aggregation-amg).
### **rocDecode** (0.5.0)

View File

@@ -50,7 +50,7 @@ The following example shows how to use the repo tool to download the ROCm source
```bash
mkdir -p ~/ROCm/
cd ~/ROCm/
export ROCM_VERSION=6.4.1
export ROCM_VERSION=6.4.0
~/bin/repo init -u http://github.com/ROCm/ROCm.git -b roc-6.4.x -m tools/rocm-build/rocm-${ROCM_VERSION}.xml
~/bin/repo sync
```
@@ -77,7 +77,7 @@ The Build time will reduce significantly if we limit the GPU Architecture/s agai
mkdir -p ~/WORKSPACE/ # Or any folder name other than WORKSPACE
cd ~/WORKSPACE/
export ROCM_VERSION=6.4.1
export ROCM_VERSION=6.4.0
~/bin/repo init -u http://github.com/ROCm/ROCm.git -b roc-6.4.x -m tools/rocm-build/rocm-${ROCM_VERSION}.xml
~/bin/repo sync

1530
RELEASE.md

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<manifest>
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
<default revision="refs/tags/rocm-6.4.1"
<default revision="refs/tags/rocm-6.4.0"
remote="rocm-org"
sync-c="true"
sync-j="4" />

View File

@@ -81,7 +81,6 @@ additional licenses. Please review individual repositories for more information.
| [rocRAND](https://github.com/ROCm/rocRAND/) | [MIT](https://github.com/ROCm/rocRAND/blob/develop/LICENSE.txt) |
| [ROCr Debug Agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/amd-staging/LICENSE.txt) |
| [ROCR-Runtime](https://github.com/ROCm/ROCR-Runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCR-Runtime/blob/amd-staging/LICENSE.txt) |
| [rocSHMEM](https://github.com/ROCm/rocSHMEM/) | [MIT](https://github.com/ROCm/rocSHMEM/blob/develop/LICENSE.md) |
| [rocSOLVER](https://github.com/ROCm/rocSOLVER/) | [BSD-2-Clause](https://github.com/ROCm/rocSOLVER/blob/develop/LICENSE.md) |
| [rocSPARSE](https://github.com/ROCm/rocSPARSE/) | [MIT](https://github.com/ROCm/rocSPARSE/blob/develop/LICENSE.md) |
| [rocThrust](https://github.com/ROCm/rocThrust/) | [Apache 2.0](https://github.com/ROCm/rocThrust/blob/develop/LICENSE) |

View File

@@ -1,125 +1,121 @@
ROCm Version,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
,,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
,"RHEL 9.6, 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
,SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
,,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
,"Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,,,,,,,,,,,,
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
,RDNA4,,,,,,,,,,,,,,,
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1201 [#RDNA4-OS-past-60]_,,,,,,,,,,,,,,,
,gfx1200 [#RDNA4-OS-past-60]_,,,,,,,,,,,,,,,
,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
,,,,,,,,,,,,,,,,
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
,,,,,,,,,,,,,,,,
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
Thrust,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
CUB,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
,,,,,,,,,,,,,,,,
KMD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
KMD versions,"6.4.x, 6.3.x","6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
,,,,,,,,,,,,,,,,
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
:doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
:doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
,,,,,,,,,,,,,,,,
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
:doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
:doc:`rocSHMEM <rocshmem:index>`,2.0.0,2.0.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
,,,,,,,,,,,,,,,,
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
:doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
:doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
:doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
:doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
:doc:`rocBLAS <rocblas:index>`,4.4.0,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
:doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
:doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
:doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
,,,,,,,,,,,,,,,,
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
:doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
:doc:`rocPRIM <rocprim:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
,,,,,,,,,,,,,,,,
SUPPORT LIBS,,,,,,,,,,,,,,,,
`hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
,,,,,,,,,,,,,,,,
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
:doc:`AMD SMI <amdsmi:index>`,25.4.2,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
,,,,,,,,,,,,,,,,
PERFORMANCE TOOLS,,,,,,,,,,,,,,,,
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.1,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60401,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`ROCTracer <roctracer:index>`,4.1.60401,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
,,,,,,,,,,,,,,,,
DEVELOPMENT TOOLS,,,,,,,,,,,,,,,,
:doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
,,,,,,,,,,,,,,,,
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
:doc:`llvm-project <llvm-project:index>`,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
,,,,,,,,,,,,,,,,
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
:doc:`HIP <hip:index>`,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
ROCm Version,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
,"RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
,"Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,,,,,,,,,,,,
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
,,,,,,,,,,,,,,,
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
,,,,,,,,,,,,,,,
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
Thrust,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
CUB,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
,,,,,,,,,,,,,,,
KMD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
KMD versions,"6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
,,,,,,,,,,,,,,,
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
:doc:`MIOpen <miopen:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
:doc:`rocAL <rocal:index>`,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`RPP <rpp:index>`,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
,,,,,,,,,,,,,,,
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
:doc:`RCCL <rccl:index>`,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
,,,,,,,,,,,,,,,
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
:doc:`hipBLASLt <hipblaslt:index>`,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
:doc:`hipfort <hipfort:index>`,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
:doc:`hipRAND <hiprand:index>`,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
:doc:`rocALUTION <rocalution:index>`,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
:doc:`rocBLAS <rocblas:index>`,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
:doc:`rocRAND <rocrand:index>`,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
:doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
:doc:`Tensile <tensile:src/index>`,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
,,,,,,,,,,,,,,,
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
:doc:`hipCUB <hipcub:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
:doc:`rocPRIM <rocprim:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
,,,,,,,,,,,,,,,
SUPPORT LIBS,,,,,,,,,,,,,,,
`hipother <https://github.com/ROCm/hipother>`_,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.2,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
,,,,,,,,,,,,,,,
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
:doc:`AMD SMI <amdsmi:index>`,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60102,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
,,,,,,,,,,,,,,,
PERFORMANCE TOOLS,,,,,,,,,,,,,,,
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60102,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
:doc:`ROCTracer <roctracer:index>`,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60102,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
,,,,,,,,,,,,,,,
DEVELOPMENT TOOLS,,,,,,,,,,,,,,,
:doc:`HIPIFY <hipify:index>`,19.0.0.25104,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
,,,,,,,,,,,,,,,
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
:doc:`llvm-project <llvm-project:index>`,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
,,,,,,,,,,,,,,,
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
:doc:`HIP <hip:index>`,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
1 ROCm Version 6.4.1 6.4.0 6.3.3 6.3.2 6.3.1 6.3.0 6.2.4 6.2.2 6.2.1 6.2.0 6.1.5 6.1.2 6.1.1 6.1.0 6.0.2 6.0.0
2 :ref:`Operating systems & kernels <OS-kernel-versions>` Ubuntu 24.04.2 Ubuntu 24.04.2 Ubuntu 24.04.2 Ubuntu 24.04.2 Ubuntu 24.04.2 Ubuntu 24.04.2 Ubuntu 24.04.1, 24.04 Ubuntu 24.04.1, 24.04 Ubuntu 24.04.1, 24.04 Ubuntu 24.04
3 Ubuntu 22.04.5 Ubuntu 22.04.5 Ubuntu 22.04.5 Ubuntu 22.04.5 Ubuntu 22.04.5 Ubuntu 22.04.5 Ubuntu 22.04.5, 22.04.4 Ubuntu 22.04.5, 22.04.4 Ubuntu 22.04.5, 22.04.4 Ubuntu 22.04.5, 22.04.4 Ubuntu 22.04.5, 22.04.4, 22.04.3 Ubuntu 22.04.4, 22.04.3 Ubuntu 22.04.4, 22.04.3 Ubuntu 22.04.4, 22.04.3 Ubuntu 22.04.4, 22.04.3 Ubuntu 22.04.4, 22.04.3, 22.04.2 Ubuntu 22.04.4, 22.04.3, 22.04.2
4 Ubuntu 20.04.6, 20.04.5 Ubuntu 20.04.6, 20.04.5 Ubuntu 20.04.6, 20.04.5 Ubuntu 20.04.6, 20.04.5 Ubuntu 20.04.6, 20.04.5 Ubuntu 20.04.6, 20.04.5
5 RHEL 9.6, 9.5, 9.4 RHEL 9.5, 9.4 RHEL 9.5, 9.4 RHEL 9.5, 9.4 RHEL 9.5, 9.4 RHEL 9.5, 9.4 RHEL 9.4, 9.3 RHEL 9.4, 9.3 RHEL 9.4, 9.3 RHEL 9.4, 9.3 RHEL 9.4, 9.3, 9.2 RHEL 9.4, 9.3, 9.2 RHEL 9.4, 9.3, 9.2 RHEL 9.4, 9.3, 9.2 RHEL 9.3, 9.2 RHEL 9.3, 9.2
6 RHEL 8.10 RHEL 8.10 RHEL 8.10 RHEL 8.10 RHEL 8.10 RHEL 8.10 RHEL 8.10, 8.9 RHEL 8.10, 8.9 RHEL 8.10, 8.9 RHEL 8.10, 8.9 RHEL 8.9, 8.8 RHEL 8.9, 8.8 RHEL 8.9, 8.8 RHEL 8.9, 8.8 RHEL 8.9, 8.8 RHEL 8.9, 8.8
7 SLES 15 SP6 SLES 15 SP6 SLES 15 SP6, SP5 SLES 15 SP6, SP5 SLES 15 SP6, SP5 SLES 15 SP6, SP5 SLES 15 SP6, SP5 SLES 15 SP6, SP5 SLES 15 SP6, SP5 SLES 15 SP6, SP5 SLES 15 SP5, SP4 SLES 15 SP5, SP4 SLES 15 SP5, SP4 SLES 15 SP5, SP4 SLES 15 SP5, SP4 SLES 15 SP5, SP4
8 CentOS 7.9 CentOS 7.9 CentOS 7.9 CentOS 7.9 CentOS 7.9
9 Oracle Linux 9, 8 [#mi300x-past-60]_ Oracle Linux 9, 8 [#mi300x-past-60]_ Oracle Linux 8.10 [#mi300x-past-60]_ Oracle Linux 8.10 [#mi300x-past-60]_ Oracle Linux 8.10 [#mi300x-past-60]_ Oracle Linux 8.10 [#mi300x-past-60]_ Oracle Linux 8.9 [#mi300x-past-60]_ Oracle Linux 8.9 [#mi300x-past-60]_ Oracle Linux 8.9 [#mi300x-past-60]_ Oracle Linux 8.9 [#mi300x-past-60]_ Oracle Linux 8.9 [#mi300x-past-60]_ Oracle Linux 8.9 [#mi300x-past-60]_ Oracle Linux 8.9 [#mi300x-past-60]_
10 Debian 12 [#single-node-past-60]_ Debian 12 [#single-node-past-60]_ Debian 12 [#single-node-past-60]_ Debian 12 [#single-node-past-60]_ Debian 12 [#single-node-past-60]_
11 Azure Linux 3.0 [#mi300x-past-60]_ Azure Linux 3.0 [#mi300x-past-60]_ Azure Linux 3.0 [#mi300x-past-60]_ Azure Linux 3.0 [#mi300x-past-60]_
12 .. _architecture-support-compatibility-matrix-past-60: .. _architecture-support-compatibility-matrix-past-60:
13 :doc:`Architecture <rocm-install-on-linux:reference/system-requirements>` CDNA3 CDNA3 CDNA3 CDNA3 CDNA3 CDNA3 CDNA3 CDNA3 CDNA3 CDNA3 CDNA3 CDNA3 CDNA3 CDNA3 CDNA3 CDNA3
14 CDNA2 CDNA2 CDNA2 CDNA2 CDNA2 CDNA2 CDNA2 CDNA2 CDNA2 CDNA2 CDNA2 CDNA2 CDNA2 CDNA2 CDNA2 CDNA2
15 CDNA CDNA CDNA CDNA CDNA CDNA CDNA CDNA CDNA CDNA CDNA CDNA CDNA CDNA CDNA CDNA
16 RDNA4 RDNA3 RDNA3 RDNA3 RDNA3 RDNA3 RDNA3 RDNA3 RDNA3 RDNA3 RDNA3 RDNA3 RDNA3 RDNA3 RDNA3 RDNA3
17 RDNA3 RDNA3 RDNA2 RDNA3 RDNA2 RDNA3 RDNA2 RDNA3 RDNA2 RDNA3 RDNA2 RDNA3 RDNA2 RDNA3 RDNA2 RDNA3 RDNA2 RDNA3 RDNA2 RDNA3 RDNA2 RDNA3 RDNA2 RDNA3 RDNA2 RDNA3 RDNA2 RDNA3 RDNA2 RDNA3 RDNA2
18 RDNA2 RDNA2 .. _gpu-support-compatibility-matrix-past-60: RDNA2 RDNA2 RDNA2 RDNA2 RDNA2 RDNA2 RDNA2 RDNA2 RDNA2 RDNA2 RDNA2 RDNA2 RDNA2 RDNA2
19 :doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>` .. _gpu-support-compatibility-matrix-past-60: gfx1100 gfx1100 gfx1100 gfx1100 gfx1100 gfx1100 gfx1100 gfx1100 gfx1100 gfx1100 gfx1100 gfx1100 gfx1100 gfx1100 gfx1100
20 :doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>` gfx1201 [#RDNA4-OS-past-60]_ gfx1030 gfx1030 gfx1030 gfx1030 gfx1030 gfx1030 gfx1030 gfx1030 gfx1030 gfx1030 gfx1030 gfx1030 gfx1030 gfx1030 gfx1030
21 gfx1200 [#RDNA4-OS-past-60]_ gfx942 gfx942 gfx942 gfx942 gfx942 gfx942 [#mi300_624-past-60]_ gfx942 [#mi300_622-past-60]_ gfx942 [#mi300_621-past-60]_ gfx942 [#mi300_620-past-60]_ gfx942 [#mi300_612-past-60]_ gfx942 [#mi300_612-past-60]_ gfx942 [#mi300_611-past-60]_ gfx942 [#mi300_610-past-60]_ gfx942 [#mi300_602-past-60]_ gfx942 [#mi300_600-past-60]_
22 gfx1100 gfx1100 gfx90a gfx1100 gfx90a gfx1100 gfx90a gfx1100 gfx90a gfx1100 gfx90a gfx1100 gfx90a gfx1100 gfx90a gfx1100 gfx90a gfx1100 gfx90a gfx1100 gfx90a gfx1100 gfx90a gfx1100 gfx90a gfx1100 gfx90a gfx1100 gfx90a gfx1100 gfx90a
23 gfx1030 gfx1030 gfx908 gfx1030 gfx908 gfx1030 gfx908 gfx1030 gfx908 gfx1030 gfx908 gfx1030 gfx908 gfx1030 gfx908 gfx1030 gfx908 gfx1030 gfx908 gfx1030 gfx908 gfx1030 gfx908 gfx1030 gfx908 gfx1030 gfx908 gfx1030 gfx908 gfx1030 gfx908
24 gfx942 gfx942 gfx942 gfx942 gfx942 gfx942 gfx942 [#mi300_624-past-60]_ gfx942 [#mi300_622-past-60]_ gfx942 [#mi300_621-past-60]_ gfx942 [#mi300_620-past-60]_ gfx942 [#mi300_612-past-60]_ gfx942 [#mi300_612-past-60]_ gfx942 [#mi300_611-past-60]_ gfx942 [#mi300_610-past-60]_ gfx942 [#mi300_602-past-60]_ gfx942 [#mi300_600-past-60]_
25 FRAMEWORK SUPPORT gfx90a gfx90a .. _framework-support-compatibility-matrix-past-60: gfx90a gfx90a gfx90a gfx90a gfx90a gfx90a gfx90a gfx90a gfx90a gfx90a gfx90a gfx90a gfx90a gfx90a
26 :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>` gfx908 gfx908 2.6, 2.5, 2.4, 2.3 gfx908 2.4, 2.3, 2.2, 1.13 gfx908 2.4, 2.3, 2.2, 1.13 gfx908 2.4, 2.3, 2.2, 1.13 gfx908 2.4, 2.3, 2.2, 2.1, 2.0, 1.13 gfx908 2.3, 2.2, 2.1, 2.0, 1.13 gfx908 2.3, 2.2, 2.1, 2.0, 1.13 gfx908 2.3, 2.2, 2.1, 2.0, 1.13 gfx908 2.3, 2.2, 2.1, 2.0, 1.13 gfx908 2.1, 2.0, 1.13 gfx908 2.1, 2.0, 1.13 gfx908 2.1, 2.0, 1.13 gfx908 2.1, 2.0, 1.13 gfx908 2.1, 2.0, 1.13 gfx908 2.1, 2.0, 1.13
27 :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>` 2.18.1, 2.17.1, 2.16.2 2.17.0, 2.16.2, 2.15.1 2.17.0, 2.16.2, 2.15.1 2.17.0, 2.16.2, 2.15.1 2.17.0, 2.16.2, 2.15.1 2.16.1, 2.15.1, 2.14.1 2.16.1, 2.15.1, 2.14.1 2.16.1, 2.15.1, 2.14.1 2.16.1, 2.15.1, 2.14.1 2.15.0, 2.14.0, 2.13.1 2.15.0, 2.14.0, 2.13.1 2.15.0, 2.14.0, 2.13.1 2.15.0, 2.14.0, 2.13.1 2.14.0, 2.13.1, 2.12.1 2.14.0, 2.13.1, 2.12.1
28 FRAMEWORK SUPPORT :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>` .. _framework-support-compatibility-matrix-past-60: 0.4.35 0.4.31 0.4.31 0.4.31 0.4.31 0.4.26 0.4.26 0.4.26 0.4.26 0.4.26 0.4.26 0.4.26 0.4.26 0.4.26 0.4.26
29 :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>` `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_ 2.6, 2.5, 2.4, 2.3 2.6, 2.5, 2.4, 2.3 1.2 2.4, 2.3, 2.2, 1.13 1.17.3 2.4, 2.3, 2.2, 1.13 1.17.3 2.4, 2.3, 2.2, 1.13 1.17.3 2.4, 2.3, 2.2, 2.1, 2.0, 1.13 1.17.3 2.3, 2.2, 2.1, 2.0, 1.13 1.17.3 2.3, 2.2, 2.1, 2.0, 1.13 1.17.3 2.3, 2.2, 2.1, 2.0, 1.13 1.17.3 2.3, 2.2, 2.1, 2.0, 1.13 1.17.3 2.1, 2.0, 1.13 1.17.3 2.1, 2.0, 1.13 1.17.3 2.1, 2.0, 1.13 1.17.3 2.1, 2.0, 1.13 1.17.3 2.1, 2.0, 1.13 1.14.1 2.1, 2.0, 1.13 1.14.1
30 :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>` 2.18.1, 2.17.1, 2.16.2 2.18.1, 2.17.1, 2.16.2 2.17.0, 2.16.2, 2.15.1 2.17.0, 2.16.2, 2.15.1 2.17.0, 2.16.2, 2.15.1 2.17.0, 2.16.2, 2.15.1 2.16.1, 2.15.1, 2.14.1 2.16.1, 2.15.1, 2.14.1 2.16.1, 2.15.1, 2.14.1 2.16.1, 2.15.1, 2.14.1 2.15.0, 2.14.0, 2.13.1 2.15.0, 2.14.0, 2.13.1 2.15.0, 2.14.0, 2.13.1 2.15.0, 2.14.0, 2.13.1 2.14.0, 2.13.1, 2.12.1 2.14.0, 2.13.1, 2.12.1
31 :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>` 0.4.35 0.4.35 0.4.31 0.4.31 0.4.31 0.4.31 0.4.26 0.4.26 0.4.26 0.4.26 0.4.26 0.4.26 0.4.26 0.4.26 0.4.26 0.4.26
32 `ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_ THIRD PARTY COMMS 1.2 1.2 .. _thirdpartycomms-support-compatibility-matrix-past-60: 1.17.3 1.17.3 1.17.3 1.17.3 1.17.3 1.17.3 1.17.3 1.17.3 1.17.3 1.17.3 1.17.3 1.17.3 1.14.1 1.14.1
33 `UCC <https://github.com/ROCm/ucc>`_ >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.2.0 >=1.2.0
34 `UCX <https://github.com/ROCm/ucx>`_ >=1.15.0 >=1.15.0 >=1.15.0 >=1.15.0 >=1.15.0 >=1.15.0 >=1.15.0 >=1.15.0 >=1.15.0 >=1.14.1 >=1.14.1 >=1.14.1 >=1.14.1 >=1.14.1 >=1.14.1
35 THIRD PARTY COMMS .. _thirdpartycomms-support-compatibility-matrix-past-60:
36 `UCC <https://github.com/ROCm/ucc>`_ THIRD PARTY ALGORITHM >=1.3.0 >=1.3.0 .. _thirdpartyalgorithm-support-compatibility-matrix-past-60: >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.3.0 >=1.2.0 >=1.2.0
37 `UCX <https://github.com/ROCm/ucx>`_ Thrust >=1.15.0 >=1.15.0 2.5.0 >=1.15.0 2.3.2 >=1.15.0 2.3.2 >=1.15.0 2.3.2 >=1.15.0 2.3.2 >=1.15.0 2.2.0 >=1.15.0 2.2.0 >=1.15.0 2.2.0 >=1.15.0 2.2.0 >=1.14.1 2.1.0 >=1.14.1 2.1.0 >=1.14.1 2.1.0 >=1.14.1 2.1.0 >=1.14.1 2.0.1 >=1.14.1 2.0.1
38 CUB 2.5.0 2.3.2 2.3.2 2.3.2 2.3.2 2.2.0 2.2.0 2.2.0 2.2.0 2.1.0 2.1.0 2.1.0 2.1.0 2.0.1 2.0.1
39 THIRD PARTY ALGORITHM .. _thirdpartyalgorithm-support-compatibility-matrix-past-60:
40 Thrust KMD & USER SPACE [#kfd_support-past-60]_ 2.5.0 2.5.0 .. _kfd-userspace-support-compatibility-matrix-past-60: 2.3.2 2.3.2 2.3.2 2.3.2 2.2.0 2.2.0 2.2.0 2.2.0 2.1.0 2.1.0 2.1.0 2.1.0 2.0.1 2.0.1
41 CUB KMD versions 2.5.0 2.5.0 6.4.x, 6.3.x 2.3.2 6.4.x, 6.3.x, 6.2.x, 6.1.x 2.3.2 6.4.x, 6.3.x, 6.2.x, 6.1.x 2.3.2 6.4.x, 6.3.x, 6.2.x, 6.1.x 2.3.2 6.4.x, 6.3.x, 6.2.x, 6.1.x 2.2.0 6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x 2.2.0 6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x 2.2.0 6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x 2.2.0 6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x 2.1.0 6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x 2.1.0 6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x 2.1.0 6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x 2.1.0 6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x 2.0.1 6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x 2.0.1 6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x
42
43 KMD & USER SPACE [#kfd_support-past-60]_ ML & COMPUTER VISION .. _kfd-userspace-support-compatibility-matrix-past-60: .. _mllibs-support-compatibility-matrix-past-60:
44 KMD versions :doc:`Composable Kernel <composable_kernel:index>` 6.4.x, 6.3.x 6.4.x, 6.3.x 1.1.0 6.4.x, 6.3.x, 6.2.x, 6.1.x 1.1.0 6.4.x, 6.3.x, 6.2.x, 6.1.x 1.1.0 6.4.x, 6.3.x, 6.2.x, 6.1.x 1.1.0 6.4.x, 6.3.x, 6.2.x, 6.1.x 1.1.0 6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x 1.1.0 6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x 1.1.0 6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x 1.1.0 6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x 1.1.0 6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x 1.1.0 6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x 1.1.0 6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x 1.1.0 6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x 1.1.0 6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x 1.1.0 6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x 1.1.0
45 :doc:`MIGraphX <amdmigraphx:index>` 2.12.0 2.11.0 2.11.0 2.11.0 2.11.0 2.10.0 2.10.0 2.10.0 2.10.0 2.9.0 2.9.0 2.9.0 2.9.0 2.8.0 2.8.0
46 ML & COMPUTER VISION :doc:`MIOpen <miopen:index>` .. _mllibs-support-compatibility-matrix-past-60: 3.4.0 3.3.0 3.3.0 3.3.0 3.3.0 3.2.0 3.2.0 3.2.0 3.2.0 3.1.0 3.1.0 3.1.0 3.1.0 3.0.0 3.0.0
47 :doc:`Composable Kernel <composable_kernel:index>` :doc:`MIVisionX <mivisionx:index>` 1.1.0 1.1.0 3.2.0 1.1.0 3.1.0 1.1.0 3.1.0 1.1.0 3.1.0 1.1.0 3.1.0 1.1.0 3.0.0 1.1.0 3.0.0 1.1.0 3.0.0 1.1.0 3.0.0 1.1.0 2.5.0 1.1.0 2.5.0 1.1.0 2.5.0 1.1.0 2.5.0 1.1.0 2.5.0 1.1.0 2.5.0
48 :doc:`MIGraphX <amdmigraphx:index>` :doc:`rocAL <rocal:index>` 2.12.0 2.12.0 2.2.0 2.11.0 2.1.0 2.11.0 2.1.0 2.11.0 2.1.0 2.11.0 2.1.0 2.10.0 2.0.0 2.10.0 2.0.0 2.10.0 2.0.0 2.10.0 1.0.0 2.9.0 1.0.0 2.9.0 1.0.0 2.9.0 1.0.0 2.9.0 1.0.0 2.8.0 1.0.0 2.8.0 1.0.0
49 :doc:`MIOpen <miopen:index>` :doc:`rocDecode <rocdecode:index>` 3.4.0 3.4.0 0.10.0 3.3.0 0.8.0 3.3.0 0.8.0 3.3.0 0.8.0 3.3.0 0.8.0 3.2.0 0.6.0 3.2.0 0.6.0 3.2.0 0.6.0 3.2.0 0.6.0 3.1.0 0.6.0 3.1.0 0.6.0 3.1.0 0.5.0 3.1.0 0.5.0 3.0.0 N/A 3.0.0 N/A
50 :doc:`MIVisionX <mivisionx:index>` :doc:`rocJPEG <rocjpeg:index>` 3.2.0 3.2.0 0.8.0 3.1.0 0.6.0 3.1.0 0.6.0 3.1.0 0.6.0 3.1.0 0.6.0 3.0.0 N/A 3.0.0 N/A 3.0.0 N/A 3.0.0 N/A 2.5.0 N/A 2.5.0 N/A 2.5.0 N/A 2.5.0 N/A 2.5.0 N/A 2.5.0 N/A
51 :doc:`rocAL <rocal:index>` :doc:`rocPyDecode <rocpydecode:index>` 2.2.0 2.2.0 0.3.1 2.1.0 0.2.0 2.1.0 0.2.0 2.1.0 0.2.0 2.1.0 0.2.0 2.0.0 0.1.0 2.0.0 0.1.0 2.0.0 0.1.0 1.0.0 0.1.0 1.0.0 N/A 1.0.0 N/A 1.0.0 N/A 1.0.0 N/A 1.0.0 N/A 1.0.0 N/A
52 :doc:`rocDecode <rocdecode:index>` :doc:`RPP <rpp:index>` 0.10.0 0.10.0 1.9.10 0.8.0 1.9.1 0.8.0 1.9.1 0.8.0 1.9.1 0.8.0 1.9.1 0.6.0 1.8.0 0.6.0 1.8.0 0.6.0 1.8.0 0.6.0 1.8.0 0.6.0 1.5.0 0.6.0 1.5.0 0.5.0 1.5.0 0.5.0 1.5.0 N/A 1.4.0 N/A 1.4.0
53 :doc:`rocJPEG <rocjpeg:index>` 0.8.0 0.8.0 0.6.0 0.6.0 0.6.0 0.6.0 N/A N/A N/A N/A N/A N/A N/A N/A N/A N/A
54 :doc:`rocPyDecode <rocpydecode:index>` COMMUNICATION 0.3.1 0.3.1 .. _commlibs-support-compatibility-matrix-past-60: 0.2.0 0.2.0 0.2.0 0.2.0 0.1.0 0.1.0 0.1.0 0.1.0 N/A N/A N/A N/A N/A N/A
55 :doc:`RPP <rpp:index>` :doc:`RCCL <rccl:index>` 1.9.10 1.9.10 2.22.3 1.9.1 2.21.5 1.9.1 2.21.5 1.9.1 2.21.5 1.9.1 2.21.5 1.8.0 2.20.5 1.8.0 2.20.5 1.8.0 2.20.5 1.8.0 2.20.5 1.5.0 2.18.6 1.5.0 2.18.6 1.5.0 2.18.6 1.5.0 2.18.6 1.4.0 2.18.3 1.4.0 2.18.3
56
57 COMMUNICATION MATH LIBS .. _commlibs-support-compatibility-matrix-past-60: .. _mathlibs-support-compatibility-matrix-past-60:
58 :doc:`RCCL <rccl:index>` `half <https://github.com/ROCm/half>`_ 2.22.3 2.22.3 1.12.0 2.21.5 1.12.0 2.21.5 1.12.0 2.21.5 1.12.0 2.21.5 1.12.0 2.20.5 1.12.0 2.20.5 1.12.0 2.20.5 1.12.0 2.20.5 1.12.0 2.18.6 1.12.0 2.18.6 1.12.0 2.18.6 1.12.0 2.18.6 1.12.0 2.18.3 1.12.0 2.18.3 1.12.0
59 :doc:`rocSHMEM <rocshmem:index>` :doc:`hipBLAS <hipblas:index>` 2.0.0 2.0.0 2.4.0 N/A 2.3.0 N/A 2.3.0 N/A 2.3.0 N/A 2.3.0 N/A 2.2.0 N/A 2.2.0 N/A 2.2.0 N/A 2.2.0 N/A 2.1.0 N/A 2.1.0 N/A 2.1.0 N/A 2.1.0 N/A 2.0.0 N/A 2.0.0
60 :doc:`hipBLASLt <hipblaslt:index>` 0.12.0 0.10.0 0.10.0 0.10.0 0.10.0 0.8.0 0.8.0 0.8.0 0.8.0 0.7.0 0.7.0 0.7.0 0.7.0 0.6.0 0.6.0
61 MATH LIBS :doc:`hipFFT <hipfft:index>` .. _mathlibs-support-compatibility-matrix-past-60: 1.0.18 1.0.17 1.0.17 1.0.17 1.0.17 1.0.16 1.0.15 1.0.15 1.0.14 1.0.14 1.0.14 1.0.14 1.0.14 1.0.13 1.0.13
62 `half <https://github.com/ROCm/half>`_ :doc:`hipfort <hipfort:index>` 1.12.0 1.12.0 0.6.0 1.12.0 0.5.1 1.12.0 0.5.1 1.12.0 0.5.0 1.12.0 0.5.0 1.12.0 0.4.0 1.12.0 0.4.0 1.12.0 0.4.0 1.12.0 0.4.0 1.12.0 0.4.0 1.12.0 0.4.0 1.12.0 0.4.0 1.12.0 0.4.0 1.12.0 0.4.0 1.12.0 0.4.0
63 :doc:`hipBLAS <hipblas:index>` :doc:`hipRAND <hiprand:index>` 2.4.0 2.4.0 2.12.0 2.3.0 2.11.1 2.3.0 2.11.1 2.3.0 2.11.1 2.3.0 2.11.0 2.2.0 2.11.1 2.2.0 2.11.0 2.2.0 2.11.0 2.2.0 2.11.0 2.1.0 2.10.16 2.1.0 2.10.16 2.1.0 2.10.16 2.1.0 2.10.16 2.0.0 2.10.16 2.0.0 2.10.16
64 :doc:`hipBLASLt <hipblaslt:index>` :doc:`hipSOLVER <hipsolver:index>` 0.12.1 0.12.0 2.4.0 0.10.0 2.3.0 0.10.0 2.3.0 0.10.0 2.3.0 0.10.0 2.3.0 0.8.0 2.2.0 0.8.0 2.2.0 0.8.0 2.2.0 0.8.0 2.2.0 0.7.0 2.1.1 0.7.0 2.1.1 0.7.0 2.1.1 0.7.0 2.1.0 0.6.0 2.0.0 0.6.0 2.0.0
65 :doc:`hipFFT <hipfft:index>` :doc:`hipSPARSE <hipsparse:index>` 1.0.18 1.0.18 3.2.0 1.0.17 3.1.2 1.0.17 3.1.2 1.0.17 3.1.2 1.0.17 3.1.2 1.0.16 3.1.1 1.0.15 3.1.1 1.0.15 3.1.1 1.0.14 3.1.1 1.0.14 3.0.1 1.0.14 3.0.1 1.0.14 3.0.1 1.0.14 3.0.1 1.0.13 3.0.0 1.0.13 3.0.0
66 :doc:`hipfort <hipfort:index>` :doc:`hipSPARSELt <hipsparselt:index>` 0.6.0 0.6.0 0.2.3 0.5.1 0.2.2 0.5.1 0.2.2 0.5.0 0.2.2 0.5.0 0.2.2 0.4.0 0.2.1 0.4.0 0.2.1 0.4.0 0.2.1 0.4.0 0.2.1 0.4.0 0.2.0 0.4.0 0.2.0 0.4.0 0.1.0 0.4.0 0.1.0 0.4.0 0.1.0 0.4.0 0.1.0
67 :doc:`hipRAND <hiprand:index>` :doc:`rocALUTION <rocalution:index>` 2.12.0 2.12.0 3.2.2 2.11.1 3.2.1 2.11.1 3.2.1 2.11.1 3.2.1 2.11.0 3.2.1 2.11.1 3.2.1 2.11.0 3.2.0 2.11.0 3.2.0 2.11.0 3.2.0 2.10.16 3.1.1 2.10.16 3.1.1 2.10.16 3.1.1 2.10.16 3.1.1 2.10.16 3.0.3 2.10.16 3.0.3
68 :doc:`hipSOLVER <hipsolver:index>` :doc:`rocBLAS <rocblas:index>` 2.4.0 2.4.0 4.4.0 2.3.0 4.3.0 2.3.0 4.3.0 2.3.0 4.3.0 2.3.0 4.3.0 2.2.0 4.2.4 2.2.0 4.2.1 2.2.0 4.2.1 2.2.0 4.2.0 2.1.1 4.1.2 2.1.1 4.1.2 2.1.1 4.1.0 2.1.0 4.1.0 2.0.0 4.0.0 2.0.0 4.0.0
69 :doc:`hipSPARSE <hipsparse:index>` :doc:`rocFFT <rocfft:index>` 3.2.0 3.2.0 1.0.32 3.1.2 1.0.31 3.1.2 1.0.31 3.1.2 1.0.31 3.1.2 1.0.31 3.1.1 1.0.30 3.1.1 1.0.29 3.1.1 1.0.29 3.1.1 1.0.28 3.0.1 1.0.27 3.0.1 1.0.27 3.0.1 1.0.27 3.0.1 1.0.26 3.0.0 1.0.25 3.0.0 1.0.23
70 :doc:`hipSPARSELt <hipsparselt:index>` :doc:`rocRAND <rocrand:index>` 0.2.3 0.2.3 3.3.0 0.2.2 3.2.0 0.2.2 3.2.0 0.2.2 3.2.0 0.2.2 3.2.0 0.2.1 3.1.1 0.2.1 3.1.0 0.2.1 3.1.0 0.2.1 3.1.0 0.2.0 3.0.1 0.2.0 3.0.1 0.1.0 3.0.1 0.1.0 3.0.1 0.1.0 3.0.0 0.1.0 2.10.17
71 :doc:`rocALUTION <rocalution:index>` :doc:`rocSOLVER <rocsolver:index>` 3.2.3 3.2.2 3.28.0 3.2.1 3.27.0 3.2.1 3.27.0 3.2.1 3.27.0 3.2.1 3.27.0 3.2.1 3.26.2 3.2.0 3.26.0 3.2.0 3.26.0 3.2.0 3.26.0 3.1.1 3.25.0 3.1.1 3.25.0 3.1.1 3.25.0 3.1.1 3.25.0 3.0.3 3.24.0 3.0.3 3.24.0
72 :doc:`rocBLAS <rocblas:index>` :doc:`rocSPARSE <rocsparse:index>` 4.4.0 4.4.0 3.4.0 4.3.0 3.3.0 4.3.0 3.3.0 4.3.0 3.3.0 4.3.0 3.3.0 4.2.4 3.2.1 4.2.1 3.2.0 4.2.1 3.2.0 4.2.0 3.2.0 4.1.2 3.1.2 4.1.2 3.1.2 4.1.0 3.1.2 4.1.0 3.1.2 4.0.0 3.0.2 4.0.0 3.0.2
73 :doc:`rocFFT <rocfft:index>` :doc:`rocWMMA <rocwmma:index>` 1.0.32 1.0.32 1.7.0 1.0.31 1.6.0 1.0.31 1.6.0 1.0.31 1.6.0 1.0.31 1.6.0 1.0.30 1.5.0 1.0.29 1.5.0 1.0.29 1.5.0 1.0.28 1.5.0 1.0.27 1.4.0 1.0.27 1.4.0 1.0.27 1.4.0 1.0.26 1.4.0 1.0.25 1.3.0 1.0.23 1.3.0
74 :doc:`rocRAND <rocrand:index>` :doc:`Tensile <tensile:src/index>` 3.3.0 3.3.0 4.43.0 3.2.0 4.42.0 3.2.0 4.42.0 3.2.0 4.42.0 3.2.0 4.42.0 3.1.1 4.41.0 3.1.0 4.41.0 3.1.0 4.41.0 3.1.0 4.41.0 3.0.1 4.40.0 3.0.1 4.40.0 3.0.1 4.40.0 3.0.1 4.40.0 3.0.0 4.39.0 2.10.17 4.39.0
75 :doc:`rocSOLVER <rocsolver:index>` 3.28.0 3.28.0 3.27.0 3.27.0 3.27.0 3.27.0 3.26.2 3.26.0 3.26.0 3.26.0 3.25.0 3.25.0 3.25.0 3.25.0 3.24.0 3.24.0
76 :doc:`rocSPARSE <rocsparse:index>` PRIMITIVES 3.4.0 3.4.0 .. _primitivelibs-support-compatibility-matrix-past-60: 3.3.0 3.3.0 3.3.0 3.3.0 3.2.1 3.2.0 3.2.0 3.2.0 3.1.2 3.1.2 3.1.2 3.1.2 3.0.2 3.0.2
77 :doc:`rocWMMA <rocwmma:index>` :doc:`hipCUB <hipcub:index>` 1.7.0 1.7.0 3.4.0 1.6.0 3.3.0 1.6.0 3.3.0 1.6.0 3.3.0 1.6.0 3.3.0 1.5.0 3.2.1 1.5.0 3.2.0 1.5.0 3.2.0 1.5.0 3.2.0 1.4.0 3.1.0 1.4.0 3.1.0 1.4.0 3.1.0 1.4.0 3.1.0 1.3.0 3.0.0 1.3.0 3.0.0
78 :doc:`Tensile <tensile:src/index>` :doc:`hipTensor <hiptensor:index>` 4.43.0 4.43.0 1.5.0 4.42.0 1.4.0 4.42.0 1.4.0 4.42.0 1.4.0 4.42.0 1.4.0 4.41.0 1.3.0 4.41.0 1.3.0 4.41.0 1.3.0 4.41.0 1.3.0 4.40.0 1.2.0 4.40.0 1.2.0 4.40.0 1.2.0 4.40.0 1.2.0 4.39.0 1.1.0 4.39.0 1.1.0
79 :doc:`rocPRIM <rocprim:index>` 3.4.0 3.3.0 3.3.0 3.3.0 3.3.0 3.2.2 3.2.0 3.2.0 3.2.0 3.1.0 3.1.0 3.1.0 3.1.0 3.0.0 3.0.0
80 PRIMITIVES :doc:`rocThrust <rocthrust:index>` .. _primitivelibs-support-compatibility-matrix-past-60: 3.3.0 3.3.0 3.3.0 3.3.0 3.3.0 3.1.1 3.1.0 3.1.0 3.0.1 3.0.1 3.0.1 3.0.1 3.0.1 3.0.0 3.0.0
81 :doc:`hipCUB <hipcub:index>` 3.4.0 3.4.0 3.3.0 3.3.0 3.3.0 3.3.0 3.2.1 3.2.0 3.2.0 3.2.0 3.1.0 3.1.0 3.1.0 3.1.0 3.0.0 3.0.0
82 :doc:`hipTensor <hiptensor:index>` SUPPORT LIBS 1.5.0 1.5.0 1.4.0 1.4.0 1.4.0 1.4.0 1.3.0 1.3.0 1.3.0 1.3.0 1.2.0 1.2.0 1.2.0 1.2.0 1.1.0 1.1.0
83 :doc:`rocPRIM <rocprim:index>` `hipother <https://github.com/ROCm/hipother>`_ 3.4.0 3.4.0 6.4.43482 3.3.0 6.3.42134 3.3.0 6.3.42134 3.3.0 6.3.42133 3.3.0 6.3.42131 3.2.2 6.2.41134 3.2.0 6.2.41134 3.2.0 6.2.41134 3.2.0 6.2.41133 3.1.0 6.1.40093 3.1.0 6.1.40093 3.1.0 6.1.40092 3.1.0 6.1.40091 3.0.0 6.1.32831 3.0.0 6.1.32830
84 :doc:`rocThrust <rocthrust:index>` `rocm-core <https://github.com/ROCm/rocm-core>`_ 3.3.0 3.3.0 6.4.0 3.3.0 6.3.3 3.3.0 6.3.2 3.3.0 6.3.1 3.3.0 6.3.0 3.1.1 6.2.4 3.1.0 6.2.2 3.1.0 6.2.1 3.0.1 6.2.0 3.0.1 6.1.2 3.0.1 6.1.2 3.0.1 6.1.1 3.0.1 6.1.0 3.0.0 6.0.2 3.0.0 6.0.0
85 `ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_ N/A [#ROCT-rocr-past-60]_ N/A [#ROCT-rocr-past-60]_ N/A [#ROCT-rocr-past-60]_ N/A [#ROCT-rocr-past-60]_ N/A [#ROCT-rocr-past-60]_ 20240607.5.7 20240607.5.7 20240607.4.05 20240607.1.4246 20240125.5.08 20240125.5.08 20240125.5.08 20240125.3.30 20231016.2.245 20231016.2.245
86 SUPPORT LIBS
87 `hipother <https://github.com/ROCm/hipother>`_ SYSTEM MGMT TOOLS 6.4.43483 6.4.43482 .. _tools-support-compatibility-matrix-past-60: 6.3.42134 6.3.42134 6.3.42133 6.3.42131 6.2.41134 6.2.41134 6.2.41134 6.2.41133 6.1.40093 6.1.40093 6.1.40092 6.1.40091 6.1.32831 6.1.32830
88 `rocm-core <https://github.com/ROCm/rocm-core>`_ :doc:`AMD SMI <amdsmi:index>` 6.4.1 6.4.0 25.3.0 6.3.3 24.7.1 6.3.2 24.7.1 6.3.1 24.7.1 6.3.0 24.7.1 6.2.4 24.6.3 6.2.2 24.6.3 6.2.1 24.6.3 6.2.0 24.6.2 6.1.5 24.5.1 6.1.2 24.5.1 6.1.1 24.5.1 6.1.0 24.4.1 6.0.2 23.4.2 6.0.0 23.4.2
89 `ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_ :doc:`ROCm Data Center Tool <rdc:index>` N/A [#ROCT-rocr-past-60]_ N/A [#ROCT-rocr-past-60]_ 0.3.0 N/A [#ROCT-rocr-past-60]_ 0.3.0 N/A [#ROCT-rocr-past-60]_ 0.3.0 N/A [#ROCT-rocr-past-60]_ 0.3.0 N/A [#ROCT-rocr-past-60]_ 0.3.0 20240607.5.7 0.3.0 20240607.5.7 0.3.0 20240607.4.05 0.3.0 20240607.1.4246 0.3.0 20240125.5.08 0.3.0 20240125.5.08 0.3.0 20240125.5.08 0.3.0 20240125.3.30 0.3.0 20231016.2.245 0.3.0 20231016.2.245 0.3.0
90 :doc:`rocminfo <rocminfo:index>` 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0
91 SYSTEM MGMT TOOLS :doc:`ROCm SMI <rocm_smi_lib:index>` .. _tools-support-compatibility-matrix-past-60: 7.5.0 7.4.0 7.4.0 7.4.0 7.4.0 7.3.0 7.3.0 7.3.0 7.3.0 7.2.0 7.2.0 7.0.0 7.0.0 6.0.2 6.0.0
92 :doc:`AMD SMI <amdsmi:index>` :doc:`ROCm Validation Suite <rocmvalidationsuite:index>` 25.4.2 25.3.0 1.1.0 24.7.1 1.1.0 24.7.1 1.1.0 24.7.1 1.1.0 24.7.1 1.1.0 24.6.3 1.0.60204 24.6.3 1.0.60202 24.6.3 1.0.60201 24.6.2 1.0.60200 24.5.1 1.0.60102 24.5.1 1.0.60102 24.5.1 1.0.60101 24.4.1 1.0.60100 23.4.2 1.0.60002 23.4.2 1.0.60000
93 :doc:`ROCm Data Center Tool <rdc:index>` 0.3.0 0.3.0 0.3.0 0.3.0 0.3.0 0.3.0 0.3.0 0.3.0 0.3.0 0.3.0 0.3.0 0.3.0 0.3.0 0.3.0 0.3.0 0.3.0
94 :doc:`rocminfo <rocminfo:index>` PERFORMANCE TOOLS 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0
95 :doc:`ROCm SMI <rocm_smi_lib:index>` :doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>` 7.5.0 7.5.0 1.4.0 7.4.0 1.4.0 7.4.0 1.4.0 7.4.0 1.4.0 7.4.0 1.4.0 7.3.0 1.4.0 7.3.0 1.4.0 7.3.0 1.4.0 7.3.0 1.4.0 7.2.0 1.4.0 7.2.0 1.4.0 7.0.0 1.4.0 7.0.0 1.4.0 6.0.2 1.4.0 6.0.0 1.4.0
96 :doc:`ROCm Validation Suite <rocmvalidationsuite:index>` :doc:`ROCm Compute Profiler <rocprofiler-compute:index>` 1.1.0 1.1.0 3.1.0 1.1.0 3.0.0 1.1.0 3.0.0 1.1.0 3.0.0 1.1.0 3.0.0 1.0.60204 2.0.1 1.0.60202 2.0.1 1.0.60201 2.0.1 1.0.60200 2.0.1 1.0.60105 N/A 1.0.60102 N/A 1.0.60101 N/A 1.0.60100 N/A 1.0.60002 N/A 1.0.60000 N/A
97 :doc:`ROCm Systems Profiler <rocprofiler-systems:index>` 1.0.0 0.1.2 0.1.1 0.1.0 0.1.0 1.11.2 1.11.2 1.11.2 1.11.2 N/A N/A N/A N/A N/A N/A
98 PERFORMANCE TOOLS :doc:`ROCProfiler <rocprofiler:index>` 2.0.60400 2.0.60303 2.0.60302 2.0.60301 2.0.60300 2.0.60204 2.0.60202 2.0.60201 2.0.60200 2.0.60102 2.0.60102 2.0.60101 2.0.60100 2.0.60002 2.0.60000
99 :doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>` :doc:`ROCprofiler-SDK <rocprofiler-sdk:index>` 1.4.0 1.4.0 0.6.0 1.4.0 0.5.0 1.4.0 0.5.0 1.4.0 0.5.0 1.4.0 0.5.0 1.4.0 0.4.0 1.4.0 0.4.0 1.4.0 0.4.0 1.4.0 0.4.0 1.4.0 N/A 1.4.0 N/A 1.4.0 N/A 1.4.0 N/A 1.4.0 N/A 1.4.0 N/A
100 :doc:`ROCm Compute Profiler <rocprofiler-compute:index>` :doc:`ROCTracer <roctracer:index>` 3.1.0 3.1.0 4.1.60400 3.0.0 4.1.60303 3.0.0 4.1.60302 3.0.0 4.1.60301 3.0.0 4.1.60300 2.0.1 4.1.60204 2.0.1 4.1.60202 2.0.1 4.1.60201 2.0.1 4.1.60200 N/A 4.1.60102 N/A 4.1.60102 N/A 4.1.60101 N/A 4.1.60100 N/A 4.1.60002 N/A 4.1.60000
101 :doc:`ROCm Systems Profiler <rocprofiler-systems:index>` 1.0.1 1.0.0 0.1.2 0.1.1 0.1.0 0.1.0 1.11.2 1.11.2 1.11.2 1.11.2 N/A N/A N/A N/A N/A N/A
102 :doc:`ROCProfiler <rocprofiler:index>` DEVELOPMENT TOOLS 2.0.60401 2.0.60400 2.0.60303 2.0.60302 2.0.60301 2.0.60300 2.0.60204 2.0.60202 2.0.60201 2.0.60200 2.0.60105 2.0.60102 2.0.60101 2.0.60100 2.0.60002 2.0.60000
103 :doc:`ROCprofiler-SDK <rocprofiler-sdk:index>` :doc:`HIPIFY <hipify:index>` 0.6.0 0.6.0 19.0.0.25104 0.5.0 18.0.0.25012 0.5.0 18.0.0.25012 0.5.0 18.0.0.24491 0.5.0 18.0.0.24455 0.4.0 18.0.0.24392 0.4.0 18.0.0.24355 0.4.0 18.0.0.24355 0.4.0 18.0.0.24232 N/A 17.0.0.24193 N/A 17.0.0.24193 N/A 17.0.0.24154 N/A 17.0.0.24103 N/A 17.0.0.24012 N/A 17.0.0.23483
104 :doc:`ROCTracer <roctracer:index>` :doc:`ROCm CMake <rocmcmakebuildtools:index>` 4.1.60401 4.1.60400 0.14.0 4.1.60303 0.14.0 4.1.60302 0.14.0 4.1.60301 0.14.0 4.1.60300 0.14.0 4.1.60204 0.13.0 4.1.60202 0.13.0 4.1.60201 0.13.0 4.1.60200 0.13.0 4.1.60105 0.12.0 4.1.60102 0.12.0 4.1.60101 0.12.0 4.1.60100 0.12.0 4.1.60002 0.11.0 4.1.60000 0.11.0
105 :doc:`ROCdbgapi <rocdbgapi:index>` 0.77.2 0.77.0 0.77.0 0.77.0 0.77.0 0.76.0 0.76.0 0.76.0 0.76.0 0.71.0 0.71.0 0.71.0 0.71.0 0.71.0 0.71.0
106 DEVELOPMENT TOOLS :doc:`ROCm Debugger (ROCgdb) <rocgdb:index>` 15.2.0 15.2.0 15.2.0 15.2.0 15.2.0 14.2.0 14.2.0 14.2.0 14.2.0 14.1.0 14.1.0 14.1.0 14.1.0 13.2.0 13.2.0
107 :doc:`HIPIFY <hipify:index>` `rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_ 19.0.0 19.0.0 0.4.0 18.0.0.25012 0.4.0 18.0.0.25012 0.4.0 18.0.0.24491 0.4.0 18.0.0.24455 0.4.0 18.0.0.24392 0.4.0 18.0.0.24355 0.4.0 18.0.0.24355 0.4.0 18.0.0.24232 0.4.0 17.0.0.24193 0.3.0 17.0.0.24193 0.3.0 17.0.0.24154 0.3.0 17.0.0.24103 0.3.0 17.0.0.24012 N/A 17.0.0.23483 N/A
108 :doc:`ROCm CMake <rocmcmakebuildtools:index>` :doc:`ROCr Debug Agent <rocr_debug_agent:index>` 0.14.0 0.14.0 2.0.4 0.14.0 2.0.3 0.14.0 2.0.3 0.14.0 2.0.3 0.14.0 2.0.3 0.13.0 2.0.3 0.13.0 2.0.3 0.13.0 2.0.3 0.13.0 2.0.3 0.12.0 2.0.3 0.12.0 2.0.3 0.12.0 2.0.3 0.12.0 2.0.3 0.11.0 2.0.3 0.11.0 2.0.3
109 :doc:`ROCdbgapi <rocdbgapi:index>` 0.77.2 0.77.2 0.77.0 0.77.0 0.77.0 0.77.0 0.76.0 0.76.0 0.76.0 0.76.0 0.71.0 0.71.0 0.71.0 0.71.0 0.71.0 0.71.0
110 :doc:`ROCm Debugger (ROCgdb) <rocgdb:index>` COMPILERS 15.2.0 15.2.0 .. _compilers-support-compatibility-matrix-past-60: 15.2.0 15.2.0 15.2.0 15.2.0 14.2.0 14.2.0 14.2.0 14.2.0 14.1.0 14.1.0 14.1.0 14.1.0 13.2.0 13.2.0
111 `rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_ `clang-ocl <https://github.com/ROCm/clang-ocl>`_ 0.4.0 0.4.0 N/A 0.4.0 N/A 0.4.0 N/A 0.4.0 N/A 0.4.0 N/A 0.4.0 N/A 0.4.0 N/A 0.4.0 N/A 0.4.0 N/A 0.3.0 0.5.0 0.3.0 0.5.0 0.3.0 0.5.0 0.3.0 0.5.0 N/A 0.5.0 N/A 0.5.0
112 :doc:`ROCr Debug Agent <rocr_debug_agent:index>` :doc:`hipCC <hipcc:index>` 2.0.4 2.0.4 1.1.1 2.0.3 1.1.1 2.0.3 1.1.1 2.0.3 1.1.1 2.0.3 1.1.1 2.0.3 1.1.1 2.0.3 1.1.1 2.0.3 1.1.1 2.0.3 1.1.1 2.0.3 1.0.0 2.0.3 1.0.0 2.0.3 1.0.0 2.0.3 1.0.0 2.0.3 1.0.0 2.0.3 1.0.0
113 `Flang <https://github.com/ROCm/flang>`_ 19.0.0.25133 18.0.0.25012 18.0.0.25012 18.0.0.24491 18.0.0.24455 18.0.0.24392 18.0.0.24355 18.0.0.24355 18.0.0.24232 17.0.0.24193 17.0.0.24193 17.0.0.24154 17.0.0.24103 17.0.0.24012 17.0.0.23483
114 COMPILERS :doc:`llvm-project <llvm-project:index>` .. _compilers-support-compatibility-matrix-past-60: 19.0.0.25133 18.0.0.25012 18.0.0.25012 18.0.0.24491 18.0.0.24491 18.0.0.24392 18.0.0.24355 18.0.0.24355 18.0.0.24232 17.0.0.24193 17.0.0.24193 17.0.0.24154 17.0.0.24103 17.0.0.24012 17.0.0.23483
115 `clang-ocl <https://github.com/ROCm/clang-ocl>`_ `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_ N/A N/A 19.0.0.25133 N/A 18.0.0.25012 N/A 18.0.0.25012 N/A 18.0.0.24491 N/A 18.0.0.24491 N/A 18.0.0.24392 N/A 18.0.0.24355 N/A 18.0.0.24355 N/A 18.0.0.24232 0.5.0 17.0.0.24193 0.5.0 17.0.0.24193 0.5.0 17.0.0.24154 0.5.0 17.0.0.24103 0.5.0 17.0.0.24012 0.5.0 17.0.0.23483
116 :doc:`hipCC <hipcc:index>` 1.1.1 1.1.1 1.1.1 1.1.1 1.1.1 1.1.1 1.1.1 1.1.1 1.1.1 1.1.1 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0 1.0.0
117 `Flang <https://github.com/ROCm/flang>`_ RUNTIMES 19.0.0.25184 19.0.0.25133 .. _runtime-support-compatibility-matrix-past-60: 18.0.0.25012 18.0.0.25012 18.0.0.24491 18.0.0.24455 18.0.0.24392 18.0.0.24355 18.0.0.24355 18.0.0.24232 17.0.0.24193 17.0.0.24193 17.0.0.24154 17.0.0.24103 17.0.0.24012 17.0.0.23483
118 :doc:`llvm-project <llvm-project:index>` :doc:`AMD CLR <hip:understand/amd_clr>` 19.0.0.25184 19.0.0.25133 6.4.43482 18.0.0.25012 6.3.42134 18.0.0.25012 6.3.42134 18.0.0.24491 6.3.42133 18.0.0.24491 6.3.42131 18.0.0.24392 6.2.41134 18.0.0.24355 6.2.41134 18.0.0.24355 6.2.41134 18.0.0.24232 6.2.41133 17.0.0.24193 6.1.40093 17.0.0.24193 6.1.40093 17.0.0.24154 6.1.40092 17.0.0.24103 6.1.40091 17.0.0.24012 6.1.32831 17.0.0.23483 6.1.32830
119 `OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_ :doc:`HIP <hip:index>` 19.0.0.25184 19.0.0.25133 6.4.43482 18.0.0.25012 6.3.42134 18.0.0.25012 6.3.42134 18.0.0.24491 6.3.42133 18.0.0.24491 6.3.42131 18.0.0.24392 6.2.41134 18.0.0.24355 6.2.41134 18.0.0.24355 6.2.41134 18.0.0.24232 6.2.41133 17.0.0.24193 6.1.40093 17.0.0.24193 6.1.40093 17.0.0.24154 6.1.40092 17.0.0.24103 6.1.40091 17.0.0.24012 6.1.32831 17.0.0.23483 6.1.32830
120 `OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_ 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0
121 RUNTIMES :doc:`ROCr Runtime <rocr-runtime:index>` .. _runtime-support-compatibility-matrix-past-60: 1.15.0 1.14.0 1.14.0 1.14.0 1.14.0 1.14.0 1.14.0 1.14.0 1.13.0 1.13.0 1.13.0 1.13.0 1.13.0 1.12.0 1.12.0
:doc:`AMD CLR <hip:understand/amd_clr>` 6.4.43483 6.4.43482 6.3.42134 6.3.42134 6.3.42133 6.3.42131 6.2.41134 6.2.41134 6.2.41134 6.2.41133 6.1.40093 6.1.40093 6.1.40092 6.1.40091 6.1.32831 6.1.32830
:doc:`HIP <hip:index>` 6.4.43483 6.4.43482 6.3.42134 6.3.42134 6.3.42133 6.3.42131 6.2.41134 6.2.41134 6.2.41134 6.2.41133 6.1.40093 6.1.40093 6.1.40092 6.1.40091 6.1.32831 6.1.32830
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_ 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0 2.0.0
:doc:`ROCr Runtime <rocr-runtime:index>` 1.15.0 1.15.0 1.14.0 1.14.0 1.14.0 1.14.0 1.14.0 1.14.0 1.14.0 1.13.0 1.13.0 1.13.0 1.13.0 1.13.0 1.12.0 1.12.0

View File

@@ -23,130 +23,126 @@ compatibility and system requirements.
.. container:: format-big-table
.. csv-table::
:header: "ROCm Version", "6.4.1", "6.4.0", "6.3.0"
:header: "ROCm Version", "6.4.0", "6.3.3", "6.2.0"
:stub-columns: 1
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5
,"RHEL 9.6, 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4"
,RHEL 8.10,RHEL 8.10,RHEL 8.10
,SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5"
,"Oracle Linux 9, 8 [#mi300x]_","Oracle Linux 9, 8 [#mi300x]_",Oracle Linux 8.10 [#mi300x]_
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04
,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4"
,"RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3"
,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9"
,"SLES 15 SP6","SLES 15 SP6, SP5","SLES 15 SP6, SP5"
,"Oracle Linux 9, 8 [#mi300x]_",Oracle Linux 8.10 [#mi300x]_,Oracle Linux 8.9 [#mi300x]_
,Debian 12 [#single-node]_,Debian 12 [#single-node]_,
,Azure Linux 3.0 [#mi300x]_,Azure Linux 3.0 [#mi300x]_,
,.. _architecture-support-compatibility-matrix:,,
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3
,CDNA2,CDNA2,CDNA2
,CDNA,CDNA,CDNA
,RDNA4,,
,RDNA3,RDNA3,RDNA3
,RDNA2,RDNA2,RDNA2
,.. _gpu-support-compatibility-matrix:,,
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1201 [#RDNA4-OS]_,,
,gfx1200 [#RDNA4-OS]_,,
,gfx1100,gfx1100,gfx1100
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100
,gfx1030,gfx1030,gfx1030
,gfx942,gfx942,gfx942
,gfx942,gfx942,gfx942 [#mi300_620]_
,gfx90a,gfx90a,gfx90a
,gfx908,gfx908,gfx908
,,,
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix:,,
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 2.1, 2.0, 1.13"
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1"
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.31
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.17.3
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.3, 2.2, 2.1, 2.0, 1.13"
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1"
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.31,0.4.26
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.20,1.17.3,1.17.3
,,,
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix:,,
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0
,,,
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix:,,
Thrust,2.5.0,2.5.0,2.3.2
CUB,2.5.0,2.5.0,2.3.2
Thrust,2.5.0,2.3.2,2.2.0
CUB,2.5.0,2.3.2,2.2.0
,,,
KMD & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,,
KMD versions,"6.4.x, 6.3.x","6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
KMD versions,"6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x"
,,,
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,,
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.11.0
:doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.3.0
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.1.0
:doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.1.0
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.8.0
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.6.0
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.2.0
:doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.1
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.11.0,2.10.0
:doc:`MIOpen <miopen:index>`,3.4.0,3.3.0,3.2.0
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.1.0,3.0.0
:doc:`rocAL <rocal:index>`,2.2.0,2.1.0,1.0.0
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.8.0,0.6.0
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.6.0,N/A
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.2.0,0.1.0
:doc:`RPP <rpp:index>`,1.9.10,1.9.1,1.8.0
,,,
COMMUNICATION,.. _commlibs-support-compatibility-matrix:,,
:doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.21.5
:doc:`rocSHMEM <rocshmem:index>`,2.0.0,2.0.0,N/A
:doc:`RCCL <rccl:index>`,2.22.3,2.21.5,2.20.5
,,,
MATH LIBS,.. _mathlibs-support-compatibility-matrix:,,
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.3.0
:doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.0,0.10.0
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.17
:doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.5.0
:doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.11.0
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.3.0
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.1.2
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.2
:doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.2,3.2.1
:doc:`rocBLAS <rocblas:index>`,4.4.0,4.4.0,4.3.0
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.31
:doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.2.0
:doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.28.0,3.27.0
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.3.0
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.6.0
:doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.42.0
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.3.0,2.2.0
:doc:`hipBLASLt <hipblaslt:index>`,0.12.0,0.10.0,0.8.0
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.17,1.0.14
:doc:`hipfort <hipfort:index>`,0.6.0,0.5.1,0.4.0
:doc:`hipRAND <hiprand:index>`,2.12.0,2.11.1,2.11.0
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.3.0,2.2.0
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.1.2,3.1.1
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.2,0.2.1
:doc:`rocALUTION <rocalution:index>`,3.2.2,3.2.1,3.2.0
:doc:`rocBLAS <rocblas:index>`,4.4.0,4.3.0,4.2.0
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.31,1.0.28
:doc:`rocRAND <rocrand:index>`,3.3.0,3.2.0,3.1.0
:doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.27.0,3.26.0
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.3.0,3.2.0
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.6.0,1.5.0
:doc:`Tensile <tensile:src/index>`,4.43.0,4.42.0,4.41.0
,,,
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix:,,
:doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.3.0
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.4.0
:doc:`rocPRIM <rocprim:index>`,3.4.0,3.4.0,3.3.0
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0
:doc:`hipCUB <hipcub:index>`,3.4.0,3.3.0,3.2.0
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.4.0,1.3.0
:doc:`rocPRIM <rocprim:index>`,3.4.0,3.3.0,3.2.0
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.0.1
,,,
SUPPORT LIBS,,,
`hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43482,6.3.42131
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.1,6.4.0,6.3.0
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_
`hipother <https://github.com/ROCm/hipother>`_,6.4.43482,6.3.42134,6.2.41133
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.0,6.3.3,6.2.0
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,20240607.1.4246
,,,
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix:,,
:doc:`AMD SMI <amdsmi:index>`,25.4.2,25.3.0,24.7.1
:doc:`AMD SMI <amdsmi:index>`,25.3.0,24.7.1,24.6.2
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.5.0,7.4.0
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.4.0,7.3.0
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.0.60200
,,,
PERFORMANCE TOOLS,,,
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.1.0,3.0.0
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.1,1.0.0,0.1.0
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60401,2.0.60400,2.0.60300
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.5.0
:doc:`ROCTracer <roctracer:index>`,4.1.60401,4.1.60400,4.1.60300
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.0.0,2.0.1
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.0,0.1.2,1.11.2
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60400,2.0.60303,2.0.60200
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.5.0,0.4.0
:doc:`ROCTracer <roctracer:index>`,4.1.60400,4.1.60303,4.1.60200
,,,
DEVELOPMENT TOOLS,,,
:doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,18.0.0.24455
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.0
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0
:doc:`HIPIFY <hipify:index>`,19.0.0.25133,18.0.0.25012,18.0.0.24232
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.13.0
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.0,0.76.0
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,14.2.0
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.3
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.3,2.0.3
,,,
COMPILERS,.. _compilers-support-compatibility-matrix:,,
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25184,19.0.0.25133,18.0.0.24455
:doc:`llvm-project <llvm-project:index>`,19.0.0.25184,19.0.0.25133,18.0.0.24491
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25184,19.0.0.25133,18.0.0.24491
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25133,18.0.0.25012,18.0.0.24232
:doc:`llvm-project <llvm-project:index>`,19.0.0.25133,18.0.0.25012,18.0.0.24232
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25133,18.0.0.25012,18.0.0.24232
,,,
RUNTIMES,.. _runtime-support-compatibility-matrix:,,
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43483,6.4.43482,6.3.42131
:doc:`HIP <hip:index>`,6.4.43483,6.4.43482,6.3.42131
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43482,6.3.42134,6.2.41133
:doc:`HIP <hip:index>`,6.4.43482,6.3.42134,6.2.41133
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.14.0
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.14.0,1.13.0
.. rubric:: Footnotes
@@ -156,7 +152,6 @@ compatibility and system requirements.
.. [#mi300_620] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
.. [#kfd_support] Starting from ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart (assuming hardware support is available in both). For earlier ROCm releases, the compatibility is provided for +/- 2 releases. These are the compatibility combinations that are currently supported.
.. [#ROCT-rocr] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
.. [#RDNA4-OS] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9070 (gfx1201), Radeon RX 9070 GRE (gfx1201), and Radeon RX 9060 XT (gfx1200) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.5, and RHEL 9.4.
.. _OS-kernel-versions:
@@ -174,8 +169,7 @@ Use this lookup table to confirm which operating system and kernel versions are
,,
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 22.04.5, "5.15 GA, 6.8 HWE", 2.35
,,
`Red Hat Enterprise Linux (RHEL 9) <https://access.redhat.com/articles/3078#RHEL9>`_, 9.6, 5.14+, 2.34
, 9.5, 5.14+, 2.34
`Red Hat Enterprise Linux (RHEL 9) <https://access.redhat.com/articles/3078#RHEL9>`_, 9.5, 5.14+, 2.34
,9.4, 5.14+, 2.34
,9.3, 5.14+, 2.34
,,
@@ -236,4 +230,3 @@ Expand for full historical view of:
.. [#mi300_600-past-60] **For ROCm 6.0.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
.. [#kfd_support-past-60] Starting from ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart (assuming hardware support is available in both). For earlier ROCm releases, the compatibility is provided for +/- 2 releases. These are the compatibility combinations that are currently supported.
.. [#ROCT-rocr-past-60] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
.. [#RDNA4-OS-past-60] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), and Radeon RX 9060 XT (gfx1200) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.5, and RHEL 9.4.

View File

@@ -14,18 +14,17 @@ JAX provides a NumPy-like API, which combines automatic differentiation and the
Accelerated Linear Algebra (XLA) compiler to achieve high-performance machine
learning at scale.
JAX uses composable transformations of Python and NumPy through just-in-time
(JIT) compilation, automatic vectorization, and parallelization. To learn about
JAX, including profiling and optimizations, see the official `JAX documentation
JAX uses composable transformations of Python and NumPy through just-in-time (JIT) compilation,
automatic vectorization, and parallelization. To learn about JAX, including profiling and
optimizations, see the official `JAX documentation
<https://jax.readthedocs.io/en/latest/notebooks/quickstart.html>`_.
ROCm support for JAX is upstreamed, and users can build the official source code
with ROCm support:
ROCm support for JAX is upstreamed and users can build the official source code with ROCm
support:
- ROCm JAX release:
- Offers AMD-validated and community :ref:`Docker images <jax-docker-compat>`
with ROCm and JAX preinstalled.
- Offers AMD-validated and community :ref:`Docker images <jax-docker-compat>` with ROCm and JAX pre-installed.
- ROCm JAX repository: `ROCm/jax <https://github.com/ROCm/jax>`_
@@ -37,8 +36,8 @@ with ROCm support:
- Official JAX repository: `jax-ml/jax <https://github.com/jax-ml/jax>`_
- See the `AMD GPU (Linux) installation section
<https://jax.readthedocs.io/en/latest/installation.html#amd-gpu-linux>`_ in
the JAX documentation.
<https://jax.readthedocs.io/en/latest/installation.html#amd-gpu-linux>`_ in the JAX
documentation.
.. note::
@@ -47,44 +46,6 @@ with ROCm support:
`Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
follow upstream JAX releases and use the latest available ROCm version.
Use cases and recommendations
================================================================================
* The `nanoGPT in JAX <https://rocm.blogs.amd.com/artificial-intelligence/nanoGPT-JAX/README.html>`_
blog explores the implementation and training of a Generative Pre-trained
Transformer (GPT) model in JAX, inspired by Andrej Karpathys JAX-based
nanoGPT. Comparing how essential GPT components—such as self-attention
mechanisms and optimizers—are realized in JAX and JAX, also highlights
JAXs unique features.
* The `Optimize GPT Training: Enabling Mixed Precision Training in JAX using
ROCm on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-mixed-precision/README.html>`_
blog post provides a comprehensive guide on enhancing the training efficiency
of GPT models by implementing mixed precision techniques in JAX, specifically
tailored for AMD GPUs utilizing the ROCm platform.
* The `Supercharging JAX with Triton Kernels on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-triton/README.html>`_
blog demonstrates how to develop a custom fused dropout-activation kernel for
matrices using Triton, integrate it with JAX, and benchmark its performance
using ROCm.
* The `Distributed fine-tuning with JAX on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/distributed-sft-jax/README.html>`_
outlines the process of fine-tuning a Bidirectional Encoder Representations
from Transformers (BERT)-based large language model (LLM) using JAX for a text
classification task. The blog post discuss techniques for parallelizing the
fine-tuning across multiple AMD GPUs and assess the model's performance on a
holdout dataset. During the fine-tuning, a BERT-base-cased transformer model
and the General Language Understanding Evaluation (GLUE) benchmark dataset was
used on a multi-GPU setup.
* The `MI300X workload optimization guide <https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/workload.html>`_
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
accelerator using ROCm. The page is aimed at helping users achieve optimal
performance for deep learning and other high-performance computing tasks on
the MI300X GPU.
For more use cases and recommendations, see `ROCm JAX blog posts <https://rocm.blogs.amd.com/blog/tag/jax.html>`_.
.. _jax-docker-compat:
Docker image compatibility
@@ -96,8 +57,8 @@ Docker image compatibility
AMD validates and publishes ready-made `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax>`_
with ROCm backends on Docker Hub. The following Docker image tags and
associated inventories represent the latest JAX version from the official Docker Hub and are validated for
`ROCm 6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`_. Click the |docker-icon|
associated inventories are validated for
`ROCm 6.3.1 <https://repo.radeon.com/rocm/apt/6.3.1/>`_. Click the |docker-icon|
icon to view the image on Docker Hub.
.. list-table:: JAX Docker image components
@@ -107,26 +68,24 @@ icon to view the image on Docker Hub.
- JAX
- Linux
- Python
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.4.1-jax0.4.35-py3.12/images/sha256-7a0745a2a2758bdf86397750bac00e9086cbf67d170cfdbb08af73f7c7d18a6a"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.3.1-jax0.4.31-py3.12/images/sha256-085a0cd5207110922f1fca684933a9359c66d42db6c5aba4760ed5214fdabde0"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
- `0.4.31 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.31>`_
- Ubuntu 24.04
- `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
- `3.12.7 <https://www.python.org/downloads/release/python-3127/>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.4.1-jax0.4.35-py3.10/images/sha256-5f9e8d6e6e69fdc9a1a3f2ba3b1234c3f46c53b7468538c07fd18b00899da54f"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.3.1-jax0.4.31-py3.10/images/sha256-f88eddad8f47856d8640b694da4da347ffc1750d7363175ab7dc872e82b43324"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
- `0.4.31 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.31>`_
- Ubuntu 22.04
- `3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
- `3.10.14 <https://www.python.org/downloads/release/python-31014/>`_
AMD publishes `Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
with ROCm backends on Docker Hub. The following Docker image tags and
associated inventories are tested for `ROCm 6.3.2 <https://repo.radeon.com/rocm/apt/6.3.2/>`_.
associated inventories are tested for `ROCm 6.2.4 <https://repo.radeon.com/rocm/apt/6.2.4/>`_.
.. list-table:: JAX community Docker image components
:header-rows: 1
@@ -135,37 +94,35 @@ associated inventories are tested for `ROCm 6.3.2 <https://repo.radeon.com/rocm/
- JAX
- Linux
- Python
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.12.8/images/sha256-25dfaa0183e274bd0a3554a309af3249c6f16a1793226cb5373f418e39d3146a"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.2.4-jax0.4.35-py3.12.7/images/sha256-a6032d89c07573b84c44e42c637bf9752b1b7cd2a222d39344e603d8f4c63beb?context=explore"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
- Ubuntu 22.04
- `3.12.8 <https://www.python.org/downloads/release/python-3128/>`_
- `3.12.7 <https://www.python.org/downloads/release/python-3127/>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.11.11/images/sha256-ff9baeca9067d13e6c279c911e5a9e5beed0817d24fafd424367cc3d5bd381d7"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.2.4-jax0.4.35-py3.11.10/images/sha256-d462f7e445545fba2f3b92234a21beaa52fe6c5f550faabcfdcd1bf53486d991?context=explore"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
- Ubuntu 22.04
- `3.11.11 <https://www.python.org/downloads/release/python-31111/>`_
- `3.11.10 <https://www.python.org/downloads/release/python-31110/>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.10.16/images/sha256-8bab484be1713655f74da51a191ed824bb9d03db1104fd63530a1ac3c37cf7b1"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.2.4-jax0.4.35-py3.10.15/images/sha256-6f2d4d0f529378d9572f0e8cfdcbc101d1e1d335bd626bb3336fff87814e9d60?context=explore"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
- Ubuntu 22.04
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
- `3.10.15 <https://www.python.org/downloads/release/python-31015/>`_
Key ROCm libraries for JAX
Critical ROCm libraries for JAX
================================================================================
JAX functionality on ROCm is determined by its underlying library
dependencies. These ROCm components affect the capabilities, performance, and
feature set available to developers.
The functionality of JAX with ROCm is determined by its underlying library
dependencies. These critical ROCm components affect the capabilities,
performance, and feature set available to developers. The versions described
are available in ROCm :version:`rocm_version`.
.. list-table::
:header-rows: 1
@@ -253,10 +210,10 @@ feature set available to developers.
distributed training, which involves parallel reductions or
operations like ``jax.numpy.cumsum`` can use rocThrust.
Supported features
Supported and unsupported features
===============================================================================
The following table maps the public JAX API modules to their supported
The following table maps GPU-accelerated JAX modules to their supported
ROCm and JAX versions.
.. list-table::
@@ -264,8 +221,8 @@ ROCm and JAX versions.
* - Module
- Description
- As of JAX
- As of ROCm
- Since JAX
- Since ROCm
* - ``jax.numpy``
- Implements the NumPy API, using the primitives in ``jax.lax``.
- 0.1.56
@@ -293,11 +250,21 @@ ROCm and JAX versions.
devices.
- 0.3.20
- 5.1.0
* - ``jax.dlpack``
- For exchanging tensor data between JAX and other libraries that support the
DLPack standard.
- 0.1.57
- 5.0.0
* - ``jax.distributed``
- Enables the scaling of computations across multiple devices on a single
machine or across multiple machines.
- 0.1.74
- 5.0.0
* - ``jax.dtypes``
- Provides utilities for working with and managing data types in JAX
arrays and computations.
- 0.1.66
- 5.0.0
* - ``jax.image``
- Contains image manipulation functions like resize, scale and translation.
- 0.1.57
@@ -311,10 +278,27 @@ ROCm and JAX versions.
array.
- 0.1.57
- 5.0.0
* - ``jax.profiler``
- Contains JAXs tracing and time profiling features.
- 0.1.57
- 5.0.0
* - ``jax.stages``
- Contains interfaces to stages of the compiled execution process.
- 0.3.4
- 5.0.0
* - ``jax.tree``
- Provides utilities for working with tree-like container data structures.
- 0.4.26
- 5.6.0
* - ``jax.tree_util``
- Provides utilities for working with nested data structures, or
``pytrees``.
- 0.1.65
- 5.0.0
* - ``jax.typing``
- Provides JAX-specific static type annotations.
- 0.3.18
- 5.1.0
* - ``jax.extend``
- Provides modules for access to JAX internal machinery module. The
``jax.extend`` module defines a library view of some of JAXs internal
@@ -350,8 +334,8 @@ A SciPy-like API for scientific computing.
:header-rows: 1
* - Module
- As of JAX
- As of ROCm
- Since JAX
- Since ROCm
* - ``jax.scipy.cluster``
- 0.3.11
- 5.1.0
@@ -396,8 +380,8 @@ jax.scipy.stats module
:header-rows: 1
* - Module
- As of JAX
- As of ROCm
- Since JAX
- Since ROCm
* - ``jax.scipy.stats.bernouli``
- 0.1.56
- 5.0.0
@@ -480,8 +464,8 @@ Modules for JAX extensions.
:header-rows: 1
* - Module
- As of JAX
- As of ROCm
- Since JAX
- Since ROCm
* - ``jax.extend.ffi``
- 0.4.30
- 6.0.0
@@ -495,25 +479,190 @@ Modules for JAX extensions.
- 0.4.15
- 5.5.0
Unsupported JAX features
===============================================================================
jax.experimental module
-------------------------------------------------------------------------------
The following GPU-accelerated JAX features are not supported by ROCm for
the listed supported JAX versions.
Experimental modules and APIs.
.. list-table::
:header-rows: 1
* - Module
- Since JAX
- Since ROCm
* - ``jax.experimental.checkify``
- 0.1.75
- 5.0.0
* - ``jax.experimental.compilation_cache.compilation_cache``
- 0.1.68
- 5.0.0
* - ``jax.experimental.custom_partitioning``
- 0.4.0
- 5.3.0
* - ``jax.experimental.jet``
- 0.1.56
- 5.0.0
* - ``jax.experimental.key_reuse``
- 0.4.26
- 5.6.0
* - ``jax.experimental.mesh_utils``
- 0.1.76
- 5.0.0
* - ``jax.experimental.multihost_utils``
- 0.3.2
- 5.0.0
* - ``jax.experimental.pallas``
- 0.4.15
- 5.5.0
* - ``jax.experimental.pjit``
- 0.1.61
- 5.0.0
* - ``jax.experimental.serialize_executable``
- 0.4.0
- 5.3.0
* - ``jax.experimental.shard_map``
- 0.4.3
- 5.3.0
* - ``jax.experimental.sparse``
- 0.1.75
- 5.0.0
.. list-table::
:header-rows: 1
* - API
- Since JAX
- Since ROCm
* - ``jax.experimental.enable_x64``
- 0.1.60
- 5.0.0
* - ``jax.experimental.disable_x64``
- 0.1.60
- 5.0.0
jax.experimental.pallas module
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Module for Pallas, a JAX extension for custom kernels.
.. list-table::
:header-rows: 1
* - Module
- Since JAX
- Since ROCm
* - ``jax.experimental.pallas.mosaic_gpu``
- 0.4.31
- 6.1.3
* - ``jax.experimental.pallas.tpu``
- 0.4.15
- 5.5.0
* - ``jax.experimental.pallas.triton``
- 0.4.32
- 6.1.3
jax.experimental.sparse module
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Experimental support for sparse matrix operations.
.. list-table::
:header-rows: 1
* - Module
- Since JAX
- Since ROCm
* - ``jax.experimental.sparse.linalg``
- 0.3.15
- 5.2.0
* - ``jax.experimental.sparse.sparsify``
- 0.3.25
- ❌
.. list-table::
:header-rows: 1
* - ``sparse`` data structure API
- Since JAX
- Since ROCm
* - ``jax.experimental.sparse.BCOO``
- 0.1.72
- 5.0.0
* - ``jax.experimental.sparse.BCSR``
- 0.3.20
- 5.1.0
* - ``jax.experimental.sparse.CSR``
- 0.1.75
- 5.0.0
* - ``jax.experimental.sparse.NM``
- 0.4.27
- 5.6.0
* - ``jax.experimental.sparse.COO``
- 0.1.75
- 5.0.0
Unsupported JAX features
------------------------
The following are GPU-accelerated JAX features not currently supported by
ROCm.
.. list-table::
:header-rows: 1
* - Feature
- Description
- Since JAX
* - Mixed Precision with TF32
- Mixed precision with TF32 is used for matrix multiplications,
convolutions, and other linear algebra operations, particularly in
deep learning workloads like CNNs and transformers.
- 0.2.25
* - RNN support
- Currently only LSTM with double bias is supported with float32 input
and weight.
- 0.3.25
* - XLA int4 support
- 4-bit integer (int4) precision in the XLA compiler.
- 0.4.0
* - ``jax.experimental.sparsify``
- Converts a dense matrix to a sparse matrix representation.
- Experimental
* - MOSAIC (GPU)
- Mosaic is a library of kernel-building abstractions for JAX's Pallas system
Use cases and recommendations
================================================================================
* The `nanoGPT in JAX <https://rocm.blogs.amd.com/artificial-intelligence/nanoGPT-JAX/README.html>`_
blog explores the implementation and training of a Generative Pre-trained
Transformer (GPT) model in JAX, inspired by Andrej Karpathys PyTorch-based
nanoGPT. By comparing how essential GPT components—such as self-attention
mechanisms and optimizers—are realized in PyTorch and JAX, also highlight
JAXs unique features.
* The `Optimize GPT Training: Enabling Mixed Precision Training in JAX using
ROCm on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-mixed-precision/README.html>`_
blog post provides a comprehensive guide on enhancing the training efficiency
of GPT models by implementing mixed precision techniques in JAX, specifically
tailored for AMD GPUs utilizing the ROCm platform.
* The `Supercharging JAX with Triton Kernels on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/jax-triton/README.html>`_
blog demonstrates how to develop a custom fused dropout-activation kernel for
matrices using Triton, integrate it with JAX, and benchmark its performance
using ROCm.
* The `Distributed fine-tuning with JAX on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/distributed-sft-jax/README.html>`_
outlines the process of fine-tuning a Bidirectional Encoder Representations
from Transformers (BERT)-based large language model (LLM) using JAX for a text
classification task. The blog post discuss techniques for parallelizing the
fine-tuning across multiple AMD GPUs and assess the model's performance on a
holdout dataset. During the fine-tuning, a BERT-base-cased transformer model
and the General Language Understanding Evaluation (GLUE) benchmark dataset was
used on a multi-GPU setup.
* The `MI300X workload optimization guide <https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/workload.html>`_
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
accelerator using ROCm. The page is aimed at helping users achieve optimal
performance for deep learning and other high-performance computing tasks on
the MI300X GPU.
For more use cases and recommendations, see `ROCm JAX blog posts <https://rocm.blogs.amd.com/blog/tag/jax.html>`_.

View File

@@ -21,68 +21,31 @@ release cycles for PyTorch on ROCm:
- ROCm PyTorch release:
- Provides the latest version of ROCm but might not necessarily support the
latest stable PyTorch version.
- Provides the latest version of ROCm but doesn't immediately support the latest stable PyTorch
version.
- Offers :ref:`Docker images <pytorch-docker-compat>` with ROCm and PyTorch
preinstalled.
pre-installed.
- ROCm PyTorch repository: `<https://github.com/ROCm/pytorch>`_
- See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>`
to get started.
- See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>` to get started.
- Official PyTorch release:
- Provides the latest stable version of PyTorch but might not necessarily
support the latest ROCm version.
- Provides the latest stable version of PyTorch but doesn't immediately support the latest ROCm version.
- Official PyTorch repository: `<https://github.com/pytorch/pytorch>`_
- See the `Nightly and latest stable version installation guide <https://pytorch.org/get-started/locally/>`_
or `Previous versions <https://pytorch.org/get-started/previous-versions/>`_
to get started.
or `Previous versions <https://pytorch.org/get-started/previous-versions/>`_ to get started.
PyTorch includes tooling that generates HIP source code from the CUDA backend.
This approach allows PyTorch to support ROCm without requiring manual code
modifications. For more information, see :doc:`HIPIFY <hipify:index>`.
The upstream PyTorch includes an automatic HIPification solution that automatically generates HIP
source code from the CUDA backend. This approach allows PyTorch to support ROCm without requiring
manual code modifications.
ROCm development is aligned with the stable release of PyTorch, while upstream
PyTorch testing uses the stable release of ROCm to maintain consistency.
.. _pytorch-recommendations:
Use cases and recommendations
================================================================================
* :doc:`Using ROCm for AI: training a model </how-to/rocm-for-ai/training/benchmark-docker/pytorch-training>`
guides how to leverage the ROCm platform for training AI models. It covers the
steps, tools, and best practices for optimizing training workflows on AMD GPUs
using PyTorch features.
* :doc:`Single-GPU fine-tuning and inference </how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference>`
describes and demonstrates how to use the ROCm platform for the fine-tuning
and inference of machine learning models, particularly large language models
(LLMs), on systems with a single GPU. This topic provides a detailed guide for
setting up, optimizing, and executing fine-tuning and inference workflows in
such environments.
* :doc:`Multi-GPU fine-tuning and inference optimization </how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference>`
describes and demonstrates the fine-tuning and inference of machine learning
models on systems with multiple GPUs.
* The :doc:`Instinct MI300X workload optimization guide </how-to/rocm-for-ai/inference-optimization/workload>`
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
accelerator using ROCm. This guide helps users achieve optimal performance for
deep learning and other high-performance computing tasks on the MI300X
accelerator.
* The :doc:`Inception with PyTorch documentation </conceptual/ai-pytorch-inception>`
describes how PyTorch integrates with ROCm for AI workloads It outlines the
use of PyTorch on the ROCm platform and focuses on efficiently leveraging AMD
GPU hardware for training and inference tasks in AI applications.
For more use cases and recommendations, see `ROCm PyTorch blog posts <https://rocm.blogs.amd.com/blog/tag/pytorch.html>`_.
Development of ROCm is aligned with the stable release of PyTorch while upstream PyTorch testing uses
the stable release of ROCm to maintain consistency.
.. _pytorch-docker-compat:
@@ -93,10 +56,10 @@ Docker image compatibility
<i class="fab fa-docker"></i>
AMD validates and publishes `PyTorch images <https://hub.docker.com/r/rocm/pytorch>`_
with ROCm backends on Docker Hub. The following Docker image tags and associated
inventories were tested on `ROCm 6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`_.
Click |docker-icon| to view the image on Docker Hub.
AMD validates and publishes ready-made `PyTorch images <https://hub.docker.com/r/rocm/pytorch>`_
with ROCm backends on Docker Hub. The following Docker image tags and
associated inventories are validated for `ROCm 6.3.3 <https://repo.radeon.com/rocm/apt/6.3.3/>`_.
Click the |docker-icon| icon to view the image on Docker Hub.
.. list-table:: PyTorch Docker image components
:header-rows: 1
@@ -116,130 +79,131 @@ Click |docker-icon| to view the image on Docker Hub.
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.1_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-c76af9bfb1c25b0f40d4c29e8652105c57250bf018d23ff595b06bd79666fdd7"><i class="fab fa-docker fa-lg"></i></a>
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu24.04_py3.12_pytorch_release_2.4.0/images/sha256-6c798857b2c9526b44ba535710b93a1737546acea79b53a93c646195c272f1d5"><i class="fab fa-docker fa-lg"></i></a>
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`_
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
- 24.04
- `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
- `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`_
- `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`_
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
- `master <https://bitbucket.org/icl/magma/src/master/>`_
- `1.16.0 <https://github.com/openucx/ucx/tree/v1.16.0>`_
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`_
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.6.0/images/sha256-f9d226135d51831c810dcb1251636ec61f85c65fcdda03e188c053a5d4f6585b"><i class="fab fa-docker fa-lg"></i></a>
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`_
- 22.04
- `3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
- `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`_
- `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`_
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
- `master <https://bitbucket.org/icl/magma/src/master/>`_
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`_
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`_
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.1_ubuntu24.04_py3.12_pytorch_release_2.5.1/images/sha256-3490e74d4f43dcdb3351dd334108d1ccd47e5a687c0523a2424ac1bcdd3dd6dd"><i class="fab fa-docker fa-lg"></i></a>
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
- 24.04
- `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`_
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`_
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
- `master <https://bitbucket.org/icl/magma/src/master/>`_
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.10.0>`_
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`_
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.5.1/images/sha256-26c5dfffb4a54625884abca83166940f17dd27bc75f1b24f6e80fbcb7d4e9afb"><i class="fab fa-docker fa-lg"></i></a>
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
- 22.04
- `3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`_
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`_
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
- `master <https://bitbucket.org/icl/magma/src/master/>`_
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`_
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`_
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.1_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-f378a24561fa6efc178b6dc93fc7d82e5b93653ecd59c89d4476674d29e1284d"><i class="fab fa-docker fa-lg"></i></a>
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
- 24.04
- `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`_
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`_
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
- `master <https://bitbucket.org/icl/magma/src/master/>`_
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`_
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`_
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
- `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-2308dbd0e650b7bf8d548575cbb6e2bdc021f9386384ce570da16d58ee684d22"><i class="fab fa-docker fa-lg"></i></a>
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu22.04_py3.10_pytorch_release_2.4.0/images/sha256-a09b21248133876fc8912a5ff4e6ee2c8d62b14120313e426b3dadda5702713d"><i class="fab fa-docker fa-lg"></i></a>
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
- 22.04
- `3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`_
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`_
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
- `master <https://bitbucket.org/icl/magma/src/master/>`_
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`_
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`_
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
- `4.0.7 <https://github.com/open-mpi/ompi/tree/v4.0.7>`_
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.1_ubuntu24.04_py3.12_pytorch_release_2.3.0/images/sha256-eefd2ab019728f91f94c5e6a9463cb0ea900b3011458d18fe5d88e50c0b57d86"><i class="fab fa-docker fa-lg"></i></a>
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu22.04_py3.9_pytorch_release_2.4.0/images/sha256-963187534467f0f9da77996762fc1d112a6faa5372277c348a505533e7876ec8"><i class="fab fa-docker fa-lg"></i></a>
- `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`_
- 24.04
- `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
- `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`_
- `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`_
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13>`_
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
- 22.04
- `3.9.21 <https://www.python.org/downloads/release/python-3921/>`_
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`_
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`_
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
- `master <https://bitbucket.org/icl/magma/src/master/>`_
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`_
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`_
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
- `4.0.7 <https://github.com/open-mpi/ompi/tree/v4.0.7>`_
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-473643226ab0e93a04720b256ed772619878abf9c42b9f84828cefed522696fd"><i class="fab fa-docker fa-lg"></i></a>
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-952f2621bd2bf3078bef19061e05b209105a82a7908e7e6cdf85014938a4d93a"><i class="fab fa-docker fa-lg"></i></a>
- `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`_
- 22.04
- `3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
- `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`_
- `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`_
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13>`_
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
- `master <https://bitbucket.org/icl/magma/src/master/>`_
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`_
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`_
- `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
- `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
Key ROCm libraries for PyTorch
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu22.04_py3.10_pytorch_release_2.2.1/images/sha256-a2fe20e170feb9e05da3e5728bb98e40d08567e137be8e6ba797962ed2852608"><i class="fab fa-docker fa-lg"></i></a>
- `2.2.1 <https://github.com/ROCm/pytorch/tree/release/2.2>`_
- 22.04
- `3.10 <https://www.python.org/downloads/release/python-31016/>`_
- `1.2.0 <https://github.com/ROCm/apex/tree/release/1.2.0>`_
- `0.17.1 <https://github.com/pytorch/vision/tree/v0.17.1>`_
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
- `master <https://bitbucket.org/icl/magma/src/master/>`_
- `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
- `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu20.04_py3.9_pytorch_release_2.2.1/images/sha256-7f231937c897cca5f89e360be33c70a2017d60f62d1fbe81292be48c15fe345b"><i class="fab fa-docker fa-lg"></i></a>
- `2.2.1 <https://github.com/ROCm/pytorch/tree/release/2.2>`_
- 20.04
- `3.9.21 <https://www.python.org/downloads/release/python-3921/>`_
- `1.2.0 <https://github.com/ROCm/apex/tree/release/1.2.0>`_
- `0.17.1 <https://github.com/pytorch/vision/tree/v0.17.1>`_
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13.0>`_
- `master <https://bitbucket.org/icl/magma/src/master/>`_
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
- `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu22.04_py3.9_pytorch_release_1.13.1/images/sha256-616a47758004f91951e2da6c1fe291f903de65a7b2318d4b18359b48fe3032f4"><i class="fab fa-docker fa-lg"></i></a>
- `1.13.1 <https://github.com/ROCm/pytorch/tree/release/1.13>`_
- 22.04
- `3.9.21 <https://www.python.org/downloads/release/python-3921/>`_
- `1.0.0 <https://github.com/ROCm/apex/tree/release/1.0.0>`_
- `0.14.0 <https://github.com/pytorch/vision/tree/v0.14.0>`_
- `2.19.0 <https://github.com/tensorflow/tensorboard/tree/2.19>`_
- `master <https://bitbucket.org/icl/magma/src/master/>`_
- `1.14.1 <https://github.com/openucx/ucx/tree/v1.14.1>`_
- `4.1.5 <https://github.com/open-mpi/ompi/tree/v4.1.5>`_
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.3.3_ubuntu20.04_py3.9_pytorch_release_1.13.1/images/sha256-a2cfb365aea58b84595e241ffdb0d5ef3e6566e98c10b5499f4aa29983a74ea2"><i class="fab fa-docker fa-lg"></i></a>
- `1.13.1 <https://github.com/ROCm/pytorch/tree/release/1.13>`_
- 20.04
- `3.9.21 <https://www.python.org/downloads/release/python-3921/>`_
- `1.0.0 <https://github.com/ROCm/apex/tree/release/1.0.0>`_
- `0.14.0 <https://github.com/pytorch/vision/tree/v0.14.0>`_
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18>`_
- `master <https://bitbucket.org/icl/magma/src/master/>`_
- `1.10.0 <https://github.com/openucx/ucx/tree/v1.10.0>`_
- `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
Critical ROCm libraries for PyTorch
================================================================================
PyTorch functionality on ROCm is determined by its underlying library
dependencies. These ROCm components affect the capabilities, performance, and
feature set available to developers.
The functionality of PyTorch with ROCm is determined by its underlying library
dependencies. These critical ROCm components affect the capabilities,
performance, and feature set available to developers. The versions described
are available in ROCm :version:`rocm_version`.
.. list-table::
:header-rows: 1
@@ -259,23 +223,24 @@ feature set available to developers.
- :version-ref:`hipBLAS rocm_version`
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
matrix and vector operations.
- Supports operations such as matrix multiplication, matrix-vector
products, and tensor contractions. Utilized in both dense and batched
linear algebra operations.
- Supports operations like matrix multiplication, matrix-vector products,
and tensor contractions. Utilized in both dense and batched linear
algebra operations.
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
- :version-ref:`hipBLASLt rocm_version`
- hipBLASLt is an extension of the hipBLAS library, providing additional
features like epilogues fused into the matrix multiplication kernel or
use of integer tensor cores.
- Accelerates operations such as ``torch.matmul``, ``torch.mm``, and the
- It accelerates operations like ``torch.matmul``, ``torch.mm``, and the
matrix multiplications used in convolutional and linear layers.
* - `hipCUB <https://github.com/ROCm/hipCUB>`_
- :version-ref:`hipCUB rocm_version`
- Provides a C++ template library for parallel algorithms for reduction,
scan, sort and select.
- Supports operations such as ``torch.sum``, ``torch.cumsum``,
``torch.sort`` irregular shapes often involve scanning, sorting, and
filtering, which hipCUB handles efficiently.
- Supports operations like ``torch.sum``, ``torch.cumsum``, ``torch.sort``
and ``torch.topk``. Operations on sparse tensors or tensors with
irregular shapes often involve scanning, sorting, and filtering, which
hipCUB handles efficiently.
* - `hipFFT <https://github.com/ROCm/hipFFT>`_
- :version-ref:`hipFFT rocm_version`
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
@@ -283,8 +248,8 @@ feature set available to developers.
* - `hipRAND <https://github.com/ROCm/hipRAND>`_
- :version-ref:`hipRAND rocm_version`
- Provides fast random number generation for GPUs.
- The ``torch.rand``, ``torch.randn``, and stochastic layers like
``torch.nn.Dropout`` rely on hipRAND.
- The ``torch.rand``, ``torch.randn`` and stochastic layers like
``torch.nn.Dropout``.
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
- :version-ref:`hipSOLVER rocm_version`
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and
@@ -355,7 +320,7 @@ feature set available to developers.
- :version-ref:`RPP rocm_version`
- Speeds up data augmentation, transformation, and other preprocessing steps.
- Easy to integrate into PyTorch's ``torch.utils.data`` and
``torchvision`` data load workloads to speed up data processing.
``torchvision`` data load workloads.
* - `rocThrust <https://github.com/ROCm/rocThrust>`_
- :version-ref:`rocThrust rocm_version`
- Provides a C++ template library for parallel algorithms like sorting,
@@ -372,11 +337,11 @@ feature set available to developers.
involve matrix products, such as ``torch.matmul``, ``torch.bmm``, and
more.
Supported features
Supported and unsupported features
================================================================================
This section maps GPU-accelerated PyTorch features to their supported ROCm and
PyTorch versions.
The following section maps GPU-accelerated PyTorch features to their supported
ROCm and PyTorch versions.
torch
--------------------------------------------------------------------------------
@@ -384,24 +349,23 @@ torch
`torch <https://pytorch.org/docs/stable/index.html>`_ is the central module of
PyTorch, providing data structures for multi-dimensional tensors and
implementing mathematical operations on them. It also includes utilities for
efficient serialization of tensors and arbitrary data types and other tools.
efficient serialization of tensors and arbitrary data types, along with various
other tools.
Tensor data types
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The tensor data type is specified using the ``dtype`` attribute or argument.
PyTorch supports many data types for different use cases.
The data type of a tensor is specified using the ``dtype`` attribute or argument, and PyTorch supports a wide range of data types for different use cases.
The following table lists `torch.Tensor <https://pytorch.org/docs/stable/tensors.html>`_
single data types:
The following table lists `torch.Tensor <https://pytorch.org/docs/stable/tensors.html>`_'s single data types:
.. list-table::
:header-rows: 1
* - Data type
- Description
- As of PyTorch
- As of ROCm
- Since PyTorch
- Since ROCm
* - ``torch.float8_e4m3fn``
- 8-bit floating point, e4m3
- 2.3
@@ -493,11 +457,11 @@ single data types:
.. note::
Unsigned types except ``uint8`` have limited support in eager mode. They
primarily exist to assist usage with ``torch.compile``.
Unsigned types aside from ``uint8`` are currently only have limited support in
eager mode (they primarily exist to assist usage with ``torch.compile``).
See :doc:`ROCm precision support <rocm:reference/precision-support>` for the
native hardware support of data types.
The :doc:`ROCm precision support page <rocm:reference/precision-support>`
collected the native HW support of different data types.
torch.cuda
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -512,8 +476,8 @@ leveraging ROCm and CUDA as the underlying frameworks.
* - Feature
- Description
- As of PyTorch
- As of ROCm
- Since PyTorch
- Since ROCm
* - Device management
- Utilities for managing and interacting with GPUs.
- 0.4.0
@@ -587,8 +551,8 @@ PyTorch interacts with the ROCm or CUDA environment.
* - Feature
- Description
- As of PyTorch
- As of ROCm
- Since PyTorch
- Since ROCm
* - ``cufft_plan_cache``
- Manages caching of GPU FFT plans to optimize repeated FFT computations.
- 1.7.0
@@ -636,8 +600,8 @@ Supported ``torch`` options include:
* - Option
- Description
- As of PyTorch
- As of ROCm
- Since PyTorch
- Since ROCm
* - ``allow_tf32``
- TensorFloat-32 tensor cores may be used in cuDNN convolutions on NVIDIA
Ampere or newer GPUs.
@@ -652,28 +616,28 @@ Supported ``torch`` options include:
Automatic mixed precision: torch.amp
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
PyTorch automates the process of using both 16-bit (half-precision, float16) and
32-bit (single-precision, float32) floating-point types in model training and
inference.
PyTorch that automates the process of using both 16-bit (half-precision,
float16) and 32-bit (single-precision, float32) floating-point types in model
training and inference.
.. list-table::
:header-rows: 1
* - Feature
- Description
- As of PyTorch
- As of ROCm
- Since PyTorch
- Since ROCm
* - Autocasting
- Autocast instances serve as context managers or decorators that allow
- Instances of autocast serve as context managers or decorators that allow
regions of your script to run in mixed precision.
- 1.9
- 2.5
* - Gradient scaling
- To prevent underflow, “gradient scaling” multiplies the networks
loss by a scale factor and invokes a backward pass on the scaled
loss. The same factor then scales gradients flowing backward through
the network. In other words, gradient values have a larger magnitude so
that they dont flush to zero.
loss(es) by a scale factor and invokes a backward pass on the scaled
loss(es). Gradients flowing backward through the network are then
scaled by the same factor. In other words, gradient values have a
larger magnitude, so they dont flush to zero.
- 1.9
- 2.5
* - CUDA op-specific behavior
@@ -687,7 +651,7 @@ inference.
Distributed library features
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
PyTorch distributed library includes a collective of parallelism modules, a
The PyTorch distributed library includes a collective of parallelism modules, a
communications layer, and infrastructure for launching and debugging large
training jobs. See :ref:`rocm-for-ai-pytorch-distributed` for more information.
@@ -701,13 +665,13 @@ of computational resources and scalability for large-scale tasks.
* - Feature
- Description
- As of PyTorch
- As of ROCm
- Since PyTorch
- Since ROCm
* - TensorPipe
- A point-to-point communication library integrated into
PyTorch for distributed training. It handles tensor data transfers
efficiently between different processes or devices, including those on
separate machines.
PyTorch for distributed training. It is designed to handle tensor data
transfers efficiently between different processes or devices, including
those on separate machines.
- 1.8
- 5.4
* - Gloo
@@ -726,8 +690,8 @@ torch.compiler
* - Feature
- Description
- As of PyTorch
- As of ROCm
- Since PyTorch
- Since ROCm
* - ``torch.compiler`` (AOT Autograd)
- Autograd captures not only the user-level code, but also backpropagation,
which results in capturing the backwards pass “ahead-of-time”. This
@@ -750,8 +714,8 @@ The `torchaudio <https://pytorch.org/audio/stable/index.html>`_ library provides
utilities for processing audio data in PyTorch, such as audio loading,
transformations, and feature extraction.
To ensure GPU-acceleration with ``torchaudio.transforms``, you need to
explicitly move audio data (waveform tensor) to GPU using ``.to('cuda')``.
To ensure GPU-acceleration with ``torchaudio.transforms``, you need to move audio
data (waveform tensor) explicitly to GPU using ``.to('cuda')``.
The following ``torchaudio`` features are GPU-accelerated.
@@ -760,10 +724,10 @@ The following ``torchaudio`` features are GPU-accelerated.
* - Feature
- Description
- As of torchaudio version
- As of ROCm
- Since torchaudio version
- Since ROCm
* - ``torchaudio.transforms.Spectrogram``
- Generate a spectrogram of an input waveform using STFT.
- Generates spectrogram of an input waveform using STFT.
- 0.6.0
- 4.5
* - ``torchaudio.transforms.MelSpectrogram``
@@ -783,7 +747,7 @@ torchvision
--------------------------------------------------------------------------------
The `torchvision <https://pytorch.org/vision/stable/index.html>`_ library
provides datasets, model architectures, and common image transformations for
provide datasets, model architectures, and common image transformations for
computer vision.
The following ``torchvision`` features are GPU-accelerated.
@@ -793,8 +757,8 @@ The following ``torchvision`` features are GPU-accelerated.
* - Feature
- Description
- As of torchvision version
- As of ROCm
- Since torchvision version
- Since ROCm
* - ``torchvision.transforms.functional``
- Provides GPU-compatible transformations for image preprocessing like
resize, normalize, rotate and crop.
@@ -840,7 +804,7 @@ torchtune
The `torchtune <https://pytorch.org/torchtune/stable/index.html>`_ library for
authoring, fine-tuning and experimenting with LLMs.
* Usage: Enabling developers to fine-tune ROCm PyTorch solutions.
* Usage: It works out-of-the-box, enabling developers to fine-tune ROCm PyTorch solutions.
* Only official release exists.
@@ -851,8 +815,7 @@ The `torchserve <https://pytorch.org/serve/>`_ is a PyTorch domain library
for common sparsity and parallelism primitives needed for large-scale recommender
systems.
* torchtext does not implement its own kernels. ROCm support is enabled by
linking against ROCm libraries.
* torchtext does not implement its own kernels. ROCm support is enabled by linking against ROCm libraries.
* Only official release exists.
@@ -863,16 +826,14 @@ The `torchrec <https://pytorch.org/torchrec/>`_ is a PyTorch domain library for
common sparsity and parallelism primitives needed for large-scale recommender
systems.
* torchrec does not implement its own kernels. ROCm support is enabled by
linking against ROCm libraries.
* torchrec does not implement its own kernels. ROCm support is enabled by linking against ROCm libraries.
* Only official release exists.
Unsupported PyTorch features
================================================================================
----------------------------
The following GPU-accelerated PyTorch features are not supported by ROCm for
the listed supported PyTorch versions.
The following are GPU-accelerated PyTorch features not currently supported by ROCm.
.. list-table::
:widths: 30, 60, 10
@@ -880,7 +841,7 @@ the listed supported PyTorch versions.
* - Feature
- Description
- As of PyTorch
- Since PyTorch
* - APEX batch norm
- Use APEX batch norm instead of PyTorch batch norm.
- 1.6.0
@@ -936,3 +897,31 @@ the listed supported PyTorch versions.
utilized effectively through custom CUDA extensions or advanced
workflows.
- Not a core feature
Use cases and recommendations
================================================================================
* :doc:`Using ROCm for AI: training a model </how-to/rocm-for-ai/training/train-a-model>` provides
guidance on how to leverage the ROCm platform for training AI models. It covers the steps, tools, and best practices
for optimizing training workflows on AMD GPUs using PyTorch features.
* :doc:`Single-GPU fine-tuning and inference </how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference>`
describes and demonstrates how to use the ROCm platform for the fine-tuning and inference of
machine learning models, particularly large language models (LLMs), on systems with a single AMD
Instinct MI300X accelerator. This page provides a detailed guide for setting up, optimizing, and
executing fine-tuning and inference workflows in such environments.
* :doc:`Multi-GPU fine-tuning and inference optimization </how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference>`
describes and demonstrates the fine-tuning and inference of machine learning models on systems
with multi MI300X accelerators.
* The :doc:`Instinct MI300X workload optimization guide </how-to/rocm-for-ai/inference-optimization/workload>` provides detailed
guidance on optimizing workloads for the AMD Instinct MI300X accelerator using ROCm. This guide is aimed at helping
users achieve optimal performance for deep learning and other high-performance computing tasks on the MI300X
accelerator.
* The :doc:`Inception with PyTorch documentation </conceptual/ai-pytorch-inception>`
describes how PyTorch integrates with ROCm for AI workloads It outlines the use of PyTorch on the ROCm platform and
focuses on how to efficiently leverage AMD GPU hardware for training and inference tasks in AI applications.
For more use cases and recommendations, see `ROCm PyTorch blog posts <https://rocm.blogs.amd.com/blog/tag/pytorch.html>`_.

View File

@@ -56,7 +56,7 @@ Docker image compatibility
AMD validates and publishes ready-made `TensorFlow images
<https://hub.docker.com/r/rocm/tensorflow>`_ with ROCm backends on
Docker Hub. The following Docker image tags and associated inventories are
validated for `ROCm 6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`_. Click
validated for `ROCm 6.3.3 <https://repo.radeon.com/rocm/apt/6.3.3/>`_. Click
the |docker-icon| icon to view the image on Docker Hub.
.. list-table:: TensorFlow Docker image components
@@ -64,132 +64,58 @@ the |docker-icon| icon to view the image on Docker Hub.
* - Docker image
- TensorFlow
- Ubuntu
- Dev
- Python
- TensorBoard
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4-py3.12-tf2.18-dev/images/sha256-fa9cf5fa6c6079a7118727531ccd0056c6e3224a42c3d6e78a49e7781daafff4"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- `rocm/tensorflow`__
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- dev
- 24.04
- `Python 3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
- `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.12-tf2.18-runtime/images/sha256-d14d8c4989e7c9a60f4e72461b9e349de72347c6162dcd6897e6f4f80ffbb440"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- runtime
- 24.04
- `Python 3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.10-tf2.18-dev/images/sha256-081e5bd6615a5dc17247ebd2ccc26895c3feeff086720400fa39b477e60a77c0"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
- `rocm/tensorflow`__
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
- dev
- 22.04
- `Python 3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
- `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.10-tf2.18-runtime/images/sha256-bf369637378264f4af6ddad5ca8b8611d3e372ffbea9ab7a06f1e122f0a0867b"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
- runtime
- 22.04
- `Python 3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.12-tf2.17-dev/images/sha256-5a502008c50d0b6508e6027f911bdff070a7493700ae064bed74e1d22b91ed50"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `rocm/tensorflow`__
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- dev
- 24.04
- `Python 3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
- `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.12-tf2.17-runtime/images/sha256-1ee5dfffceb71ac66617ada33de3a10de0cb74199cc4b82441192e5e92fa2ddf"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- runtime
- 24.04
- `Python 3.12.10 <https://www.python.org/downloads/release/python-3124/>`_
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.10-tf2.17-dev/images/sha256-109218ad92bfae83bbd2710475f7502166e1ed54ca0b9748a9cbc3f5a1d75af1"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- `rocm/tensorflow`__
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
- dev
- 22.04
- `Python 3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
- `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.10-tf2.17-runtime/images/sha256-5d78bd5918d394f92263daa2990e88d695d27200dd90ed83ec64d20c7661c9c1"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
- runtime
- 22.04
- `Python 3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.12-tf2.16-dev/images/sha256-b09b1ad921c09c687b7c916141051e9fcf15539a5686e5aa67c689195a522719"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.16.2-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- `rocm/tensorflow`__
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.16.2-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- dev
- 24.04
- `Python 3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
- `Python 3.12.4 <https://www.python.org/downloads/release/python-3124/>`_
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.12-tf2.16-runtime/images/sha256-20dbd824e85558abfe33fc9283cc547d88cde3c623fe95322743a5082f883a64"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.16.2-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- runtime
- 24.04
- `Python 3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.10-tf2.16-dev/images/sha256-36c4fa047c86e2470ac473ec1429aea6d4b8934b90ffeb34d1afab40e7e5b377"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.16.2 <https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.10-tf2.16-dev/images/sha256-36c4fa047c86e2470ac473ec1429aea6d4b8934b90ffeb34d1afab40e7e5b377>`__
- `rocm/tensorflow`__
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4/tensorflow_rocm-2.16.2-cp310-cp310-manylinux_2_28_x86_64.whl>`__
- dev
- 22.04
- `Python 3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
- `Python 3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`_
* - .. raw:: html
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.1-py3.10-tf2.16-runtime/images/sha256-a94150ffb81365234ebfa34e764db5474bc6ab7d141b56495eac349778dafcf3"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.1/tensorflow_rocm-2.16.2-cp312-cp312-manylinux_2_28_x86_64.whl>`__
- runtime
- 22.04
- `Python 3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`_
Critical ROCm libraries for TensorFlow
===============================================================================

View File

@@ -34,15 +34,15 @@ project = "ROCm Documentation"
project_path = os.path.abspath(".").replace("\\", "/")
author = "Advanced Micro Devices, Inc."
copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved."
version = "6.4.1"
release = "6.4.1"
version = "6.4.0"
release = "6.4.0"
setting_all_article_info = True
all_article_info_os = ["linux", "windows"]
all_article_info_author = ""
# pages with specific settings
article_pages = [
{"file": "about/release-notes", "os": ["linux"], "date": "2025-05-07"},
{"file": "about/release-notes", "os": ["linux"], "date": "2025-04-11"},
{"file": "release/changelog", "os": ["linux"],},
{"file": "compatibility/compatibility-matrix", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]},
@@ -51,15 +51,12 @@ article_pages = [
{"file": "how-to/deep-learning-rocm", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/install", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/system-health-check", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/train-a-model", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/prerequisite-system-validation", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/megatron-lm", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/pytorch-training", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/scale-model-training", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/fine-tuning/index", "os": ["linux"]},
@@ -69,10 +66,10 @@ article_pages = [
{"file": "how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/install", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/hugging-face-models", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/llm-inference-frameworks", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/vllm-benchmark", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/pytorch-inference-benchmark", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/index", "os": ["linux"]},

View File

@@ -1,25 +0,0 @@
pytorch_inference_benchmark:
unified_docker:
latest: &rocm-pytorch-docker-latest
pull_tag: rocm/pytorch:latest
docker_hub_url:
rocm_version:
pytorch_version:
hipblaslt_version:
model_groups:
- group: CLIP
tag: clip
models:
- model: CLIP
mad_tag: pyt_clip_inference
model_repo: laion/CLIP-ViT-B-32-laion2B-s34B-b79K
url: https://huggingface.co/laion/CLIP-ViT-B-32-laion2B-s34B-b79K
precision: float16
- group: Chai-1
tag: chai
models:
- model: Chai-1
mad_tag: pyt_chai1_inference
model_repo: meta-llama/Llama-3.1-8B-Instruct
url: https://huggingface.co/chaidiscovery/chai-1
precision: float16

View File

@@ -1,14 +1,14 @@
vllm_benchmark:
unified_docker:
latest:
pull_tag: rocm/vllm:rocm6.3.1_vllm0.8.5_20250513
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_vllm_0.8.5_20250513/images/sha256-5c8b4436dd0464119d9df2b44c745fadf81512f18ffb2f4b5dc235c71ebe26b4
pull_tag: rocm/vllm:instinct_main
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.7.3_20250311/images/sha256-de0a2649b735f45b7ecab8813eb7b19778ae1f40591ca1196b07bc29c42ed4a3
rocm_version: 6.3.1
vllm_version: 0.8.5
pytorch_version: 2.7.0+gitf717b2a
hipblaslt_version: 0.15
vllm_version: 0.7.3
pytorch_version: 2.7.0 (dev nightly)
hipblaslt_version: 0.13
model_groups:
- group: Meta Llama
- group: Llama
tag: llama
models:
- model: Llama 3.1 8B
@@ -56,7 +56,7 @@ vllm_benchmark:
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
precision: float8
- group: Mistral AI
- group: Mistral
tag: mistral
models:
- model: Mixtral MoE 8x7B
@@ -102,13 +102,20 @@ vllm_benchmark:
model_repo: Qwen/Qwen2-72B-Instruct
url: https://huggingface.co/Qwen/Qwen2-72B-Instruct
precision: float16
- model: QwQ-32B
mad_tag: pyt_vllm_qwq-32b
model_repo: Qwen/QwQ-32B
url: https://huggingface.co/Qwen/QwQ-32B
- group: JAIS
tag: jais
models:
- model: JAIS 13B
mad_tag: pyt_vllm_jais-13b
model_repo: core42/jais-13b-chat
url: https://huggingface.co/core42/jais-13b-chat
precision: float16
tunableop: true
- group: Databricks DBRX
- model: JAIS 30B
mad_tag: pyt_vllm_jais-30b
model_repo: core42/jais-30b-chat-v3
url: https://huggingface.co/core42/jais-30b-chat-v3
precision: float16
- group: DBRX
tag: dbrx
models:
- model: DBRX Instruct
@@ -121,7 +128,7 @@ vllm_benchmark:
model_repo: amd/dbrx-instruct-FP8-KV
url: https://huggingface.co/amd/dbrx-instruct-FP8-KV
precision: float8
- group: Google Gemma
- group: Gemma
tag: gemma
models:
- model: Gemma 2 27B
@@ -150,18 +157,3 @@ vllm_benchmark:
model_repo: deepseek-ai/deepseek-moe-16b-chat
url: https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat
precision: float16
- group: Microsoft Phi
tag: phi
models:
- model: Phi-4
mad_tag: pyt_vllm_phi-4
model_repo: microsoft/phi-4
url: https://huggingface.co/microsoft/phi-4
- group: TII Falcon
tag: falcon
models:
- model: Falcon 180B
mad_tag: pyt_vllm_falcon-180b
model_repo: tiiuae/falcon-180B
url: https://huggingface.co/tiiuae/falcon-180B
precision: float16

View File

@@ -1,29 +0,0 @@
megatron-lm_benchmark:
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: pyt_megatron_lm_train_llama-3.3-70b
- model: Llama 3.1 8B
mad_tag: pyt_megatron_lm_train_llama-3.1-8b
- model: Llama 3.1 70B
mad_tag: pyt_megatron_lm_train_llama-3.1-70b
- model: Llama 2 7B
mad_tag: pyt_megatron_lm_train_llama-2-7b
- model: Llama 2 70B
mad_tag: pyt_megatron_lm_train_llama-2-70b
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3
mad_tag: pyt_megatron_lm_train_deepseek-v3-proxy
- model: DeepSeek-V2-Lite
mad_tag: pyt_megatron_lm_train_deepseek-v2-lite-16b
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: pyt_megatron_lm_train_mixtral-8x7b
- model: Mixtral 8x22B
mad_tag: pyt_megatron_lm_train_mixtral-8x22b-proxy

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

View File

@@ -1,178 +1,15 @@
.. meta::
:description: How to use model quantization techniques to speed up inference.
:keywords: ROCm, LLM, fine-tuning, usage, tutorial, quantization, Quark, GPTQ, transformers, bitsandbytes
:keywords: ROCm, LLM, fine-tuning, usage, tutorial, quantization, GPTQ, transformers, bitsandbytes
*****************************
Model quantization techniques
*****************************
Quantization reduces the model size compared to its native full-precision version, making it easier to fit large models
onto accelerators or GPUs with limited memory usage. This section explains how to perform LLM quantization using AMD Quark, GPTQ
onto accelerators or GPUs with limited memory usage. This section explains how to perform LLM quantization using GPTQ
and bitsandbytes on AMD Instinct hardware.
.. _quantize-llms-quark:
AMD Quark
=========
`AMD Quark <https://quark.docs.amd.com/latest/>`_ offers the leading efficient and scalable quantization solution tailored to AMD Instinct GPUs. It supports ``FP8`` and ``INT8`` quantization for activations, weights, and KV cache,
including ``FP8`` attention. For very large models, it employs a two-level ``INT4-FP8`` scheme—storing weights in ``INT4`` while computing with ``FP8``—for nearly 4× compression without sacrificing accuracy.
Quark scales efficiently across multiple GPUs, efficiently handling ultra-large models like Llama-3.1-405B. Quantized ``FP8`` models like Llama, Mixtral, and Grok-1 are available under the `AMD organization on Hugging Face <https://huggingface.co/collections/amd/quark-quantized-ocp-fp8-models-66db7936d18fcbaf95d4405c>`_, and can be deployed directly via `vLLM <https://github.com/vllm-project/vllm/tree/main/vllm>`_.
Installing Quark
-------------------
The latest release of Quark can be installed with pip
.. code-block:: shell
pip install amd-quark
For detailed installation instructions, refer to the `Quark documentation <https://quark.docs.amd.com/latest/install.html>`_.
Using Quark for quantization
-----------------------------
#. First, load the pre-trained model and its corresponding tokenizer using the Hugging Face ``transformers`` library.
.. code-block:: python
from transformers import AutoTokenizer, AutoModelForCausalLM
MODEL_ID = "meta-llama/Llama-2-70b-chat-hf"
MAX_SEQ_LEN = 512
model = AutoModelForCausalLM.from_pretrained(
MODEL_ID, device_map="auto", torch_dtype="auto",
)
model.eval()
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, model_max_length=MAX_SEQ_LEN)
tokenizer.pad_token = tokenizer.eos_token
#. Prepare the calibration DataLoader (static quantization requires calibration data).
.. code-block:: python
from datasets import load_dataset
from torch.utils.data import DataLoader
BATCH_SIZE = 1
NUM_CALIBRATION_DATA = 512
dataset = load_dataset("mit-han-lab/pile-val-backup", split="validation")
text_data = dataset["text"][:NUM_CALIBRATION_DATA]
tokenized_outputs = tokenizer(
text_data, return_tensors="pt", padding=True, truncation=True, max_length=MAX_SEQ_LEN
)
calib_dataloader = DataLoader(
tokenized_outputs['input_ids'], batch_size=BATCH_SIZE, drop_last=True
)
#. Define the quantization configuration. See the comments in the following code snippet for descriptions of each configuration option.
.. code-block:: python
from quark.torch.quantization import (Config, QuantizationConfig,
FP8E4M3PerTensorSpec)
# Define fp8/per-tensor/static spec.
FP8_PER_TENSOR_SPEC = FP8E4M3PerTensorSpec(observer_method="min_max",
is_dynamic=False).to_quantization_spec()
# Define global quantization config, input tensors and weight apply FP8_PER_TENSOR_SPEC.
global_quant_config = QuantizationConfig(input_tensors=FP8_PER_TENSOR_SPEC,
weight=FP8_PER_TENSOR_SPEC)
# Define quantization config for kv-cache layers, output tensors apply FP8_PER_TENSOR_SPEC.
KV_CACHE_SPEC = FP8_PER_TENSOR_SPEC
kv_cache_layer_names_for_llama = ["*k_proj", "*v_proj"]
kv_cache_quant_config = {name :
QuantizationConfig(input_tensors=global_quant_config.input_tensors,
weight=global_quant_config.weight,
output_tensors=KV_CACHE_SPEC)
for name in kv_cache_layer_names_for_llama}
layer_quant_config = kv_cache_quant_config.copy()
EXCLUDE_LAYERS = ["lm_head"]
quant_config = Config(
global_quant_config=global_quant_config,
layer_quant_config=layer_quant_config,
kv_cache_quant_config=kv_cache_quant_config,
exclude=EXCLUDE_LAYERS)
#. Quantize the model and export
.. code-block:: python
import torch
from quark.torch import ModelQuantizer, ModelExporter
from quark.torch.export import ExporterConfig, JsonExporterConfig
# Apply quantization.
quantizer = ModelQuantizer(quant_config)
quant_model = quantizer.quantize_model(model, calib_dataloader)
# Freeze quantized model to export.
freezed_model = quantizer.freeze(model)
# Define export config.
LLAMA_KV_CACHE_GROUP = ["*k_proj", "*v_proj"]
export_config = ExporterConfig(json_export_config=JsonExporterConfig())
export_config.json_export_config.kv_cache_group = LLAMA_KV_CACHE_GROUP
EXPORT_DIR = MODEL_ID.split("/")[1] + "-w-fp8-a-fp8-kvcache-fp8-pertensor"
exporter = ModelExporter(config=export_config, export_dir=EXPORT_DIR)
with torch.no_grad():
exporter.export_safetensors_model(freezed_model,
quant_config=quant_config, tokenizer=tokenizer)
Evaluating the quantized model with vLLM
----------------------------------------
The exported Quark-quantized model can be loaded directly by vLLM for inference. You need to specify the model path and inform vLLM about the quantization method (``quantization='quark'``) and the KV cache data type (``kv_cache_dtype='fp8'``).
Use the ``LLM`` interface to load the model:
.. code-block:: python
from vllm import LLM, SamplingParamsinterface
# Sample prompts.
prompts = [
"Hello, my name is",
"The president of the United States is",
"The capital of France is",
"The future of AI is",
]
# Create a sampling params object.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
# Create an LLM.
llm = LLM(model="Llama-2-70b-chat-hf-w-fp8-a-fp8-kvcache-fp8-pertensor",
kv_cache_dtype='fp8',quantization='quark')
# Generate texts from the prompts. The output is a list of RequestOutput objects
# that contain the prompt, generated text, and other information.
outputs = llm.generate(prompts, sampling_params)
# Print the outputs.
print("\nGenerated Outputs:\n" + "-" * 60)
for output in outputs:
prompt = output.prompt
generated_text = output.outputs[0].text
print(f"Prompt: {prompt!r}")
print(f"Output: {generated_text!r}")
print("-" * 60)
You can also evaluate the quantized model's accuracy on standard benchmarks using the `lm-evaluation-harness <https://github.com/EleutherAI/lm-evaluation-harness>`_. Pass the necessary vLLM arguments to ``lm_eval`` via ``--model_args``.
.. code-block:: shell
lm_eval --model vllm \
--model_args pretrained=Llama-2-70b-chat-hf-w-fp8-a-fp8-kvcache-fp8-pertensor,kv_cache_dtype='fp8',quantization='quark' \
--tasks gsm8k
This provides a standardized way to measure the performance impact of quantization.
.. _fine-tune-llms-gptq:
GPTQ
@@ -196,7 +33,7 @@ The AutoGPTQ library implements the GPTQ algorithm.
.. code-block:: shell
# This will install pre-built wheel for a specific ROCm version.
pip install auto-gptq --no-build-isolation --extra-index-url https://huggingface.github.io/autogptq-index/whl/rocm573/
Or, install AutoGPTQ from source for the appropriate ROCm version (for example, ROCm 6.1).
@@ -206,10 +43,10 @@ The AutoGPTQ library implements the GPTQ algorithm.
# Clone the source code.
git clone https://github.com/AutoGPTQ/AutoGPTQ.git
cd AutoGPTQ
# Speed up the compilation by specifying PYTORCH_ROCM_ARCH to target device.
PYTORCH_ROCM_ARCH=gfx942 ROCM_VERSION=6.1 pip install .
# Show the package after the installation
#. Run ``pip show auto-gptq`` to print information for the installed ``auto-gptq`` package. Its output should look like
@@ -275,7 +112,7 @@ Using GPTQ with Hugging Face Transformers
.. code-block:: python
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
base_model_name = " NousResearch/Llama-2-7b-hf"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
gptq_config = GPTQConfig(bits=4, dataset="c4", tokenizer=tokenizer)
@@ -375,10 +212,10 @@ To get started with bitsandbytes primitives, use the following code as reference
.. code-block:: python
import bitsandbytes as bnb
# Use Int8 Matrix Multiplication
bnb.matmul(..., threshold=6.0)
# Use bitsandbytes 8-bit Optimizers
adam = bnb.optim.Adam8bit(model.parameters(), lr=0.001, betas=(0.9, 0.995))
@@ -390,14 +227,14 @@ To load a Transformers model in 4-bit, set ``load_in_4bit=true`` in ``BitsAndByt
.. code-block:: python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
base_model_name = "NousResearch/Llama-2-7b-hf"
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
bnb_model_4bit = AutoModelForCausalLM.from_pretrained(
base_model_name,
device_map="auto",
quantization_config=quantization_config)
# Check the memory footprint with get_memory_footprint method
print(bnb_model_4bit.get_memory_footprint())
@@ -406,9 +243,9 @@ To load a model in 8-bit for inference, use the ``load_in_8bit`` option.
.. code-block:: python
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
base_model_name = "NousResearch/Llama-2-7b-hf"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
@@ -416,7 +253,7 @@ To load a model in 8-bit for inference, use the ``load_in_8bit`` option.
base_model_name,
device_map="auto",
quantization_config=quantization_config)
prompt = "What is a large language model?"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
generated_ids = model.generate(**inputs)

View File

@@ -1,179 +0,0 @@
.. meta::
:description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the
ROCm PyTorch Docker image.
:keywords: model, MAD, automation, dashboarding, validate, pytorch
*************************************
PyTorch inference performance testing
*************************************
.. _pytorch-inference-benchmark-docker:
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/pytorch-inference-benchmark-models.yaml
{% set unified_docker = data.pytorch_inference_benchmark.unified_docker.latest %}
{% set model_groups = data.pytorch_inference_benchmark.model_groups %}
The `ROCm PyTorch Docker <https://hub.docker.com/r/rocm/pytorch/tags>`_ image offers a prebuilt,
optimized environment for testing model inference performance on AMD Instinct™ MI300X series
accelerators. This guide demonstrates how to use the AMD Model Automation and Dashboarding (MAD)
tool with the ROCm PyTorch container to test inference performance on various models efficiently.
.. _pytorch-inference-benchmark-available-models:
Supported models
================
The following models are supported for inference performance benchmarking
with PyTorch and ROCm. Some instructions, commands, and recommendations in this
documentation might vary by model -- select one to get started.
.. raw:: html
<div id="vllm-benchmark-ud-params-picker" class="container-fluid">
<div class="row">
<div class="col-2 me-2 model-param-head">Model group</div>
<div class="row col-10">
{% for model_group in model_groups %}
<div class="col-6 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
{% endfor %}
</div>
</div>
<div class="row mt-1" style="display: none;">
<div class="col-2 me-2 model-param-head">Model</div>
<div class="row col-10">
{% for model_group in model_groups %}
{% set models = model_group.models %}
{% for model in models %}
<div class="col-12 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
{% endfor %}
{% endfor %}
</div>
</div>
</div>
{% for model_group in model_groups %}
{% for model in model_group.models %}
.. container:: model-doc {{model.mad_tag}}
.. note::
See the `{{ model.model }} model card on Hugging Face <{{ model.url }}>`_ to learn more about your selected model.
Some models require access authorization before use via an external license agreement through a third party.
{% endfor %}
{% endfor %}
System validation
=================
Before running AI workloads, it's important to validate that your AMD hardware is configured
correctly and performing optimally.
To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
might hang until the periodic balancing is finalized. For more information,
see the :ref:`system validation steps <rocm-for-ai-system-optimization>`.
.. code-block:: shell
# disable automatic NUMA balancing
sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
# check if NUMA balancing is disabled (returns 0 if disabled)
cat /proc/sys/kernel/numa_balancing
0
To test for optimal performance, consult the recommended :ref:`System health benchmarks
<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
system's configuration.
Pull the Docker image
=====================
.. container:: model-doc pyt_chai1_inference
Use the following command to pull the `ROCm PyTorch Docker image <https://hub.docker.com/layers/rocm/pytorch/rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0_triton_llvm_reg_issue/images/sha256-b736a4239ab38a9d0e448af6d4adca83b117debed00bfbe33846f99c4540f79b>`_ from Docker Hub.
.. code-block:: shell
docker pull rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0_triton_llvm_reg_issue
.. note::
The Chai-1 benchmark uses a specifically selected Docker image using ROCm 6.2.3 and PyTorch 2.3.0 to address an accuracy issue.
.. container:: model-doc pyt_clip_inference
Use the following command to pull the `ROCm PyTorch Docker image <https://hub.docker.com/layers/rocm/pytorch/latest/images/sha256-05b55983e5154f46e7441897d0908d79877370adca4d1fff4899d9539d6c4969>`_ from Docker Hub.
.. code-block:: shell
docker pull rocm/pytorch:latest
.. _pytorch-benchmark-get-started:
Benchmarking
============
.. _pytorch-inference-benchmark-mad:
{% for model_group in model_groups %}
{% for model in model_group.models %}
.. container:: model-doc {{model.mad_tag}}
To simplify performance testing, the ROCm Model Automation and Dashboarding
(`<https://github.com/ROCm/MAD>`__) project provides ready-to-use scripts and configuration.
To start, clone the MAD repository to a local directory and install the required packages on the
host machine.
.. code-block:: shell
git clone https://github.com/ROCm/MAD
cd MAD
pip install -r requirements.txt
Use this command to run the performance benchmark test on the `{{model.model}} <{{ model.url }}>`_ model
using one GPU with the ``{{model.precision}}`` data type on the host machine.
.. code-block:: shell
export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
python3 tools/run_models.py --tags {{model.mad_tag}} --keep-model-dir --live-output --timeout 28800
MAD launches a Docker container with the name
``container_ci-{{model.mad_tag}}``. The latency and throughput reports of the
model are collected in ``perf.csv``.
.. note::
For improved performance, consider enabling TunableOp. By default,
``{{model.mad_tag}}`` runs with TunableOp disabled (see
`<https://github.com/ROCm/MAD/blob/develop/models.json>`__). To enable
it, edit the default run behavior in the ``tools/run_models.py``-- update the model's
run ``args`` by changing ``--tunableop off`` to ``--tunableop on``.
Enabling TunableOp triggers a two-pass run -- a warm-up followed by the performance-collection run.
Although this might increase the initial training time, it can result in a performance gain.
{% endfor %}
{% endfor %}
Further reading
===============
- To learn more about system settings and management practices to configure your system for
MI300X accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
- For application performance optimization strategies for HPC and AI workloads,
including inference with vLLM, see :doc:`../../inference-optimization/workload`.
- To learn how to run LLM models from Hugging Face or your model, see
:doc:`Running models from Hugging Face <../hugging-face-models>`.
- To learn how to optimize inference on LLMs, see
:doc:`Inference optimization <../../inference-optimization/index>`.
- To learn how to fine-tune LLMs, see
:doc:`Fine-tuning LLMs <../../fine-tuning/index>`.

View File

@@ -16,7 +16,8 @@ ROCm supports vLLM and Hugging Face TGI as major LLM-serving frameworks.
Serving using vLLM
==================
vLLM is a fast and easy-to-use library for LLM inference and serving. AMD is actively working with the vLLM team to improve performance and support the latest ROCm versions.
vLLM is a fast and easy-to-use library for LLM inference and serving. vLLM officially supports ROCm versions 5.7 and
6.0. AMD is actively working with the vLLM team to improve performance and support later ROCm versions.
See the `GitHub repository <https://github.com/vllm-project/vllm>`_ and `official vLLM documentation
<https://docs.vllm.ai/>`_ for more information.
@@ -30,9 +31,9 @@ vLLM installation
vLLM supports two ROCm-capable installation methods. Refer to the official documentation use the following links.
- `Build from source with Docker
<https://docs.vllm.ai/en/latest/getting_started/installation/gpu.html?device=rocm#build-image-from-source>`_ (recommended)
<https://docs.vllm.ai/en/latest/getting_started/amd-installation.html#build-from-source-docker-rocm>`_ (recommended)
- `Build from source <https://docs.vllm.ai/en/latest/getting_started/installation/gpu.html?device=rocm#build-wheel-from-source>`_
- `Build from source <https://docs.vllm.ai/en/latest/getting_started/amd-installation.html#build-from-source-rocm>`_
vLLM walkthrough
----------------

View File

@@ -20,8 +20,6 @@ training, fine-tuning, and inference. It leverages popular machine learning fram
- :doc:`LLM inference frameworks <llm-inference-frameworks>`
- :doc:`vLLM inference performance testing <vllm-benchmark>`
- :doc:`PyTorch inference performance testing <pytorch-inference-benchmark>`
- :doc:`Performance testing <vllm-benchmark>`
- :doc:`Deploying your model <deploy-your-model>`

View File

@@ -30,7 +30,7 @@ ROCm supports multiple :doc:`installation methods <rocm-install-on-linux:install
* :doc:`Using the AMDGPU installer <rocm-install-on-linux:install/amdgpu-install>`
* :ref:`Multi-version installation <rocm-install-on-linux:installation-types>`
* :ref:`Multi-version installation <rocm-install-on-linux:installation-types>`.
.. grid:: 1
@@ -59,8 +59,4 @@ images with the framework pre-installed.
* :doc:`JAX for ROCm <rocm-install-on-linux:install/3rd-party/jax-install>`
Next steps
==========
After installing ROCm and your desired ML libraries -- and before running AI workloads -- conduct system health benchmarks
to test the optimal performance of your AMD hardware. See :doc:`system-health-check` to get started.
The sections that follow in :doc:`Training a model <../training/train-a-model>` are geared for a ROCm with PyTorch installation.

View File

@@ -36,7 +36,7 @@ Installing vLLM
git clone https://github.com/vllm-project/vllm.git
cd vllm
docker build -f docker/Dockerfile.rocm -t vllm-rocm .
docker build -f Dockerfile.rocm -t vllm-rocm .
.. tab-set::

View File

@@ -3,9 +3,9 @@
ROCm vLLM Docker image.
:keywords: model, MAD, automation, dashboarding, validate
**********************************
vLLM inference performance testing
**********************************
********************************************************
LLM inference performance testing on AMD Instinct MI300X
********************************************************
.. _vllm-benchmark-unified-docker:
@@ -16,7 +16,7 @@ vLLM inference performance testing
The `ROCm vLLM Docker <{{ unified_docker.docker_hub_url }}>`_ image offers
a prebuilt, optimized environment for validating large language model (LLM)
inference performance on AMD Instinct™ MI300X series accelerators. This ROCm vLLM
inference performance on AMD Instinct™ MI300X series accelerator. This ROCm vLLM
Docker image integrates vLLM and PyTorch tailored specifically for MI300X series
accelerators and includes the following components:
@@ -34,18 +34,14 @@ vLLM inference performance testing
.. _vllm-benchmark-available-models:
Supported models
Available models
================
The following models are supported for inference performance benchmarking
with vLLM and ROCm. Some instructions, commands, and recommendations in this
documentation might vary by model -- select one to get started.
.. raw:: html
<div id="vllm-benchmark-ud-params-picker" class="container-fluid">
<div class="row">
<div class="col-2 me-2 model-param-head">Model group</div>
<div class="col-2 me-2 model-param-head">Model</div>
<div class="row col-10">
{% for model_group in model_groups %}
<div class="col-3 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
@@ -54,7 +50,7 @@ vLLM inference performance testing
</div>
<div class="row mt-1">
<div class="col-2 me-2 model-param-head">Model</div>
<div class="col-2 me-2 model-param-head">Model variant</div>
<div class="row col-10">
{% for model_group in model_groups %}
{% set models = model_group.models %}
@@ -115,37 +111,35 @@ vLLM inference performance testing
For information on experimental features and known issues related to ROCm optimization efforts on vLLM,
see the developer's guide at `<https://github.com/ROCm/vllm/blob/main/docs/dev-docker/README.md>`__.
System validation
=================
Getting started
===============
Before running AI workloads, it's important to validate that your AMD hardware is configured
correctly and performing optimally.
Use the following procedures to reproduce the benchmark results on an
MI300X accelerator with the prebuilt vLLM Docker image.
To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
might hang until the periodic balancing is finalized. For more information,
see the :ref:`system validation steps <rocm-for-ai-system-optimization>`.
.. _vllm-benchmark-get-started:
.. code-block:: shell
1. Disable NUMA auto-balancing.
# disable automatic NUMA balancing
sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
# check if NUMA balancing is disabled (returns 0 if disabled)
cat /proc/sys/kernel/numa_balancing
0
To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
might hang until the periodic balancing is finalized. For more information,
see :ref:`AMD Instinct MI300X system optimization <mi300x-disable-numa>`.
To test for optimal performance, consult the recommended :ref:`System health benchmarks
<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
system's configuration.
.. code-block:: shell
Pull the Docker image
=====================
# disable automatic NUMA balancing
sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
# check if NUMA balancing is disabled (returns 0 if disabled)
cat /proc/sys/kernel/numa_balancing
0
Download the `ROCm vLLM Docker image <{{ unified_docker.docker_hub_url }}>`_.
Use the following command to pull the Docker image from Docker Hub.
2. Download the `ROCm vLLM Docker image <{{ unified_docker.docker_hub_url }}>`_.
.. code-block:: shell
Use the following command to pull the Docker image from Docker Hub.
docker pull {{ unified_docker.pull_tag }}
.. code-block:: shell
docker pull {{ unified_docker.pull_tag }}
Benchmarking
============
@@ -189,25 +183,6 @@ vLLM inference performance testing
to collect latency and throughput performance data, you can also change the benchmarking
parameters. See the standalone benchmarking tab for more information.
{% if model.tunableop %}
.. note::
For improved performance, consider enabling :ref:`PyTorch TunableOp <mi300x-tunableop>`.
TunableOp automatically explores different implementations and configurations of certain PyTorch
operators to find the fastest one for your hardware.
By default, ``{{model.mad_tag}}`` runs with TunableOp disabled
(see
`<https://github.com/ROCm/MAD/blob/develop/models.json>`__). To
enable it, edit the default run behavior in the ``models.json``
configuration before running inference -- update the model's run
``args`` by changing ``--tunableop off`` to ``--tunableop on``.
Enabling TunableOp triggers a two-pass run -- a warm-up followed by the performance-collection run.
{% endif %}
.. tab-item:: Standalone benchmarking
Run the vLLM benchmark tool independently by starting the
@@ -282,7 +257,7 @@ vLLM inference performance testing
* Latency benchmark
Use this command to benchmark the latency of the {{model.model}} model on eight GPUs with ``{{model.precision}}`` precision.
Use this command to benchmark the latency of the {{model.model}} model on eight GPUs with the ``{{model.precision}}`` data type.
.. code-block::
@@ -292,11 +267,11 @@ vLLM inference performance testing
* Throughput benchmark
Use this command to benchmark the throughput of the {{model.model}} model on eight GPUs with ``{{model.precision}}`` precision.
Use this command to throughput the latency of the {{model.model}} model on eight GPUs with the ``{{model.precision}}`` data type.
.. code-block:: shell
./vllm_benchmark_report.sh -s throughput -m {{model.model_repo}} -g 8 -d {{model.precision}}
./vllm_benchmark_report.sh -s latency -m {{model.model_repo}} -g 8 -d {{model.precision}}
Find the throughput report at ``./reports_{{model.precision}}_vllm_rocm{{unified_docker.rocm_version}}/summary/{{model.model_repo.split('/', 1)[1] if '/' in model.model_repo else model.model_repo}}_throughput_report.csv``.
@@ -322,23 +297,23 @@ vLLM inference performance testing
Further reading
===============
- For application performance optimization strategies for HPC and AI workloads,
including inference with vLLM, see :doc:`../inference-optimization/workload`.
- To learn more about the options for latency and throughput benchmark scripts,
see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.
- To learn more about system settings and management practices to configure your system for
MI300X accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_
- For application performance optimization strategies for HPC and AI workloads,
including inference with vLLM, see :doc:`../../inference-optimization/workload`.
- To learn how to run LLM models from Hugging Face or your own model, see
:doc:`Running models from Hugging Face <../hugging-face-models>`.
:doc:`Running models from Hugging Face <hugging-face-models>`.
- To learn how to optimize inference on LLMs, see
:doc:`Inference optimization <../../inference-optimization/index>`.
:doc:`Inference optimization <../inference-optimization/index>`.
- To learn how to fine-tune LLMs, see
:doc:`Fine-tuning LLMs <../../fine-tuning/index>`.
:doc:`Fine-tuning LLMs <../fine-tuning/index>`.
Previous versions
=================
@@ -356,25 +331,11 @@ for benchmarking, see the version-specific documentation.
- PyTorch version
- Resources
* - 6.3.1
- 0.8.3
- 2.7.0
-
* `Documentation <https://rocm.docs.amd.com/en/docs-6.4.0/how-to/rocm-for-ai/inference/vllm-benchmark.html>`_
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.8.3_20250415/images/sha256-ad9062dea3483d59dedb17c67f7c49f30eebd6eb37c3fac0a171fb19696cc845>`_
* - 6.3.1
- 0.7.3
- 2.7.0
-
* `Documentation <https://rocm.docs.amd.com/en/docs-6.3.3/how-to/rocm-for-ai/inference/vllm-benchmark.html>`_
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.7.3_20250325/images/sha256-25245924f61750b19be6dcd8e787e46088a496c1fe17ee9b9e397f3d84d35640>`_
* - 6.3.1
- 0.6.6
- 2.7.0
-
* `Documentation <https://rocm.docs.amd.com/en/docs-6.3.2/how-to/rocm-for-ai/inference/vllm-benchmark.html>`_
* `Documentation <https://rocm.docs.amd.com/en/docs-6.3.2/how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.html>`_
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_mi300_ubuntu22.04_py3.12_vllm_0.6.6/images/sha256-9a12ef62bbbeb5a4c30a01f702c8e025061f575aa129f291a49fbd02d6b4d6c9>`_
* - 6.2.1

View File

@@ -1,104 +0,0 @@
.. meta::
:description: System health checks with RVS, RCCL tests, BabelStream, and TransferBench to validate AMD hardware performance running AI workloads.
:keywords: gpu, accelerator, system, health, validation, bench, perf, performance, rvs, rccl, babel, mi300x, mi325x, flops, bandwidth, rbt, training, inference
.. _rocm-for-ai-system-health-bench:
************************
System health benchmarks
************************
Before running AI workloads, it is important to validate that your AMD hardware is configured correctly and is performing optimally. This topic outlines several system health benchmarks you can use to test key aspects like GPU compute capabilities (FLOPS), memory bandwidth, and interconnect performance. Many of these tests are part of the ROCm Validation Suite (RVS).
ROCm Validation Suite (RVS) tests
=================================
RVS provides a collection of tests, benchmarks, and qualification tools, each
targeting a specific subsystem of the system under test. It includes tests for
GPU stress and memory bandwidth.
.. _healthcheck-install-rvs:
Install ROCm Validation Suite
-----------------------------
To get started, install RVS. For example, on an Ubuntu system with ROCm already
installed, run the following command:
.. code-block:: shell
sudo apt update
sudo apt install rocm-validation-suite
See the `ROCm Validation Suite installation instructions <https://rocm.docs.amd.com/projects/ROCmValidationSuite/en/latest/install/installation.html>`_,
and `System validation tests <https://instinct.docs.amd.com/projects/system-acceptance/en/latest/mi300x/system-validation.html#system-validation-tests>`_
in the Instinct documentation for more detailed instructions.
Benchmark, stress, and qualification tests
------------------------------------------
The GPU stress test runs various GEMM computations as workloads to stress the GPU FLOPS performance and check whether it
meets the configured target GFLOPS.
Run the benchmark, stress, and qualification tests included with RVS. See the `Benchmark, stress, qualification
<https://instinct.docs.amd.com/projects/system-acceptance/en/latest/mi300x/system-validation.html#benchmark-stress-qualification>`_
section of the Instinct documentation for usage instructions.
BabelStream test
----------------
BabelStream is a synthetic GPU benchmark based on the STREAM benchmark for
CPUs, measuring memory transfer rates to and from global device memory.
BabelStream tests are included with the RVS package as part of the `BABEL module
<https://rocm.docs.amd.com/projects/ROCmValidationSuite/en/latest/conceptual/rvs-modules.html#babel-benchmark-test-babel-module>`_.
For more information, see `Performance benchmarking
<https://instinct.docs.amd.com/projects/system-acceptance/en/latest/mi300x/performance-bench.html#babelstream-benchmarking-results>`_
in the Instinct documentation.
RCCL tests
==========
The ROCm Communication Collectives Library (RCCL) enables efficient multi-GPU
communication. The `<https://github.com/ROCm/rccl-tests>`__ suite benchmarks
the performance and verifies the correctness of these collective operations.
This helps ensure optimal scaling for multi-accelerator tasks.
1. To get started, build RCCL-tests using the official instructions in the README at
`<https://github.com/ROCm/rccl-tests?tab=readme-ov-file#build>`__ or use the
following commands:
.. code-block:: shell
git clone https://github.com/ROCm/rccl-tests.git
cd rccl-tests
make
2. Run the suggested RCCL tests -- see `RCCL benchmarking
<https://instinct.docs.amd.com/projects/system-acceptance/en/latest/mi300x/performance-bench.html#rccl-benchmarking-results>`_
in the Instinct performance benchmarking documentation for instructions.
TransferBench test
==================
TransferBench is a standalone utility for benchmarking simultaneous data
transfer performance between various devices in the system, including
CPU-to-GPU and GPU-to-GPU (peer-to-peer). This helps identify potential
bottlenecks in data movement between the host system and the GPUs, or between
GPUs, which can impact end-to-end latency.
.. _healthcheck-install-transferbench:
1. To get started, use the instructions in the `TransferBench documentation
<https://rocm.docs.amd.com/projects/TransferBench/en/latest/install/install.html#install-transferbench>`_
or use the following commands:
.. code:: shell
git clone https://github.com/ROCm/TransferBench.git
cd TransferBench
CC=hipcc make
2. Run the suggested TransferBench tests -- see `TransferBench benchmarking
<https://instinct.docs.amd.com/projects/system-acceptance/en/latest/mi300x/performance-bench.html#transferbench-benchmarking-results>`_
in the Instinct performance benchmarking documentation for instructions.

View File

@@ -12,7 +12,7 @@ ROCm is an optimized fork of the upstream
`<https://github.com/AI-Hypercomputer/maxtext>`__ enabling efficient AI workloads
on AMD MI300X series accelerators.
The MaxText for ROCm training Docker (``rocm/jax-training:maxtext-v25.5``) image
The MaxText for ROCm training Docker (``rocm/jax-training:maxtext-v25.4``) image
provides a prebuilt environment for training on AMD Instinct MI300X and MI325X accelerators,
including essential components like JAX, XLA, ROCm libraries, and MaxText utilities.
It includes the following software components:
@@ -20,15 +20,15 @@ It includes the following software components:
+--------------------------+--------------------------------+
| Software component | Version |
+==========================+================================+
| ROCm | 6.3.4 |
| ROCm | 6.3.0 |
+--------------------------+--------------------------------+
| JAX | 0.4.35 |
| JAX | 0.4.31 |
+--------------------------+--------------------------------+
| Python | 3.10.12 |
| Python | 3.10 |
+--------------------------+--------------------------------+
| Transformer Engine | 1.12.0.dev0+b8b92dc |
| Transformer Engine | 1.12.0.dev0+f81a3eb |
+--------------------------+--------------------------------+
| hipBLASLt | 0.13.0-ae9c477a |
| hipBLASLt | git78ec8622 |
+--------------------------+--------------------------------+
Supported features and models
@@ -48,8 +48,6 @@ MaxText provides the following key features to train large language models effic
The following models are pre-optimized for performance on AMD Instinct MI300X series accelerators.
* Llama 3.3 70B
* Llama 3.1 8B
* Llama 3.1 70B
@@ -79,18 +77,11 @@ across different input sequences. Support for packed input format is planned for
System validation
=================
Before running AI workloads, it's important to validate that your AMD hardware is configured
correctly and performing optimally.
If you have already validated your system settings, including aspects like NUMA auto-balancing, you
can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
If you have already validated your system settings, including NUMA
auto-balancing, skip this step. Otherwise, complete the :ref:`system validation
and optimization steps <train-a-model-system-validation>` to set up your system
before starting training.
To test for optimal performance, consult the recommended :ref:`System health benchmarks
<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
system's configuration.
Environment setup
=================
@@ -124,7 +115,7 @@ with RDMA, skip ahead to :ref:`amd-maxtext-download-docker`.
a. Master address
Change ``localhost`` to the master node's resolvable hostname or IP address:
Change `localhost` to the master node's resolvable hostname or IP address:
.. code-block:: bash
@@ -182,22 +173,20 @@ with RDMA, skip ahead to :ref:`amd-maxtext-download-docker`.
.. _amd-maxtext-download-docker:
Pull the Docker image
---------------------
Download the Docker image
-------------------------
1. Use the following command to pull the Docker image from Docker Hub.
.. code-block:: shell
docker pull rocm/jax-training:maxtext-v25.5
docker pull rocm/jax-training:maxtext-v25.4
2. Use the following command to launch the Docker container. Note that the benchmarking scripts
used in the :ref:`following section <amd-maxtext-get-started>` automatically launch the Docker container
and execute the benchmark.
2. Run the Docker container.
.. code-block:: shell
docker run -it --device /dev/dri --device /dev/kfd --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME/.ssh:/root/.ssh --shm-size 128G --name maxtext_training rocm/jax-training:maxtext-v25.5
docker run -it --device /dev/dri --device /dev/kfd --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME/.ssh:/root/.ssh --shm-size 128G --name maxtext_training rocm/jax-training:maxtext-v25.4
.. _amd-maxtext-get-started:
@@ -230,9 +219,7 @@ Single node training benchmarking examples
Run the single node training benchmark:
.. code-block:: shell
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama2_7b.sh
IMAGE="rocm/jax-training:maxtext-v25.4" bash ./llama2_7b.sh
* Example 2: Single node training with Llama 2 70B
@@ -246,7 +233,7 @@ Single node training benchmarking examples
.. code-block:: shell
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama2_70b.sh
IMAGE="rocm/jax-training:maxtext-v25.4" bash ./llama2_70b.sh
* Example 3: Single node training with Llama 3 8B
@@ -260,7 +247,7 @@ Single node training benchmarking examples
.. code-block:: shell
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama3_8b.sh
IMAGE="rocm/jax-training:maxtext-v25.4" bash ./llama3_8b.sh
* Example 4: Single node training with Llama 3 70B
@@ -274,23 +261,9 @@ Single node training benchmarking examples
.. code-block:: shell
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama3_70b.sh
IMAGE="rocm/jax-training:maxtext-v25.4" bash ./llama3_70b.sh
* Example 5: Single node training with Llama 3.3 70B
Download the benchmarking script:
.. code-block:: shell
wget https://raw.githubusercontent.com/ROCm/maxtext/refs/heads/main/benchmarks/gpu-rocm/llama3.3_70b.sh
Run the single node training benchmark:
.. code-block:: shell
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama3.3_70b.sh
* Example 6: Single node training with DeepSeek V2 16B
* Example 5: Single node training with DeepSeek V2 16B
Download the benchmarking script:
@@ -302,7 +275,7 @@ Single node training benchmarking examples
.. code-block:: shell
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./deepseek_v2_16b.sh
IMAGE="rocm/jax-training:maxtext-v25.4" bash ./deepseek_v2_16b.sh
.. note::
@@ -370,26 +343,3 @@ own cluster setup.
.. code-block:: shell
sbatch -N <num_nodes> llama3_70b_multinode.sh
Previous versions
=================
This table lists previous versions of the ROCm JAX MaxText Docker image for training
performance testing. For detailed information about available models for
benchmarking, see the version-specific documentation.
.. list-table::
:header-rows: 1
:stub-columns: 1
* - Image version
- ROCm version
- JAX version
- Resources
* - 25.4
- 6.3.0
- 0.4.31
-
* `Documentation <https://rocm.docs.amd.com/en/docs-6.3.3/how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.html>`_
* `Docker Hub <https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.4/images/sha256-fb3eb71cd74298a7b3044b7130cf84113f14d518ff05a2cd625c11ea5f6a7b01>`_

View File

@@ -1,175 +0,0 @@
.. meta::
:description: How to train a model using LLM Foundry for ROCm.
:keywords: ROCm, AI, LLM, train, PyTorch, torch, Llama, flux, tutorial, docker
******************************************
Training MPT-30B with LLM Foundry and ROCm
******************************************
MPT-30B is a 30-billion parameter decoder-style transformer-based model from
the Mosaic Pretrained Transformer (MPT) family -- learn more about it in
MosaicML's research blog `MPT-30B: Raising the bar for open-source foundation
models <https://www.databricks.com/blog/mpt-30b>`_.
ROCm and `<https://github.com/ROCm/MAD>`__ provide a pre-configured training
environment for the MPT-30B model using the ``rocm/pytorch-training:v25.5``
base `Docker image <https://hub.docker.com/layers/rocm/pytorch-training/v25.5/images/sha256-d47850a9b25b4a7151f796a8d24d55ea17bba545573f0d50d54d3852f96ecde5>`_
and the `LLM Foundry <https://github.com/mosaicml/llm-foundry>`_ framework.
This environment packages the following software components to train
on AMD Instinct MI300X series accelerators:
+--------------------------+--------------------------------+
| Software component | Version |
+==========================+================================+
| ROCm | 6.3.4 |
+--------------------------+--------------------------------+
| PyTorch | 2.7.0a0+git6374332 |
+--------------------------+--------------------------------+
| Flash Attention | 3.0.0.post1 |
+--------------------------+--------------------------------+
Using this image, you can build, run, and test the training process
for MPT-30B with access to detailed logs and performance metrics.
System validation
=================
Before running AI workloads, it's important to validate that your AMD hardware is configured
correctly and performing optimally.
If you have already validated your system settings, including aspects like NUMA auto-balancing, you
can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
before starting training.
To test for optimal performance, consult the recommended :ref:`System health benchmarks
<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
system's configuration.
Getting started
===============
The following procedures help you set up the training environment in a
reproducible Docker container. This training environment is tailored for
training MPT-30B using LLM Foundry and the specific model configurations outlined.
Other configurations and run conditions outside those described in this
document are not validated.
.. tab-set::
.. tab-item:: MAD-integrated benchmarking
On your host machine, clone the ROCm Model Automation and Dashboarding
(`<https://github.com/ROCm/MAD>`__) repository to a local directory and
install the required packages.
.. code-block:: shell
git clone https://github.com/ROCm/MAD
cd MAD
pip install -r requirements.txt
Use this command to initiate the MPT-30B training benchmark.
.. code-block:: shell
python3 tools/run_models.py --tags pyt_mpt30b_training --keep-model-dir --live-output --clean-docker-cache
.. tip::
If you experience data download failures, set the
``MAD_SECRETS_HFTOKEN`` variable to your Hugging Face access token. See
`User access tokens <https://huggingface.co/docs/hub/security-tokens>`_
for details.
.. code-block:: shell
export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
.. note::
For improved performance (training throughput), consider enabling TunableOp.
By default, ``pyt_mpt30b_training`` runs with TunableOp disabled. To enable it,
run ``tools/run_models.py`` with the ``--tunableop on`` argument or edit the
``models.json`` configuration before running training.
Although this might increase the initial training time, it can result in a performance gain.
.. tab-item:: Standalone benchmarking
To set up the training environment, clone the
`<https://github.com/ROCm/MAD>`__ repo and build the Docker image. In
this snippet, the image is named ``mosaic_mpt30_image``.
.. code-block:: shell
git clone https://github.com/ROCm/MAD
cd MAD
docker build --build-arg MAD_SYSTEM_GPU_ARCHITECTURE=gfx942 -f docker/pyt_mpt30b_training.ubuntu.amd.Dockerfile -t mosaic_mpt30_image .
Start a ``mosaic_mpt30_image`` container using the following command.
.. code-block:: shell
docker run -it --device=/dev/kfd --device=/dev/dri --group-add=video --ipc=host --shm-size=8G mosaic_mpt30_image
In the Docker container, clone the `<https://github.com/ROCm/MAD>`__
repository and navigate to the benchmark scripts directory at
``/workspace/MAD/scripts/pyt_mpt30b_training``.
.. code-block:: shell
git clone https://github.com/ROCm/MAD
cd MAD/scripts/pyt_mpt30b_training
To initiate the training process, use the following command. This script uses the hyperparameters defined in
``mpt-30b-instruct.yaml``.
.. code-block:: shell
source run.sh
.. note::
For improved performance (training throughput), consider enabling TunableOp.
To enable it, add the ``--tunableop on`` flag.
.. code-block:: shell
source run.sh --tunableop on
Although this might increase the initial training time, it can result in a performance gain.
Interpreting the output
=======================
The training output will be displayed in the terminal and simultaneously saved
to the ``output.txt`` file in the current directory. Key performance metrics will
also be extracted and appended to the ``perf_pyt_mpt30b_training.csv`` file.
Key performance metrics include:
- Training logs: Real-time display of loss metrics, accuracy, and training progress.
- Model checkpoints: Periodically saved model snapshots for potential resume or evaluation.
- Performance metrics: Detailed summaries of training speed and training loss metrics.
- Performance (throughput/samples_per_sec)
Overall throughput, measuring the total samples processed per second. Higher values indicate better hardware utilization.
- Performance per device (throughput/samples_per_sec)
Throughput on a per-device basis, showing how each GPU or CPU is performing.
- Language Cross Entropy (metrics/train/LanguageCrossEntropy)
Measures prediction accuracy. Lower cross entropy suggests the models output is closer to the expected distribution.
- Training loss (loss/train/total)
Overall training loss. A decreasing trend indicates the model is learning effectively.

View File

@@ -9,8 +9,7 @@ Training a model with PyTorch for ROCm
PyTorch is an open-source machine learning framework that is widely used for
model training with GPU-optimized components for transformer-based models.
The `PyTorch for ROCm training Docker <https://hub.docker.com/layers/rocm/pytorch-training/v25.5/images/sha256-d47850a9b25b4a7151f796a8d24d55ea17bba545573f0d50d54d3852f96ecde5>`_
(``rocm/pytorch-training:v25.5``) image
The PyTorch for ROCm training Docker (``rocm/pytorch-training:v25.4``) image
provides a prebuilt optimized environment for fine-tuning and pretraining a
model on AMD Instinct MI325X and MI300X accelerators. It includes the following
software components to accelerate training workloads:
@@ -18,19 +17,19 @@ software components to accelerate training workloads:
+--------------------------+--------------------------------+
| Software component | Version |
+==========================+================================+
| ROCm | 6.3.4 |
| ROCm | 6.3.0 |
+--------------------------+--------------------------------+
| PyTorch | 2.7.0a0+git637433 |
+--------------------------+--------------------------------+
| Python | 3.10 |
+--------------------------+--------------------------------+
| Transformer Engine | 1.12.0.dev0+25a33da |
| Transformer Engine | 1.11 |
+--------------------------+--------------------------------+
| Flash Attention | 3.0.0 |
+--------------------------+--------------------------------+
| hipBLASLt | git53b53bf |
| hipBLASLt | git258a2162 |
+--------------------------+--------------------------------+
| Triton | 3.2.0 |
| Triton | 3.1 |
+--------------------------+--------------------------------+
.. _amd-pytorch-training-model-support:
@@ -40,8 +39,6 @@ Supported models
The following models are pre-optimized for performance on the AMD Instinct MI325X and MI300X accelerators.
* Llama 3.3 70B
* Llama 3.1 8B
* Llama 3.1 70B
@@ -77,358 +74,314 @@ popular AI models.
System validation
=================
Before running AI workloads, it's important to validate that your AMD hardware is configured
correctly and performing optimally.
If you have already validated your system settings, including aspects like NUMA auto-balancing, you
can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
If you have already validated your system settings, including NUMA
auto-balancing, skip this step. Otherwise, complete the :ref:`system validation
and optimization steps <train-a-model-system-validation>` to set up your system
before starting training.
To test for optimal performance, consult the recommended :ref:`System health benchmarks
<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
system's configuration.
Environment setup
=================
This Docker image is optimized for specific model configurations outlined
below. Performance can vary for other training workloads, as AMD
doesnt validate configurations and run conditions outside those described.
Benchmarking
============
Download the Docker image
-------------------------
Once the setup is complete, choose between two options to start benchmarking:
1. Use the following command to pull the Docker image from Docker Hub.
.. tab-set::
.. code-block:: shell
.. tab-item:: MAD-integrated benchmarking
docker pull rocm/pytorch-training:v25.4
Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
directory and install the required packages on the host machine.
2. Run the Docker container.
.. code-block:: shell
.. code-block:: shell
git clone https://github.com/ROCm/MAD
cd MAD
pip install -r requirements.txt
docker run -it --device /dev/dri --device /dev/kfd --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME:$HOME -v $HOME/.ssh:/root/.ssh --shm-size 64G --name training_env rocm/pytorch-training:v25.4
For example, use this command to run the performance benchmark test on the Llama 3.1 8B model
using one GPU with the float16 data type on the host machine.
3. Use these commands if you exit the ``training_env`` container and need to return to it.
.. code-block:: shell
.. code-block:: shell
export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
python3 tools/run_models.py --tags pyt_train_llama-3.1-8b --keep-model-dir --live-output --timeout 28800
docker start training_env
docker exec -it training_env bash
The available models for MAD-integrated benchmarking are:
4. In the Docker container, clone the `<https://github.com/ROCm/MAD>`__
repository and navigate to the benchmark scripts directory
``/workspace/MAD/scripts/pytorch_train``.
* ``pyt_train_llama-3.3-70b``
.. code-block:: shell
* ``pyt_train_llama-3.1-8b``
git clone https://github.com/ROCm/MAD
cd MAD/scripts/pytorch_train
* ``pyt_train_llama-3.1-70b``
Prepare training datasets and dependencies
------------------------------------------
* ``pyt_train_flux``
The following benchmarking examples require downloading models and datasets
from Hugging Face. To ensure successful access to gated repos, set your
``HF_TOKEN``.
MAD launches a Docker container with the name
``container_ci-pyt_train_llama-3.1-8b``, for example. The latency and throughput reports of the
model are collected in the following path: ``~/MAD/perf.csv``.
.. code-block:: shell
.. tab-item:: Standalone benchmarking
export HF_TOKEN=$your_personal_hugging_face_access_token
.. rubric:: Download the Docker image and required packages
Run the setup script to install libraries and datasets needed for benchmarking.
Use the following command to pull the Docker image from Docker Hub.
.. code-block:: shell
.. code-block:: shell
./pytorch_benchmark_setup.sh
docker pull rocm/pytorch-training:v25.5
``pytorch_benchmark_setup.sh`` installs the following libraries:
Run the Docker container.
.. list-table::
:header-rows: 1
.. code-block:: shell
* - Library
- Benchmark model
- Reference
docker run -it --device /dev/dri --device /dev/kfd --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME:$HOME -v $HOME/.ssh:/root/.ssh --shm-size 64G --name training_env rocm/pytorch-training:v25.5
* - ``accelerate``
- Llama 3.1 8B, FLUX
- `Hugging Face Accelerate <https://huggingface.co/docs/accelerate/en/index>`_
Use these commands if you exit the ``training_env`` container and need to return to it.
* - ``datasets``
- Llama 3.1 8B, 70B, FLUX
- `Hugging Face Datasets <https://huggingface.co/docs/datasets/v3.2.0/en/index>`_ 3.2.0
.. code-block:: shell
* - ``torchdata``
- Llama 3.1 70B
- `TorchData <https://pytorch.org/data/beta/index.html>`_
docker start training_env
docker exec -it training_env bash
* - ``tomli``
- Llama 3.1 70B
- `Tomli <https://pypi.org/project/tomli/>`_
In the Docker container, clone the `<https://github.com/ROCm/MAD>`__
repository and navigate to the benchmark scripts directory
``/workspace/MAD/scripts/pytorch_train``.
* - ``tiktoken``
- Llama 3.1 70B
- `tiktoken <https://github.com/openai/tiktoken>`_
.. code-block:: shell
* - ``blobfile``
- Llama 3.1 70B
- `blobfile <https://pypi.org/project/blobfile/>`_
git clone https://github.com/ROCm/MAD
cd MAD/scripts/pytorch_train
* - ``tabulate``
- Llama 3.1 70B
- `tabulate <https://pypi.org/project/tabulate/>`_
.. rubric:: Prepare training datasets and dependencies
* - ``wandb``
- Llama 3.1 70B
- `Weights & Biases <https://github.com/wandb/wandb>`_
The following benchmarking examples require downloading models and datasets
from Hugging Face. To ensure successful access to gated repos, set your
``HF_TOKEN``.
* - ``sentencepiece``
- Llama 3.1 70B, FLUX
- `SentencePiece <https://github.com/google/sentencepiece>`_ 0.2.0
.. code-block:: shell
* - ``tensorboard``
- Llama 3.1 70 B, FLUX
- `TensorBoard <https://www.tensorflow.org/tensorboard>`_ 2.18.0
export HF_TOKEN=$your_personal_hugging_face_access_token
* - ``csvkit``
- FLUX
- `csvkit <https://csvkit.readthedocs.io/en/latest/>`_ 2.0.1
Run the setup script to install libraries and datasets needed for benchmarking.
* - ``deepspeed``
- FLUX
- `DeepSpeed <https://github.com/deepspeedai/DeepSpeed>`_ 0.16.2
.. code-block:: shell
* - ``diffusers``
- FLUX
- `Hugging Face Diffusers <https://huggingface.co/docs/diffusers/en/index>`_ 0.31.0
./pytorch_benchmark_setup.sh
* - ``GitPython``
- FLUX
- `GitPython <https://github.com/gitpython-developers/GitPython>`_ 3.1.44
``pytorch_benchmark_setup.sh`` installs the following libraries:
* - ``opencv-python-headless``
- FLUX
- `opencv-python-headless <https://pypi.org/project/opencv-python-headless/>`_ 4.10.0.84
.. list-table::
:header-rows: 1
* - ``peft``
- FLUX
- `PEFT <https://huggingface.co/docs/peft/en/index>`_ 0.14.0
* - Library
- Benchmark model
- Reference
* - ``protobuf``
- FLUX
- `Protocol Buffers <https://github.com/protocolbuffers/protobuf>`_ 5.29.2
* - ``accelerate``
- Llama 3.1 8B, FLUX
- `Hugging Face Accelerate <https://huggingface.co/docs/accelerate/en/index>`_
* - ``pytest``
- FLUX
- `PyTest <https://docs.pytest.org/en/stable/>`_ 8.3.4
* - ``datasets``
- Llama 3.1 8B, 70B, FLUX
- `Hugging Face Datasets <https://huggingface.co/docs/datasets/v3.2.0/en/index>`_ 3.2.0
* - ``python-dotenv``
- FLUX
- `python-dotenv <https://pypi.org/project/python-dotenv/>`_ 1.0.1
* - ``torchdata``
- Llama 3.1 70B
- `TorchData <https://pytorch.org/data/beta/index.html>`_
* - ``seaborn``
- FLUX
- `Seaborn <https://seaborn.pydata.org/>`_ 0.13.2
* - ``tomli``
- Llama 3.1 70B
- `Tomli <https://pypi.org/project/tomli/>`_
* - ``transformers``
- FLUX
- `Transformers <https://huggingface.co/docs/transformers/en/index>`_ 4.47.0
* - ``tiktoken``
- Llama 3.1 70B
- `tiktoken <https://github.com/openai/tiktoken>`_
``pytorch_benchmark_setup.sh`` downloads the following models from Hugging Face:
* - ``blobfile``
- Llama 3.1 70B
- `blobfile <https://pypi.org/project/blobfile/>`_
* `meta-llama/Llama-3.1-70B-Instruct <https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct>`_
* - ``tabulate``
- Llama 3.1 70B
- `tabulate <https://pypi.org/project/tabulate/>`_
* `black-forest-labs/FLUX.1-dev <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
* - ``wandb``
- Llama 3.1 70B
- `Weights & Biases <https://github.com/wandb/wandb>`_
Along with the following datasets:
* - ``sentencepiece``
- Llama 3.1 70B, FLUX
- `SentencePiece <https://github.com/google/sentencepiece>`_ 0.2.0
* `WikiText <https://huggingface.co/datasets/Salesforce/wikitext>`_
* - ``tensorboard``
- Llama 3.1 70 B, FLUX
- `TensorBoard <https://www.tensorflow.org/tensorboard>`_ 2.18.0
* `UltraChat 200k <https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k>`_
* - ``csvkit``
- FLUX
- `csvkit <https://csvkit.readthedocs.io/en/latest/>`_ 2.0.1
* `bghira/pseudo-camera-10k <https://huggingface.co/datasets/bghira/pseudo-camera-10k>`_
* - ``deepspeed``
- FLUX
- `DeepSpeed <https://github.com/deepspeedai/DeepSpeed>`_ 0.16.2
Getting started
===============
* - ``diffusers``
- FLUX
- `Hugging Face Diffusers <https://huggingface.co/docs/diffusers/en/index>`_ 0.31.0
The prebuilt PyTorch with ROCm training environment allows users to quickly validate
system performance, conduct training benchmarks, and achieve superior
performance for models like Llama 3.1 and Llama 2. This container should not be
expected to provide generalized performance across all training workloads. You
can expect the container to perform in the model configurations described in
the following section, but other configurations are not validated by AMD.
* - ``GitPython``
- FLUX
- `GitPython <https://github.com/gitpython-developers/GitPython>`_ 3.1.44
Use the following instructions to set up the environment, configure the script
to train models, and reproduce the benchmark results on MI325X and MI300X
accelerators with the AMD PyTorch training Docker image.
* - ``opencv-python-headless``
- FLUX
- `opencv-python-headless <https://pypi.org/project/opencv-python-headless/>`_ 4.10.0.84
Once your environment is set up, use the following commands and examples to start benchmarking.
* - ``peft``
- FLUX
- `PEFT <https://huggingface.co/docs/peft/en/index>`_ 0.14.0
Pretraining
-----------
* - ``protobuf``
- FLUX
- `Protocol Buffers <https://github.com/protocolbuffers/protobuf>`_ 5.29.2
To start the pretraining benchmark, use the following command with the
appropriate options. See the following list of options and their descriptions.
* - ``pytest``
- FLUX
- `PyTest <https://docs.pytest.org/en/stable/>`_ 8.3.4
.. code-block:: shell
* - ``python-dotenv``
- FLUX
- `python-dotenv <https://pypi.org/project/python-dotenv/>`_ 1.0.1
./pytorch_benchmark_report.sh -t $training_mode -m $model_repo -p $datatype -s $sequence_length
* - ``seaborn``
- FLUX
- `Seaborn <https://seaborn.pydata.org/>`_ 0.13.2
Options and available models
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
* - ``transformers``
- FLUX
- `Transformers <https://huggingface.co/docs/transformers/en/index>`_ 4.47.0
.. list-table::
:header-rows: 1
``pytorch_benchmark_setup.sh`` downloads the following models from Hugging Face:
* - Name
- Options
- Description
* `meta-llama/Llama-3.1-70B-Instruct <https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct>`_
* - ``$training_mode``
- ``pretrain``
- Benchmark pretraining
* `black-forest-labs/FLUX.1-dev <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
* -
- ``finetune_fw``
- Benchmark full weight fine-tuning (Llama 3.1 70B with BF16)
Along with the following datasets:
* -
- ``finetune_lora``
- Benchmark LoRA fine-tuning (Llama 3.1 70B with BF16)
* `WikiText <https://huggingface.co/datasets/Salesforce/wikitext>`_
* -
- ``HF_finetune_lora``
- Benchmark LoRA fine-tuning with Hugging Face PEFT (Llama 2 70B with BF16)
* `UltraChat 200k <https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k>`_
* - ``$datatype``
- ``FP8`` or ``BF16``
- Only Llama 3.1 8B supports FP8 precision.
* `bghira/pseudo-camera-10k <https://huggingface.co/datasets/bghira/pseudo-camera-10k>`_
* - ``$model_repo``
- ``Llama-3.1-8B``
- `Llama 3.1 8B <https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct>`_
.. rubric:: Pretraining
* -
- ``Llama-3.1-70B``
- `Llama 3.1 70B <https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct>`_
To start the pretraining benchmark, use the following command with the
appropriate options. See the following list of options and their descriptions.
* -
- ``Llama-2-70B``
- `Llama 2 70B <https://huggingface.co/meta-llama/Llama-2-70B>`_
.. code-block:: shell
* -
- ``Flux``
- `FLUX.1 [dev] <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
./pytorch_benchmark_report.sh -t $training_mode -m $model_repo -p $datatype -s $sequence_length
* - ``$sequence_length``
- Sequence length for the language model.
- Between 2048 and 8192. 8192 by default.
.. list-table::
:header-rows: 1
.. note::
* - Name
- Options
- Description
Occasionally, downloading the Flux dataset might fail. In the event of this
error, manually download it from Hugging Face at
`black-forest-labs/FLUX.1-dev <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
and save it to `/workspace/FluxBenchmark`. This ensures that the test script can access
the required dataset.
* - ``$training_mode``
- ``pretrain``
- Benchmark pretraining
Fine-tuning
-----------
* -
- ``finetune_fw``
- Benchmark full weight fine-tuning (Llama 3.1 70B with BF16)
To start the fine-tuning benchmark, use the following command. It will run the benchmarking example of Llama 3.1 70B
with the WikiText dataset using the AMD fork of `torchtune <https://github.com/AMD-AIG-AIMA/torchtune>`_.
* -
- ``finetune_lora``
- Benchmark LoRA fine-tuning (Llama 3.1 70B with BF16)
.. code-block:: shell
* -
- ``HF_finetune_lora``
- Benchmark LoRA fine-tuning with Hugging Face PEFT (Llama 2 70B with BF16)
./pytorch_benchmark_report.sh -t {finetune_fw, finetune_lora} -p BF16 -m Llama-3.1-70B
* - ``$datatype``
- ``FP8`` or ``BF16``
- Only Llama 3.1 8B supports FP8 precision.
Use the following command to run the benchmarking example of Llama 2 70B with the UltraChat 200k dataset using
`Hugging Face PEFT <https://huggingface.co/docs/peft/en/index>`_.
* - ``$model_repo``
- ``Llama-3.3-70B``
- `Llama 3.3 70B <https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct>`_
.. code-block:: shell
* -
- ``Llama-3.1-8B``
- `Llama 3.1 8B <https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct>`_
./pytorch_benchmark_report.sh -t HF_finetune_lora -p BF16 -m Llama-2-70B
* -
- ``Llama-3.1-70B``
- `Llama 3.1 70B <https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct>`_
Benchmarking examples
---------------------
* -
- ``Llama-2-70B``
- `Llama 2 70B <https://huggingface.co/meta-llama/Llama-2-70B>`_
Here are some examples of how to use the command.
* -
- ``Flux``
- `FLUX.1 [dev] <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
* Example 1: Llama 3.1 70B with BF16 precision with `torchtitan <https://github.com/ROCm/torchtitan>`_.
* - ``$sequence_length``
- Sequence length for the language model.
- Between 2048 and 8192. 8192 by default.
.. code-block:: shell
.. note::
./pytorch_benchmark_report.sh -t pretrain -p BF16 -m Llama-3.1-70B -s 8192
Occasionally, downloading the Flux dataset might fail. In the event of this
error, manually download it from Hugging Face at
`black-forest-labs/FLUX.1-dev <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
and save it to `/workspace/FluxBenchmark`. This ensures that the test script can access
the required dataset.
* Example 2: Llama 3.1 8B with FP8 precision using Transformer Engine (TE) and Hugging Face Accelerator.
.. rubric:: Fine-tuning
.. code-block:: shell
To start the fine-tuning benchmark, use the following command. It will run the benchmarking example of Llama 3.1 70B
with the WikiText dataset using the AMD fork of `torchtune <https://github.com/AMD-AIG-AIMA/torchtune>`_.
./pytorch_benchmark_report.sh -t pretrain -p FP8 -m Llama-3.1-70B -s 8192
.. code-block:: shell
* Example 3: FLUX.1-dev with BF16 precision with FluxBenchmark.
./pytorch_benchmark_report.sh -t {finetune_fw, finetune_lora} -p BF16 -m Llama-3.1-70B
.. code-block:: shell
Use the following command to run the benchmarking example of Llama 2 70B with the UltraChat 200k dataset using
`Hugging Face PEFT <https://huggingface.co/docs/peft/en/index>`_.
./pytorch_benchmark_report.sh -t pretrain -p BF16 -m Flux
.. code-block:: shell
* Example 4: Torchtune full weight fine-tuning with Llama 3.1 70B
./pytorch_benchmark_report.sh -t HF_finetune_lora -p BF16 -m Llama-2-70B
.. code-block:: shell
.. rubric:: Benchmarking examples
./pytorch_benchmark_report.sh -t finetune_fw -p BF16 -m Llama-3.1-70B
Here are some example commands to get started pretraining and fine-tuning with various model configurations.
* Example 5: Torchtune LoRA fine-tuning with Llama 3.1 70B
* Example 1: Llama 3.1 70B with BF16 precision with `torchtitan <https://github.com/ROCm/torchtitan>`_.
.. code-block:: shell
.. code-block:: shell
./pytorch_benchmark_report.sh -t finetune_lora -p BF16 -m Llama-3.1-70B
./pytorch_benchmark_report.sh -t pretrain -p BF16 -m Llama-3.1-70B -s 8192
* Example 6: Hugging Face PEFT LoRA fine-tuning with Llama 2 70B
* Example 2: Llama 3.1 8B with FP8 precision using Transformer Engine (TE) and Hugging Face Accelerator.
.. code-block:: shell
.. code-block:: shell
./pytorch_benchmark_report.sh -t pretrain -p FP8 -m Llama-3.1-70B -s 8192
* Example 3: FLUX.1-dev with BF16 precision with FluxBenchmark.
.. code-block:: shell
./pytorch_benchmark_report.sh -t pretrain -p BF16 -m Flux
* Example 4: Torchtune full weight fine-tuning with Llama 3.1 70B
.. code-block:: shell
./pytorch_benchmark_report.sh -t finetune_fw -p BF16 -m Llama-3.1-70B
* Example 5: Torchtune LoRA fine-tuning with Llama 3.1 70B
.. code-block:: shell
./pytorch_benchmark_report.sh -t finetune_lora -p BF16 -m Llama-3.1-70B
* Example 6: Torchtune full weight fine-tuning with Llama-3.3-70B
.. code-block:: shell
./pytorch_benchmark_report.sh -t finetune_fw -p BF16 -m Llama-3.3-70B
* Example 7: Torchtune LoRA fine-tuning with Llama-3.3-70B
.. code-block:: shell
./pytorch_benchmark_report.sh -t finetune_lora -p BF16 -m Llama-3.3-70B
* Example 8: Torchtune QLoRA fine-tuning with Llama-3.3-70B
.. code-block:: shell
./pytorch_benchmark_report.sh -t finetune_qlora -p BF16 -m Llama-3.3-70B
* Example 9: Hugging Face PEFT LoRA fine-tuning with Llama 2 70B
.. code-block:: shell
./pytorch_benchmark_report.sh -t HF_finetune_lora -p BF16 -m Llama-2-70B
./pytorch_benchmark_report.sh -t HF_finetune_lora -p BF16 -m Llama-2-70B
Previous versions
=================
@@ -446,13 +399,6 @@ benchmarking, see the version-specific documentation.
- PyTorch version
- Resources
* - v25.4
- 6.3.0
- 2.7.0a0+git637433
-
* `Documentation <https://rocm.docs.amd.com/en/docs-6.3.3/how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.html>`_
* `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-training/v25.4/images/sha256-fa98a9aa69968e654466c06f05aaa12730db79b48b113c1ab4f7a5fe6920a20b>`_
* - v25.3
- 6.3.0
- 2.7.0a0+git637433

View File

@@ -21,12 +21,8 @@ In this guide, you'll learn about:
- Training a model
- :doc:`With Megatron-LM <benchmark-docker/megatron-lm>`
- :doc:`Train a model with Megatron-LM <benchmark-docker/megatron-lm>`
- :doc:`With PyTorch <benchmark-docker/pytorch-training>`
- :doc:`With JAX MaxText <benchmark-docker/jax-maxtext>`
- :doc:`With LLM Foundry <benchmark-docker/mpt-llm-foundry>`
- :doc:`Train a model with PyTorch <benchmark-docker/pytorch-training>`
- :doc:`Scaling model training <scale-model-training>`

View File

@@ -5,13 +5,12 @@
:keywords: ROCm, AI, LLM, train, megatron, Llama, tutorial, docker, torch, pytorch, jax
.. _train-a-model-system-validation:
.. _rocm-for-ai-system-optimization:
**********************************************************
Prerequisite system validation before running AI workloads
**********************************************************
**********************************************
Prerequisite system validation before training
**********************************************
Complete the following system validation and optimization steps to set up your system before starting training and inference.
Complete the following system validation and optimization steps to set up your system before starting training.
Disable NUMA auto-balancing
---------------------------
@@ -27,8 +26,7 @@ the output is ``1``, run the following command to disable NUMA auto-balancing.
sudo sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
See `Disable NUMA auto-balancing <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html#disable-numa-auto-balancing>`_
in the Instinct documentation for more information.
See :ref:`mi300x-disable-numa` for more information.
Hardware verification with ROCm
-------------------------------
@@ -44,8 +42,7 @@ Run the command:
rocm-smi --setperfdeterminism 1900
See `Hardware verfication for ROCm <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html#hardware-verification-with-rocm>`_
in the Instinct documentation for more information.
See :ref:`mi300x-hardware-verification-with-rocm` for more information.
RCCL Bandwidth Test for multi-node setups
-----------------------------------------

View File

@@ -45,7 +45,6 @@
(communication-libraries)=
* {doc}`RCCL <rccl:index>`
* {doc}`rocSHMEM <rocshmem:index>`
:::
:::{grid-item-card} Math

View File

@@ -281,31 +281,13 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- SGPR File (KiB)
- GFXIP Major version
- GFXIP Minor version
*
- Radeon AI PRO R9700
- RDNA4
- gfx1201
- 16
- 64
- 32 or 64
- 128
- 64
- 8
- N/A
- 32
- 16
- 32
- 768
- 32
- 12
- 0
*
- Radeon PRO V710
- RDNA3
- gfx1101
- 28
- 54
- 32 or 64
- 32
- 128
- 56
- 4
@@ -314,7 +296,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 768
- 32
- 16
- 11
- 0
*
@@ -323,7 +305,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1100
- 48
- 96
- 32 or 64
- 32
- 128
- 96
- 6
@@ -332,7 +314,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 768
- 32
- 16
- 11
- 0
*
@@ -341,7 +323,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1100
- 48
- 96
- 32 or 64
- 32
- 128
- 96
- 6
@@ -350,7 +332,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 768
- 32
- 16
- 11
- 0
*
@@ -359,7 +341,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1100
- 48
- 70
- 32 or 64
- 32
- 128
- 96
- 6
@@ -368,7 +350,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 768
- 32
- 16
- 11
- 0
*
@@ -377,7 +359,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1100
- 32
- 70
- 32 or 64
- 32
- 128
- 64
- 6
@@ -386,7 +368,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 768
- 32
- 16
- 11
- 0
*
@@ -395,7 +377,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1101
- 16
- 48
- 32 or 64
- 32
- 128
- 64
- 4
@@ -404,7 +386,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 768
- 32
- 16
- 11
- 0
*
@@ -413,7 +395,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1030
- 32
- 60
- 32 or 64
- 32
- 128
- 128
- 4
@@ -422,7 +404,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 512
- 32
- 16
- 10
- 3
*
@@ -431,7 +413,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1032
- 8
- 28
- 32 or 64
- 32
- 128
- 32
- 2
@@ -440,7 +422,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 512
- 32
- 16
- 10
- 3
*
@@ -449,7 +431,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1030
- 32
- 72
- 32 or 64
- 32
- 128
- 128
- 4
@@ -458,7 +440,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 512
- 32
- 16
- 10
- 3
*
@@ -467,7 +449,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1012
- 8
- 22
- 32 or 64
- 32
- 128
-
- 4
@@ -522,85 +504,13 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- SGPR File (KiB)
- GFXIP Major version
- GFXIP Minor version
*
- Radeon RX 9070 XT
- RDNA4
- gfx1201
- 16
- 64
- 32 or 64
- 128
- 64
- 8
- N/A
- 32
- 16
- 32
- 768
- 32
- 12
- 0
*
- Radeon RX 9070 GRE
- RDNA4
- gfx1201
- 16
- 48
- 32 or 64
- 128
- 48
- 6
- N/A
- 32
- 16
- 32
- 768
- 32
- 12
- 0
*
- Radeon RX 9070
- RDNA4
- gfx1201
- 16
- 56
- 32 or 64
- 128
- 64
- 8
- N/A
- 32
- 16
- 32
- 768
- 32
- 12
- 0
*
- Radeon RX 9060 XT
- RDNA4
- gfx1200
- 16
- 32
- 32 or 64
- 128
- 32
- 4
- N/A
- 32
- 16
- 32
- 768
- 32
- 12
- 0
*
- Radeon RX 7900 XTX
- RDNA3
- gfx1100
- 24
- 96
- 32 or 64
- 32
- 128
- 96
- 6
@@ -609,7 +519,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 768
- 32
- 16
- 11
- 0
*
@@ -618,7 +528,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1100
- 20
- 84
- 32 or 64
- 32
- 128
- 80
- 6
@@ -627,7 +537,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 768
- 32
- 16
- 11
- 0
*
@@ -636,7 +546,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1100
- 16
- 80
- 32 or 64
- 32
- 128
- 64
- 6
@@ -645,7 +555,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 768
- 32
- 16
- 11
- 0
*
@@ -654,7 +564,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1101
- 16
- 60
- 32 or 64
- 32
- 128
- 64
- 4
@@ -663,7 +573,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 768
- 32
- 16
- 11
- 0
*
@@ -672,7 +582,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1101
- 12
- 54
- 32 or 64
- 32
- 128
- 48
- 4
@@ -681,7 +591,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 768
- 32
- 16
- 11
- 0
*
@@ -690,7 +600,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1102
- 8
- 32
- 32 or 64
- 32
- 128
- 32
- 2
@@ -699,7 +609,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 512
- 32
- 16
- 11
- 0
*
@@ -708,7 +618,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1030
- 16
- 80
- 32 or 64
- 32
- 128
- 128
- 4
@@ -717,7 +627,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 512
- 32
- 16
- 10
- 3
*
@@ -726,7 +636,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1030
- 16
- 80
- 32 or 64
- 32
- 128
- 128
- 4
@@ -735,7 +645,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 512
- 32
- 16
- 10
- 3
*
@@ -744,7 +654,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1030
- 16
- 72
- 32 or 64
- 32
- 128
- 128
- 4
@@ -753,7 +663,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 512
- 32
- 16
- 10
- 3
*
@@ -762,7 +672,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1030
- 16
- 60
- 32 or 64
- 32
- 128
- 128
- 4
@@ -771,7 +681,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 512
- 32
- 16
- 10
- 3
*
@@ -780,7 +690,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1031
- 12
- 40
- 32 or 64
- 32
- 128
- 96
- 3
@@ -789,7 +699,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 512
- 32
- 16
- 10
- 3
*
@@ -798,7 +708,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1031
- 12
- 40
- 32 or 64
- 32
- 128
- 96
- 3
@@ -807,7 +717,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 512
- 32
- 16
- 10
- 3
*
@@ -816,7 +726,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1031
- 10
- 36
- 32 or 64
- 32
- 128
- 80
- 3
@@ -825,7 +735,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 512
- 32
- 16
- 10
- 3
*
@@ -834,7 +744,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1032
- 8
- 32
- 32 or 64
- 32
- 128
- 32
- 2
@@ -843,7 +753,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 512
- 32
- 16
- 10
- 3
*
@@ -852,7 +762,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1032
- 8
- 32
- 32 or 64
- 32
- 128
- 32
- 2
@@ -861,7 +771,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 512
- 32
- 16
- 10
- 3
*
@@ -870,7 +780,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- gfx1032
- 8
- 28
- 32 or 64
- 32
- 128
- 32
- 2
@@ -879,7 +789,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
- 16
- 32
- 512
- 32
- 16
- 10
- 3
*

View File

@@ -10,7 +10,6 @@
| Version | Release date |
| ------- | ------------ |
| [6.4.1](https://rocm.docs.amd.com/en/docs-6.4.1/) | May 21, 2025 |
| [6.4.0](https://rocm.docs.amd.com/en/docs-6.4.0/) | April 11, 2025 |
| [6.3.3](https://rocm.docs.amd.com/en/docs-6.3.3/) | February 19, 2025 |
| [6.3.2](https://rocm.docs.amd.com/en/docs-6.3.2/) | January 28, 2025 |

View File

@@ -36,22 +36,16 @@ subtrees:
title: Use ROCm for AI
subtrees:
- entries:
- file: how-to/rocm-for-ai/install.rst
title: Installation
- file: how-to/rocm-for-ai/system-health-check.rst
title: System health benchmarks
- file: how-to/rocm-for-ai/training/index.rst
title: Training
subtrees:
- entries:
- file: how-to/rocm-for-ai/training/benchmark-docker/megatron-lm.rst
- file: how-to/rocm-for-ai/training/benchmark-docker/megatron-lm
title: Train a model with Megatron-LM
- file: how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.rst
- file: how-to/rocm-for-ai/training/benchmark-docker/pytorch-training
title: Train a model with PyTorch
- file: how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.rst
- file: how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext
title: Train a model with JAX MaxText
- file: how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry
title: Train a model with LLM Foundry
- file: how-to/rocm-for-ai/training/scale-model-training.rst
title: Scale model training
@@ -74,14 +68,14 @@ subtrees:
title: Inference
subtrees:
- entries:
- file: how-to/rocm-for-ai/inference/install.rst
title: Installation
- file: how-to/rocm-for-ai/inference/hugging-face-models.rst
title: Run models from Hugging Face
- file: how-to/rocm-for-ai/inference/llm-inference-frameworks.rst
title: LLM inference frameworks
- file: how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst
title: vLLM inference performance testing
- file: how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference.rst
title: PyTorch inference performance testing
- file: how-to/rocm-for-ai/inference/vllm-benchmark.rst
title: Performance testing
- file: how-to/rocm-for-ai/inference/deploy-your-model.rst
title: Deploy your model

View File

@@ -1,4 +1,4 @@
rocm-docs-core==1.20.0
rocm-docs-core==1.18.2
sphinx-reredirects
sphinx-sitemap
sphinxcontrib.datatemplates==0.11.0

View File

@@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile requirements.in
# pip-compile docs/sphinx/requirements.in
#
accessible-pygments==0.0.5
# via pydata-sphinx-theme
@@ -10,73 +10,74 @@ alabaster==1.0.0
# via sphinx
asttokens==3.0.0
# via stack-data
attrs==25.3.0
attrs==25.1.0
# via
# jsonschema
# jupyter-cache
# referencing
babel==2.17.0
babel==2.16.0
# via
# pydata-sphinx-theme
# sphinx
beautifulsoup4==4.13.4
beautifulsoup4==4.12.3
# via pydata-sphinx-theme
breathe==4.36.0
breathe==4.35.0
# via rocm-docs-core
certifi==2025.4.26
certifi==2024.8.30
# via requests
cffi==1.17.1
# via
# cryptography
# pynacl
charset-normalizer==3.4.2
charset-normalizer==3.4.0
# via requests
click==8.2.1
click==8.1.7
# via
# jupyter-cache
# sphinx-external-toc
comm==0.2.2
# via ipykernel
cryptography==45.0.3
cryptography==44.0.1
# via pyjwt
debugpy==1.8.14
debugpy==1.8.12
# via ipykernel
decorator==5.2.1
decorator==5.1.1
# via ipython
defusedxml==0.7.1
# via sphinxcontrib-datatemplates
deprecated==1.2.18
deprecated==1.2.15
# via pygithub
docutils==0.21.2
# via
# breathe
# myst-parser
# pydata-sphinx-theme
# sphinx
exceptiongroup==1.3.0
exceptiongroup==1.2.2
# via ipython
executing==2.2.0
# via stack-data
fastjsonschema==2.21.1
fastjsonschema==2.20.0
# via
# nbformat
# rocm-docs-core
gitdb==4.0.12
gitdb==4.0.11
# via gitpython
gitpython==3.1.44
gitpython==3.1.43
# via rocm-docs-core
greenlet==3.2.2
greenlet==3.1.1
# via sqlalchemy
idna==3.10
# via requests
imagesize==1.4.1
# via sphinx
importlib-metadata==8.7.0
importlib-metadata==8.6.1
# via
# jupyter-cache
# myst-nb
ipykernel==6.29.5
# via myst-nb
ipython==8.36.0
ipython==8.31.0
# via
# ipykernel
# myst-nb
@@ -86,9 +87,9 @@ jinja2==3.1.6
# via
# myst-parser
# sphinx
jsonschema==4.24.0
jsonschema==4.23.0
# via nbformat
jsonschema-specifications==2025.4.1
jsonschema-specifications==2024.10.1
# via jsonschema
jupyter-cache==1.0.1
# via myst-nb
@@ -96,7 +97,7 @@ jupyter-client==8.6.3
# via
# ipykernel
# nbclient
jupyter-core==5.8.1
jupyter-core==5.7.2
# via
# ipykernel
# jupyter-client
@@ -116,9 +117,9 @@ mdit-py-plugins==0.4.2
# via myst-parser
mdurl==0.1.2
# via markdown-it-py
myst-nb==1.2.0
myst-nb==1.1.2
# via rocm-docs-core
myst-parser==4.0.1
myst-parser==4.0.0
# via myst-nb
nbclient==0.10.2
# via
@@ -131,20 +132,19 @@ nbformat==5.10.4
# nbclient
nest-asyncio==1.6.0
# via ipykernel
packaging==25.0
packaging==24.2
# via
# ipykernel
# pydata-sphinx-theme
# sphinx
parso==0.8.4
# via jedi
pexpect==4.9.0
# via ipython
platformdirs==4.3.8
platformdirs==4.3.6
# via jupyter-core
prompt-toolkit==3.0.51
prompt-toolkit==3.0.50
# via ipython
psutil==7.0.0
psutil==6.1.1
# via ipykernel
ptyprocess==0.7.0
# via pexpect
@@ -152,19 +152,19 @@ pure-eval==0.2.3
# via stack-data
pycparser==2.22
# via cffi
pydata-sphinx-theme==0.15.4
pydata-sphinx-theme==0.16.0
# via
# rocm-docs-core
# sphinx-book-theme
pygithub==2.6.1
pygithub==2.5.0
# via rocm-docs-core
pygments==2.19.1
pygments==2.18.0
# via
# accessible-pygments
# ipython
# pydata-sphinx-theme
# sphinx
pyjwt[crypto]==2.10.1
pyjwt[crypto]==2.10.0
# via pygithub
pynacl==1.5.0
# via pygithub
@@ -178,7 +178,7 @@ pyyaml==6.0.2
# rocm-docs-core
# sphinx-external-toc
# sphinxcontrib-datatemplates
pyzmq==26.4.0
pyzmq==26.2.0
# via
# ipykernel
# jupyter-client
@@ -190,19 +190,19 @@ requests==2.32.3
# via
# pygithub
# sphinx
rocm-docs-core==1.20.0
rocm-docs-core==1.18.2
# via -r requirements.in
rpds-py==0.25.1
rpds-py==0.22.3
# via
# jsonschema
# referencing
six==1.17.0
# via python-dateutil
smmap==5.0.2
smmap==5.0.1
# via gitdb
snowballstemmer==3.0.1
snowballstemmer==2.2.0
# via sphinx
soupsieve==2.7
soupsieve==2.6
# via beautifulsoup4
sphinx==8.1.3
# via
@@ -220,7 +220,7 @@ sphinx==8.1.3
# sphinx-sitemap
# sphinxcontrib-datatemplates
# sphinxcontrib-runcmd
sphinx-book-theme==1.1.4
sphinx-book-theme==1.1.3
# via rocm-docs-core
sphinx-copybutton==0.5.2
# via rocm-docs-core
@@ -228,7 +228,7 @@ sphinx-design==0.6.1
# via rocm-docs-core
sphinx-external-toc==1.0.1
# via rocm-docs-core
sphinx-notfound-page==1.1.0
sphinx-notfound-page==1.0.4
# via rocm-docs-core
sphinx-reredirects==0.1.6
# via -r requirements.in
@@ -250,13 +250,13 @@ sphinxcontrib-runcmd==0.2.0
# via sphinxcontrib-datatemplates
sphinxcontrib-serializinghtml==2.0.0
# via sphinx
sqlalchemy==2.0.41
sqlalchemy==2.0.37
# via jupyter-cache
stack-data==0.6.3
# via ipython
tabulate==0.9.0
# via jupyter-cache
tomli==2.2.1
tomli==2.1.0
# via sphinx
tornado==6.4.2
# via
@@ -272,23 +272,21 @@ traitlets==5.14.3
# matplotlib-inline
# nbclient
# nbformat
typing-extensions==4.13.2
typing-extensions==4.12.2
# via
# beautifulsoup4
# exceptiongroup
# ipython
# myst-nb
# pydata-sphinx-theme
# pygithub
# referencing
# sqlalchemy
urllib3==2.4.0
urllib3==2.2.3
# via
# pygithub
# requests
wcwidth==0.2.13
# via prompt-toolkit
wrapt==1.17.2
wrapt==1.17.0
# via deprecated
zipp==3.22.0
zipp==3.21.0
# via importlib-metadata

View File

@@ -10,7 +10,7 @@ ROCm is a software stack, composed primarily of open-source software, that
provides the tools for programming AMD Graphics Processing Units (GPUs), from
low-level kernels to high-level end-user applications.
.. image:: data/rocm-software-stack-6_4_0.jpg
.. image:: data/rocm-software-stack-6_3_2.jpg
:width: 800
:alt: AMD's ROCm software stack and enabling technologies.
:align: center
@@ -52,7 +52,6 @@ Communication
:header: "Component", "Description"
":doc:`RCCL <rccl:index>`", "Standalone library that provides multi-GPU and multi-node collective communication primitives"
":doc:`rocSHMEM <rocshmem:index>`", "An intra-kernel networking library that provides GPU-centric networking through an OpenSHMEM-like interface"
Math
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -117,11 +116,6 @@ Performance
":doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`", "Toolkit for developing analysis tools for profiling and tracing GPU compute applications. This toolkit is in beta and subject to change"
":doc:`ROCTracer <roctracer:index>`", "Intercepts runtime API calls and traces asynchronous activity"
.. note::
`ROCprof Compute Viewer <https://rocm.docs.amd.com/projects/rocprof-compute-viewer/en/amd-mainline/>`_ is a tool for visualizing and analyzing GPU thread trace data collected using :doc:`rocprofv3 <rocprofiler-sdk:index>`.
Note that `ROCprof Compute Viewer <https://rocm.docs.amd.com/projects/rocprof-compute-viewer/en/amd-mainline/>`_ is in an early access state. Running production workloads is not recommended.
Development
^^^^^^^^^^^

View File

@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<manifest>
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
<default revision="refs/tags/rocm-6.4.1"
<default revision="refs/tags/rocm-6.4.0"
remote="rocm-org"
sync-c="true"
sync-j="4" />

View File

@@ -1,79 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<manifest>
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
<default revision="refs/tags/rocm-6.4.1"
remote="rocm-org"
sync-c="true"
sync-j="4" />
<!--list of projects for ROCm-->
<project name="ROCm" revision="roc-6.4.x" />
<project name="ROCK-Kernel-Driver" />
<project name="ROCR-Runtime" />
<project name="amdsmi" />
<project name="rdc" />
<project name="rocm_bandwidth_test" />
<project name="rocm_smi_lib" />
<project name="rocm-core" />
<project name="rocm-examples" />
<project name="rocminfo" />
<project name="rocprofiler" />
<project name="rocprofiler-register" />
<project name="rocprofiler-sdk" />
<project name="rocprofiler-compute" />
<project name="rocprofiler-systems" />
<project name="roctracer" />
<!--HIP Projects-->
<project name="HIP" />
<project name="hip-tests" />
<project name="HIPIFY" />
<project name="clr" />
<project name="hipother" />
<!-- The following projects are all associated with the AMDGPU LLVM compiler -->
<project name="half" />
<project name="llvm-project" />
<project name="spirv-llvm-translator" />
<!-- gdb projects -->
<project name="ROCdbgapi" />
<project name="ROCgdb" />
<project name="rocr_debug_agent" />
<!-- ROCm Libraries -->
<project groups="mathlibs" name="AMDMIGraphX" />
<project groups="mathlibs" name="MIOpen" />
<project groups="mathlibs" name="MIVisionX" />
<project groups="mathlibs" name="ROCmValidationSuite" />
<project groups="mathlibs" name="Tensile" />
<project groups="mathlibs" name="composable_kernel" />
<project groups="mathlibs" name="hipBLAS-common" />
<project groups="mathlibs" name="hipBLAS" />
<project groups="mathlibs" name="hipBLASLt" />
<project groups="mathlibs" name="hipCUB" />
<project groups="mathlibs" name="hipFFT" />
<project groups="mathlibs" name="hipRAND" />
<project groups="mathlibs" name="hipSOLVER" />
<project groups="mathlibs" name="hipSPARSE" />
<project groups="mathlibs" name="hipSPARSELt" />
<project groups="mathlibs" name="hipTensor" />
<project groups="mathlibs" name="hipfort" />
<project groups="mathlibs" name="rccl" />
<project groups="mathlibs" name="rocAL" />
<project groups="mathlibs" name="rocALUTION" />
<project groups="mathlibs" name="rocBLAS" />
<project groups="mathlibs" name="rocDecode" />
<project groups="mathlibs" name="rocJPEG" />
<project groups="mathlibs" name="rocPyDecode" />
<project groups="mathlibs" name="rocFFT" />
<project groups="mathlibs" name="rocPRIM" />
<project groups="mathlibs" name="rocRAND" />
<project groups="mathlibs" name="rocSHMEM" />
<project groups="mathlibs" name="rocSOLVER" />
<project groups="mathlibs" name="rocSPARSE" />
<project groups="mathlibs" name="rocThrust" />
<project groups="mathlibs" name="rocWMMA" />
<project groups="mathlibs" name="rocm-cmake" />
<project groups="mathlibs" name="rpp" />
<project groups="mathlibs" name="TransferBench" />
<!-- Projects for OpenMP-Extras -->
<project name="aomp" path="openmp-extras/aomp" />
<project name="aomp-extras" path="openmp-extras/aomp-extras" />
<project name="flang" path="openmp-extras/flang" />
</manifest>