mirror of
https://github.com/ROCm/ROCm.git
synced 2026-01-12 08:08:03 -05:00
Compare commits
19 Commits
rocm-submo
...
falcon
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
54ba8bfed1 | ||
|
|
55e13a3c38 | ||
|
|
d1debc7e45 | ||
|
|
169f3bbe5e | ||
|
|
e28eac2fe1 | ||
|
|
97ccce10ef | ||
|
|
217fb452f8 | ||
|
|
a7c158a14f | ||
|
|
85778177a1 | ||
|
|
28060c104b | ||
|
|
84177354de | ||
|
|
7458fcb7ab | ||
|
|
16d6e59003 | ||
|
|
fd9f576b26 | ||
|
|
a66bc1d85e | ||
|
|
36b6ffaf7c | ||
|
|
40e4ba3ecc | ||
|
|
1f41ce26be | ||
|
|
9293723381 |
@@ -214,7 +214,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: MIOpen
|
||||
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex test_rnn_seq_api'
|
||||
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml --exclude-regex "test_rnn_seq_api|GPU_Conv2dTuningAsm_FP32"'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
@@ -89,6 +89,8 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
@@ -122,6 +124,8 @@ jobs:
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
@@ -147,4 +151,3 @@ jobs:
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
registerROCmPackages: true
|
||||
optSymLink: true
|
||||
|
||||
@@ -67,6 +67,7 @@ parameters:
|
||||
- rocprofiler-register
|
||||
- rocprofiler-sdk
|
||||
- ROCR-Runtime
|
||||
- roctracer
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
|
||||
251
.gitmodules
vendored
251
.gitmodules
vendored
@@ -1,251 +0,0 @@
|
||||
[submodule "submodule-srcs/ROCR-Runtime"]
|
||||
path = submodule-srcs/ROCR-Runtime
|
||||
url = https://github.com/rocm/ROCR-Runtime
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/amdsmi"]
|
||||
path = submodule-srcs/amdsmi
|
||||
url = https://github.com/rocm/amdsmi
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rdc"]
|
||||
path = submodule-srcs/rdc
|
||||
url = https://github.com/rocm/rdc
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocm_bandwidth_test"]
|
||||
path = submodule-srcs/rocm_bandwidth_test
|
||||
url = https://github.com/rocm/rocm_bandwidth_test
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocm_smi_lib"]
|
||||
path = submodule-srcs/rocm_smi_lib
|
||||
url = https://github.com/rocm/rocm_smi_lib
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocm-core"]
|
||||
path = submodule-srcs/rocm-core
|
||||
url = https://github.com/rocm/rocm-core
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocm-examples"]
|
||||
path = submodule-srcs/rocm-examples
|
||||
url = https://github.com/rocm/rocm-examples
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocminfo"]
|
||||
path = submodule-srcs/rocminfo
|
||||
url = https://github.com/rocm/rocminfo
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocprofiler"]
|
||||
path = submodule-srcs/rocprofiler
|
||||
url = https://github.com/rocm/rocprofiler
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocprofiler-register"]
|
||||
path = submodule-srcs/rocprofiler-register
|
||||
url = https://github.com/rocm/rocprofiler-register
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocprofiler-sdk"]
|
||||
path = submodule-srcs/rocprofiler-sdk
|
||||
url = https://github.com/rocm/rocprofiler-sdk
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocprofiler-compute"]
|
||||
path = submodule-srcs/rocprofiler-compute
|
||||
url = https://github.com/rocm/rocprofiler-compute
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocprofiler-systems"]
|
||||
path = submodule-srcs/rocprofiler-systems
|
||||
url = https://github.com/rocm/rocprofiler-systems
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/roctracer"]
|
||||
path = submodule-srcs/roctracer
|
||||
url = https://github.com/rocm/roctracer
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/HIP"]
|
||||
path = submodule-srcs/HIP
|
||||
url = https://github.com/rocm/HIP
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/hip-tests"]
|
||||
path = submodule-srcs/hip-tests
|
||||
url = https://github.com/rocm/hip-tests
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/HIPIFY"]
|
||||
path = submodule-srcs/HIPIFY
|
||||
url = https://github.com/rocm/HIPIFY
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/clr"]
|
||||
path = submodule-srcs/clr
|
||||
url = https://github.com/rocm/clr
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/hipother"]
|
||||
path = submodule-srcs/hipother
|
||||
url = https://github.com/rocm/hipother
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/half"]
|
||||
path = submodule-srcs/half
|
||||
url = https://github.com/rocm/half
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/llvm-project"]
|
||||
path = submodule-srcs/llvm-project
|
||||
url = https://github.com/rocm/llvm-project
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/spirv-llvm-translator"]
|
||||
path = submodule-srcs/spirv-llvm-translator
|
||||
url = https://github.com/rocm/spirv-llvm-translator
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/ROCdbgapi"]
|
||||
path = submodule-srcs/ROCdbgapi
|
||||
url = https://github.com/rocm/ROCdbgapi
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/ROCgdb"]
|
||||
path = submodule-srcs/ROCgdb
|
||||
url = https://github.com/rocm/ROCgdb
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocr_debug_agent"]
|
||||
path = submodule-srcs/rocr_debug_agent
|
||||
url = https://github.com/rocm/rocr_debug_agent
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/AMDMIGraphX"]
|
||||
path = submodule-srcs/AMDMIGraphX
|
||||
url = https://github.com/rocm/AMDMIGraphX
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/MIOpen"]
|
||||
path = submodule-srcs/MIOpen
|
||||
url = https://github.com/rocm/MIOpen
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/MIVisionX"]
|
||||
path = submodule-srcs/MIVisionX
|
||||
url = https://github.com/rocm/MIVisionX
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/ROCmValidationSuite"]
|
||||
path = submodule-srcs/ROCmValidationSuite
|
||||
url = https://github.com/rocm/ROCmValidationSuite
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/Tensile"]
|
||||
path = submodule-srcs/Tensile
|
||||
url = https://github.com/rocm/Tensile
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/composable_kernel"]
|
||||
path = submodule-srcs/composable_kernel
|
||||
url = https://github.com/rocm/composable_kernel
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/hipBLAS-common"]
|
||||
path = submodule-srcs/hipBLAS-common
|
||||
url = https://github.com/rocm/hipBLAS-common
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/hipBLAS"]
|
||||
path = submodule-srcs/hipBLAS
|
||||
url = https://github.com/rocm/hipBLAS
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/hipBLASLt"]
|
||||
path = submodule-srcs/hipBLASLt
|
||||
url = https://github.com/rocm/hipBLASLt
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/hipCUB"]
|
||||
path = submodule-srcs/hipCUB
|
||||
url = https://github.com/rocm/hipCUB
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/hipFFT"]
|
||||
path = submodule-srcs/hipFFT
|
||||
url = https://github.com/rocm/hipFFT
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/hipRAND"]
|
||||
path = submodule-srcs/hipRAND
|
||||
url = https://github.com/rocm/hipRAND
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/hipSOLVER"]
|
||||
path = submodule-srcs/hipSOLVER
|
||||
url = https://github.com/rocm/hipSOLVER
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/hipSPARSE"]
|
||||
path = submodule-srcs/hipSPARSE
|
||||
url = https://github.com/rocm/hipSPARSE
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/hipSPARSELt"]
|
||||
path = submodule-srcs/hipSPARSELt
|
||||
url = https://github.com/rocm/hipSPARSELt
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/hipTensor"]
|
||||
path = submodule-srcs/hipTensor
|
||||
url = https://github.com/rocm/hipTensor
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/hipfort"]
|
||||
path = submodule-srcs/hipfort
|
||||
url = https://github.com/rocm/hipfort
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rccl"]
|
||||
path = submodule-srcs/rccl
|
||||
url = https://github.com/rocm/rccl
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocAL"]
|
||||
path = submodule-srcs/rocAL
|
||||
url = https://github.com/rocm/rocAL
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocALUTION"]
|
||||
path = submodule-srcs/rocALUTION
|
||||
url = https://github.com/rocm/rocALUTION
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocBLAS"]
|
||||
path = submodule-srcs/rocBLAS
|
||||
url = https://github.com/rocm/rocBLAS
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocDecode"]
|
||||
path = submodule-srcs/rocDecode
|
||||
url = https://github.com/rocm/rocDecode
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocJPEG"]
|
||||
path = submodule-srcs/rocJPEG
|
||||
url = https://github.com/rocm/rocJPEG
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocPyDecode"]
|
||||
path = submodule-srcs/rocPyDecode
|
||||
url = https://github.com/rocm/rocPyDecode
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocFFT"]
|
||||
path = submodule-srcs/rocFFT
|
||||
url = https://github.com/rocm/rocFFT
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocPRIM"]
|
||||
path = submodule-srcs/rocPRIM
|
||||
url = https://github.com/rocm/rocPRIM
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocRAND"]
|
||||
path = submodule-srcs/rocRAND
|
||||
url = https://github.com/rocm/rocRAND
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocSHMEM"]
|
||||
path = submodule-srcs/rocSHMEM
|
||||
url = https://github.com/rocm/rocSHMEM
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocSOLVER"]
|
||||
path = submodule-srcs/rocSOLVER
|
||||
url = https://github.com/rocm/rocSOLVER
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocSPARSE"]
|
||||
path = submodule-srcs/rocSPARSE
|
||||
url = https://github.com/rocm/rocSPARSE
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocThrust"]
|
||||
path = submodule-srcs/rocThrust
|
||||
url = https://github.com/rocm/rocThrust
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocWMMA"]
|
||||
path = submodule-srcs/rocWMMA
|
||||
url = https://github.com/rocm/rocWMMA
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rocm-cmake"]
|
||||
path = submodule-srcs/rocm-cmake
|
||||
url = https://github.com/rocm/rocm-cmake
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/rpp"]
|
||||
path = submodule-srcs/rpp
|
||||
url = https://github.com/rocm/rpp
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/TransferBench"]
|
||||
path = submodule-srcs/TransferBench
|
||||
url = https://github.com/rocm/TransferBench
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/openmp-extras/aomp"]
|
||||
path = submodule-srcs/openmp-extras/aomp
|
||||
url = https://github.com/rocm/aomp
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/openmp-extras/aomp-extras"]
|
||||
path = submodule-srcs/openmp-extras/aomp-extras
|
||||
url = https://github.com/rocm/aomp-extras
|
||||
branch = release/rocm-rel-6.4
|
||||
[submodule "submodule-srcs/ROCK-Kernel-Driver"]
|
||||
path = submodule-srcs/ROCK-Kernel-Driver
|
||||
url = https://github.com/rocm/ROCK-Kernel-Driver
|
||||
110
CHANGELOG.md
110
CHANGELOG.md
@@ -4,9 +4,117 @@ This page is a historical overview of changes made to ROCm components. This
|
||||
consolidated changelog documents key modifications and improvements across
|
||||
different versions of the ROCm software stack and its components.
|
||||
|
||||
## ROCm 6.4.1
|
||||
|
||||
See the [ROCm 6.4.1 release notes](https://rocm-stg.amd.com/en/latest/about/release-notes.html)
|
||||
for a complete overview of this release.
|
||||
|
||||
### **AMD SMI** (25.4.2)
|
||||
|
||||
#### Added
|
||||
|
||||
* Dumping CPER entries from RAS tool `amdsmi_get_gpu_cper_entries()` to Python and C APIs.
|
||||
- Dumping CPER entries consist of `amdsmi_cper_hdr_t`.
|
||||
- Dumping CPER entries is also enabled in the CLI interface through `sudo amd-smi ras --cper`.
|
||||
|
||||
#### Resolved
|
||||
|
||||
* Fixed partition enumeration in `amd-smi list -e`, `amdsmi_get_gpu_enumeration_info()`, `amdsmi_enumeration_info_t`, `drm_card`, and `drm_render` fields.
|
||||
|
||||
```{note}
|
||||
See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples, and in-depth descriptions.
|
||||
```
|
||||
|
||||
### **HIP** (6.4.1)
|
||||
|
||||
#### Added
|
||||
|
||||
* New debug mask, to print precise code object information for logging.
|
||||
|
||||
#### Changed
|
||||
|
||||
* Calling the code object has changed. HIP runtime now uses device bitcode before SPIR-V.
|
||||
|
||||
#### Optimized
|
||||
|
||||
* Improved kernel logging using the demangling shader names.
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Stale state during the graph capture. The return error was fixed, and HIP runtime now always uses the latest dependent nodes during `hipEventRecord` capture.
|
||||
* Issue of `hipEventRecords` failing to call the `hip::getStream` runtime function.
|
||||
|
||||
### **hipBLASLt** (0.12.1)
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Fixed an accuracy issue for some solutions using an `FP32` or `TF32` data type with a TT transpose.
|
||||
|
||||
### **RCCL** (2.22.3)
|
||||
|
||||
#### Changed
|
||||
|
||||
* MSCCL++ is now disabled by default. To enable it, set `RCCL_MSCCLPP_ENABLE=1`.
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Fixed an issue where early termination, in rare circumstances, could cause the application to stop responding by adding synchronization before destroying a proxy thread.
|
||||
* Fixed the accuracy issue for the MSCCLPP `allreduce7` kernel in graph mode.
|
||||
|
||||
### **rocALUTION** (3.2.3)
|
||||
|
||||
#### Added
|
||||
|
||||
* The `-a` option has been added to the `rmake.py` build script. This option allows you to select specific architectures when building on Microsoft Windows.
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Fixed an issue where the `HIP_PATH` environment variable was being ignored when compiling on Microsoft Windows.
|
||||
|
||||
### **ROCm Data Center Tool** (0.3.0)
|
||||
|
||||
#### Added
|
||||
|
||||
- Support for GPU partitions.
|
||||
- `RDC_FI_GPU_BUSY_PERCENT` metric.
|
||||
|
||||
#### Changed
|
||||
|
||||
- Updated `rdc_field` to align with `rdc_bootstrap` for current metrics.
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
- Fixed [ROCProfiler](https://rocm.docs.amd.com/projects/rocprofiler/en/docs-6.4.0/index.html) eval metrics and memory leaks.
|
||||
|
||||
### **ROCm SMI** (7.5.0)
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
- Fixed partition enumeration. It now refers to the correct DRM Render and Card paths.
|
||||
|
||||
```{note}
|
||||
See the full [ROCm SMI changelog](https://github.com/ROCm/rocm_smi_lib/blob/release/rocm-rel-6.4/CHANGELOG.md) for details, examples, and in-depth descriptions.
|
||||
```
|
||||
|
||||
### **ROCm Systems Profiler** (1.0.1)
|
||||
|
||||
#### Added
|
||||
|
||||
* How-to document for [network performance profiling](https://rocm.docs.amd.com/projects/rocprofiler-systems/en/amd-staging/how-to/nic-profiling.html) for standard Network Interface Cards (NICs).
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Fixed a build issue with Dyninst on GCC 13.
|
||||
|
||||
### **ROCr Runtime** (1.15.0)
|
||||
|
||||
#### Resolved issues
|
||||
|
||||
* Fixed a rare occurrence issue on AMD Instinct MI25, MI50, and MI100 GPUs, where the `SDMA` copies might start before the dependent Kernel finishes and could cause memory corruption.
|
||||
|
||||
## ROCm 6.4.0
|
||||
|
||||
See the [ROCm 6.4.0 release notes](https://rocm-stg.amd.com/en/latest/about/release-notes.html)
|
||||
See the [ROCm 6.4.0 release notes](https://rocm.docs.amd.com/en/docs-6.4.0/about/release-notes.html)
|
||||
for a complete overview of this release.
|
||||
|
||||
### **AMD SMI** (25.3.0)
|
||||
|
||||
81
Manifest6.4.0
Normal file
81
Manifest6.4.0
Normal file
@@ -0,0 +1,81 @@
|
||||
This XML file does not appear to have any style information associated with it. The document tree is shown below.
|
||||
<manifest>
|
||||
<remote name="gerritgit" fetch="ssh://gerritgit/" review="gerrit-git.amd.com"/>
|
||||
<remote name="lightning-ghemu" fetch="ssh://github-emu/AMD-Lightning-Internal"/>
|
||||
<remote name="rocm" fetch="https://github.com/ROCm"/>
|
||||
<remote name="rocm-ghemu" fetch="ssh://github-emu/AMD-ROCm-Internal"/>
|
||||
<default remote="gerritgit" revision="release/rocm-rel-6.4" sync-j="4" sync-c="true"/>
|
||||
<project name="AMDMIGraphX" remote="rocm" revision="908b94a3f0822a4fee89d99c3cfc51cd9c93f2f6" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="MIOpen" remote="rocm" revision="f10c6ed8085cfabf8877294ab44301d8180999e8" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="MIVisionX" remote="rocm" revision="a2b69e5b30f2dbdf66055ec99a2b5559b572f7af" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="OpenCL-CLHPP" remote="rocm-ghemu" revision="6f7e82dee83aea7f277a4b874da309902ea51f6e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="OpenCL-Headers" remote="rocm-ghemu" revision="848d67b6fd471318816a81601d469b086487d18e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="ROCR-Runtime" remote="rocm-ghemu" revision="1d9f08cabd33bd6302add72d0be2bfe0e64eea3a" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="ROCdbgapi" remote="rocm-ghemu" revision="59be7ff0aaafe82feb78f30990c8fdf62838cc98" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="ROCgdb" remote="rocm-ghemu" revision="401bb21f2f3c72bbb90ccce12dc3ef481f9a1d8a" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="ROCmValidationSuite" remote="rocm" revision="5f1a9665f6241b0346c88cfd21a6073628da3593" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="Tensile" remote="rocm" revision="be49885fce2a61b600ae4593f1c2d00c8b4fa11e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="TransferBench" remote="rocm" revision="3ea2f226ec818158ba97e4ee0ec0b589f13f4641" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="amdsmi" remote="rocm-ghemu" revision="e6a209ef809f1b09a424572afd685ec754a9042b" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="aomp" path="openmp-extras/aomp" remote="lightning-ghemu" revision="24932c59c0759a57ee52d327d9a10a2e466e35a7" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
<project name="aomp-extras" path="openmp-extras/aomp-extras" remote="lightning-ghemu" revision="6f8038ada9dec082ea091d30c98c0834669d12a1" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
<project name="aqlprofile" remote="rocm-ghemu" revision="7fae75ec6bf7b1a631707ae859542d733f8a1f43" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="build-infra" path="ROCm" remote="rocm-ghemu" revision="811ec9cc6d1588bf66619365b9b4db96ac6acf68" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra"/>
|
||||
<project name="clr" remote="rocm-ghemu" revision="a1adcfdd44f4560c0268e36c8afeb94f760dc963" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="composable_kernel" remote="rocm" revision="a8c5bd9b9ad950c3e742877e01cb784da91664e3" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="compute/ec/hip-examples" path="HIP-Examples" revision="41b0cff8077a25390c2bbda827eb9f6f37ec1ef3" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="compute/ec/hip-examples-private" path="hip-examples-private" revision="dc69edb405804987753735a369478503d82ce9c2" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="compute/ec/jenkins-utils" path="jenkins-utils" revision="bb517b014ff055b62d3860addc23ddd06b0c3e6e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra,stage1"/>
|
||||
<project name="compute/ec/ml-framework-ci" path="ml-framework-ci" revision="83440e22ebf1e9443b6df737224c1e5e2b91e0c4" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra,framework"/>
|
||||
<project name="compute/ec/packaging/meta" path="meta" revision="c7cffa2e4199da1fd68b8b3568282dd59d49a4df" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra,stage1"/>
|
||||
<project name="compute/ec/prototype" path="build" revision="d71a2766e11e057e5c698caea8fc4ebc0f72cb3e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra,stage1"/>
|
||||
<project name="compute/ec/rocm_bandwidth_test" path="rocm_bandwidth_test" revision="84b8ddd2686be9bd3e438126b44e6bb10d94d522" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="flang" path="openmp-extras/flang" remote="lightning-ghemu" revision="390169508a03cecf85d43f5cee41e223355f598f" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
<project name="half" remote="rocm" revision="1ddada225144cac0de8f6b5c0dd9acffd99a2e68" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hip" remote="rocm-ghemu" revision="22b0b2eb9a09e30dca11b213872127f9caa2e1e7" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="hip-tests" remote="rocm-ghemu" revision="dc28111737706aad93e38c2f746ccbc13dbf1b80" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="hipBLAS" remote="rocm" revision="0a335435e9c8a833d7106e4ae5057eb58cea2fef" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipBLAS-common" remote="rocm" revision="7c1566ba4628e777b91511242899b6df48555d04" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipBLASLt" remote="rocm" revision="4d62e135cfb4008cf7b508995cad347a1bc750c8" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipCUB" remote="rocm" revision="a6005943c5804535990429925318e7900eb6e801" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipFFT" remote="rocm" revision="396169c84a2bb3c7ed7245caefe66002138e7c6c" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipRAND" remote="rocm" revision="d2516cc199690fd91abfdc5908ecfd88e3553067" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipSOLVER" remote="rocm" revision="ca0de3c9c95df4345b76cd8a56e72c84b7d5fc79" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipSPARSE" remote="rocm" revision="a6c62e48eb8a2326475f7bbb4705c5b926a5edc8" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipSPARSELt" remote="rocm" revision="f3f4f590a49ae9f9c9ce1451c42db4c2bfd00eed" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipTensor" remote="rocm" revision="e5529b92914be79e4887a92b48b30f88b616c9a5" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipfort" remote="rocm" revision="f3d6aa3e8657d665a43fa2815ca2e49ce39a464a" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="hipify" path="HIPIFY" remote="lightning-ghemu" revision="ed0de49132211c6ddbd40f5cd89b5841e832ac3d" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
<project name="hipother" remote="rocm-ghemu" revision="49b1588f834dbe1a4db1bddb3647a91b15f618b8" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="llvm-project" remote="lightning-ghemu" revision="aa0c041cb49bb50af268504907b7899fec59ae4e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
<project name="rccl" remote="rocm" revision="12f8f61f3a5db87bf158c60fdd5e38a32c903b08" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rdc" remote="rocm-ghemu" revision="0224310c872df0fae56ffc883c50c7f47dc82870" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocAL" remote="rocm" revision="373ef865aca43528559e7a9134f09e49a9e9b7c6" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocALUTION" remote="rocm" revision="cb256de3574a4fcbc6a52ed5986b787173cd6dc2" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocBLAS" remote="rocm" revision="80e5394d6a68901ce48b03da47b33b1e69d58be7" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocDecode" remote="rocm" revision="a2a7b63cad8f90a94e21232b44460a8fb2d52304" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocFFT" remote="rocm" revision="058ba87fdcfdae334dbc8dbe048955b248e9328a" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocJPEG" remote="rocm" revision="73d36d35d90137ffbfcec276bdf973823ef0c0b9" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocPRIM" remote="rocm" revision="d8771ec18ad45c4d697800c22fb21241f22a915f" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocPyDecode" remote="rocm" revision="848e49d29d4d6173fb4b57a9223ce68c049baa28" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocRAND" remote="rocm" revision="4d5d3a88d1898705dadf5c06e7b0400d51a13c36" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocSHMEM" remote="rocm" revision="7702b3c0f3f41baf6a80aa6b22fa90dec1a6801e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocSOLVER" remote="rocm" revision="db754e3f55daab54abb86f17cd6b4066c504e163" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocSPARSE" remote="rocm" revision="4953add0aee37ad26700e8bcd6defbfa6b3a4d08" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocThrust" remote="rocm" revision="6bf2777019827e1a2898547ced9a03bf5024ed7d" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocWMMA" remote="rocm" revision="1a5b6231663fcf3e00abf790aeae843278f16a65" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocm-cmake" remote="rocm" revision="ecc716b97c2239cff00422ed7a43cd52a0839a0e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="rocm-core" remote="rocm-ghemu" revision="73dae9c82ace4fb8e1e4028f86ff0365f21c9f51" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="infra,stage1"/>
|
||||
<project name="rocm-examples" remote="rocm" revision="3bbd2987a3b46cfd2c8348c2317042f3ad604e38" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocm_smi_lib" remote="rocm-ghemu" revision="1f242d314916336d6ce5c731f486edfaa8f0b987" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocminfo" remote="rocm-ghemu" revision="6ea2ba38c8e1ab2899acf66878148b1192fd0bee" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocprofiler" remote="rocm-ghemu" revision="40da7312a06f8052f5c148a4709cab64686f881d" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocprofiler-compute" remote="rocm" revision="7b25d958b4e030ea64a24ed0a62dcac1e48193ab" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocprofiler-register-internal" path="rocprofiler-register" remote="rocm-ghemu" revision="7c6cd44f637d400b50b803b0b351be302ad6827d" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocprofiler-sdk-internal" path="rocprofiler-sdk" remote="rocm-ghemu" revision="e8e49fe76971000a42a5a177d9a727d16dd0ebcf" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocprofiler-systems" remote="rocm" revision="2e945e4a08781e13a822f568814e2c434fd8858f" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rocr_debug_agent" remote="rocm-ghemu" revision="9eec1a52a36b5203bbac54a1b442fe9a45b6a43e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="roctracer" remote="rocm-ghemu" revision="f55a6943816641c081aa167c8a45904ddae2ba5e" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="stage1"/>
|
||||
<project name="rpp" remote="rocm" revision="5fb204ca7018b87889e061b720c5b06f6b9bce9b" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="mathlibs"/>
|
||||
<project name="spirv-llvm-translator" path="llvm-project/llvm/projects/SPIRV-LLVM-Translator" remote="lightning-ghemu" revision="ae12ddbec86765df369b18ac764e170082079819" upstream="release/rocm-rel-6.4" dest-branch="release/rocm-rel-6.4" groups="llvmdeps,stage1"/>
|
||||
</manifest>
|
||||
44
README.md
44
README.md
@@ -20,17 +20,12 @@ source software compilers, debuggers, and libraries. ROCm is fully integrated in
|
||||
(ML) frameworks, such as PyTorch and TensorFlow.
|
||||
|
||||
## Getting the ROCm Source Code
|
||||
AMD ROCm is built from open source software. It is, therefore, possible to modify the various components of ROCm by downloading the source code and rebuilding the components. AMD ROCm is built from open source software. It is, therefore, possible to modify the various components of ROCm by downloading the source code and rebuilding the components. The source code for ROCm components can be cloned from each of the GitHub repositories using git.
|
||||
|
||||
There are two methods to clone/sync the ROCm sources. you can use either of the methods to sync the ROCm Sources
|
||||
|
||||
## [Method 1]
|
||||
|
||||
For easy access to download the correct versions of each of these tools, the ROCm repository contains a repo manifest file called [default.xml](./default.xml). You can use this manifest file to download the source code for ROCm software.
|
||||
AMD ROCm is built from open source software. It is, therefore, possible to modify the various components of ROCm by downloading the source code and rebuilding the components. The source code for ROCm components can be cloned from each of the GitHub repositories using git. For easy access to download the correct versions of each of these tools, the ROCm repository contains a repo manifest file called [default.xml](./default.xml). You can use this manifest file to download the source code for ROCm software.
|
||||
|
||||
### Installing the repo tool
|
||||
|
||||
We need the repo tool to work with the manifest file. The repo tool from Google allows you to manage multiple git repositories simultaneously. Run the following commands to install the repo tool:
|
||||
The repo tool from Google allows you to manage multiple git repositories simultaneously. Run the following commands to install the repo tool:
|
||||
|
||||
```bash
|
||||
mkdir -p ~/bin/
|
||||
@@ -48,12 +43,11 @@ Some ROCm projects use the Git Large File Storage (LFS) format that may require
|
||||
sudo apt-get install git-lfs
|
||||
```
|
||||
|
||||
### Downloading the ROCm source code
|
||||
|
||||
The following example shows how to use the repo tool to download the ROCm source code. If you choose a directory other than ~/bin/ to install the repo tool, you must use that chosen directory in the code as shown below:
|
||||
|
||||
```bash
|
||||
# --------------------------------------
|
||||
# Step1: clone source code
|
||||
# --------------------------------------
|
||||
mkdir -p ~/ROCm/
|
||||
cd ~/ROCm/
|
||||
export ROCM_VERSION=6.4.0
|
||||
@@ -63,35 +57,29 @@ export ROCM_VERSION=6.4.0
|
||||
|
||||
**Note:** Using this sample code will cause the repo tool to download the open source code associated with the specified ROCm release. Ensure that you have ssh-keys configured on your machine for your GitHub ID prior to the download as explained at [Connecting to GitHub with SSH](https://docs.github.com/en/authentication/connecting-to-github-with-ssh).
|
||||
|
||||
|
||||
## [Method 2]
|
||||
|
||||
This repository contains the source code for ROCm. Below you will find instructions for cloning the repository using submodules as an alternative to using the `repo` tool.
|
||||
|
||||
## Cloning with Git Submodules
|
||||
|
||||
As an alternative method, you can clone this repository and its submodules using Git's submodule functionality. This approach may be preferred if you are familiar with Git and wish to avoid using the `repo` tool.
|
||||
|
||||
To clone the repository along with all its submodules, use the following command:
|
||||
|
||||
```bash
|
||||
# --------------------------------------
|
||||
# Step1: clone source code
|
||||
# --------------------------------------
|
||||
git clone --recurse-submodules --remote-submodules https://github.com/ROCm/ROCm.git
|
||||
cd ROCm/submodule-srcs
|
||||
```
|
||||
## Building the ROCm source code
|
||||
|
||||
Each ROCm component repository contains directions for building that component, such as the rocSPARSE documentation [Installation and Building for Linux](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/install/Linux_Install_Guide.html). Refer to the specific component documentation for instructions on building the repository.
|
||||
|
||||
Each release of the ROCm software supports specific hardware and software configurations. Refer to [System requirements (Linux)](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) for the current supported hardware and OS.
|
||||
|
||||
## Build ROCm from source
|
||||
|
||||
The Build will use as many processors as it can find to build in parallel. Some of the compiles can consume as much as 10GB of RAM, so make sure you have plenty of Swap Space !
|
||||
|
||||
By default the ROCm build will compile for all supported GPU architectures and will take approximately 500 CPU hours.
|
||||
The Build time will reduce significantly if we limit the GPU Architecture/s against which we need to build by using the environment variable GPU_ARCHS as mentioned below.
|
||||
|
||||
```bash
|
||||
# --------------------------------------
|
||||
# Step1: clone source code
|
||||
# --------------------------------------
|
||||
|
||||
mkdir -p ~/WORKSPACE/ # Or any folder name other than WORKSPACE
|
||||
cd ~/WORKSPACE/
|
||||
export ROCM_VERSION=6.4.0
|
||||
~/bin/repo init -u http://github.com/ROCm/ROCm.git -b roc-6.4.x -m tools/rocm-build/rocm-${ROCM_VERSION}.xml
|
||||
~/bin/repo sync
|
||||
|
||||
# --------------------------------------
|
||||
# Step 2: Prepare build environment
|
||||
|
||||
1391
RELEASE.md
1391
RELEASE.md
File diff suppressed because it is too large
Load Diff
@@ -81,6 +81,7 @@ additional licenses. Please review individual repositories for more information.
|
||||
| [rocRAND](https://github.com/ROCm/rocRAND/) | [MIT](https://github.com/ROCm/rocRAND/blob/develop/LICENSE.txt) |
|
||||
| [ROCr Debug Agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/amd-staging/LICENSE.txt) |
|
||||
| [ROCR-Runtime](https://github.com/ROCm/ROCR-Runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCR-Runtime/blob/amd-staging/LICENSE.txt) |
|
||||
| [rocSHMEM](https://github.com/ROCm/rocSHMEM/) | [MIT](https://github.com/ROCm/rocSHMEM/blob/develop/LICENSE.md) |
|
||||
| [rocSOLVER](https://github.com/ROCm/rocSOLVER/) | [BSD-2-Clause](https://github.com/ROCm/rocSOLVER/blob/develop/LICENSE.md) |
|
||||
| [rocSPARSE](https://github.com/ROCm/rocSPARSE/) | [MIT](https://github.com/ROCm/rocSPARSE/blob/develop/LICENSE.md) |
|
||||
| [rocThrust](https://github.com/ROCm/rocThrust/) | [Apache 2.0](https://github.com/ROCm/rocThrust/blob/develop/LICENSE) |
|
||||
|
||||
@@ -1,121 +1,122 @@
|
||||
ROCm Version,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
|
||||
,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
|
||||
,"Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
|
||||
,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
|
||||
,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,,,,,,,,,,,,
|
||||
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
|
||||
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
|
||||
,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
|
||||
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
|
||||
,,,,,,,,,,,,,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
|
||||
,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
|
||||
,,,,,,,,,,,,,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
Thrust,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
CUB,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
,,,,,,,,,,,,,,,
|
||||
KMD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
KMD versions,"6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
|
||||
,,,,,,,,,,,,,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
|
||||
:doc:`rocAL <rocal:index>`,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
|
||||
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`RPP <rpp:index>`,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
|
||||
,,,,,,,,,,,,,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
:doc:`RCCL <rccl:index>`,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
|
||||
,,,,,,,,,,,,,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
|
||||
:doc:`hipfort <hipfort:index>`,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
|
||||
,,,,,,,,,,,,,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
,,,,,,,,,,,,,,,
|
||||
SUPPORT LIBS,,,,,,,,,,,,,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
|
||||
,,,,,,,,,,,,,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
|
||||
,,,,,,,,,,,,,,,
|
||||
PERFORMANCE TOOLS,,,,,,,,,,,,,,,
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
|
||||
,,,,,,,,,,,,,,,
|
||||
DEVELOPMENT TOOLS,,,,,,,,,,,,,,,
|
||||
:doc:`HIPIFY <hipify:index>`,19.0.0.25104,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
|
||||
,,,,,,,,,,,,,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
,,,,,,,,,,,,,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
:doc:`HIP <hip:index>`,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
|
||||
ROCm Version,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
|
||||
,,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
|
||||
,"Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
|
||||
,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
|
||||
,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,,,,,,,,,,,,
|
||||
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
|
||||
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
|
||||
,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
|
||||
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
|
||||
,,,,,,,,,,,,,,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
|
||||
,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
|
||||
,,,,,,,,,,,,,,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
Thrust,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
CUB,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
,,,,,,,,,,,,,,,,
|
||||
KMD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
KMD versions,"6.4.x, 6.3.x","6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
|
||||
,,,,,,,,,,,,,,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
|
||||
:doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
|
||||
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
|
||||
,,,,,,,,,,,,,,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
|
||||
`rocSHMEM <https://github.com/ROCm/rocSHMEM>`_ ,2.0.0,2.0.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
,,,,,,,,,,,,,,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
|
||||
:doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,4.4.0,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
|
||||
,,,,,,,,,,,,,,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
,,,,,,,,,,,,,,,,
|
||||
SUPPORT LIBS,,,,,,,,,,,,,,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
|
||||
,,,,,,,,,,,,,,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,25.4.2,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
|
||||
,,,,,,,,,,,,,,,,
|
||||
PERFORMANCE TOOLS,,,,,,,,,,,,,,,,
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.1,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60401,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60401,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
|
||||
,,,,,,,,,,,,,,,,
|
||||
DEVELOPMENT TOOLS,,,,,,,,,,,,,,,,
|
||||
:doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
|
||||
,,,,,,,,,,,,,,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25172,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,19.0.0.25172,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25172,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
,,,,,,,,,,,,,,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
:doc:`HIP <hip:index>`,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
|
||||
|
||||
|
@@ -23,15 +23,15 @@ compatibility and system requirements.
|
||||
.. container:: format-big-table
|
||||
|
||||
.. csv-table::
|
||||
:header: "ROCm Version", "6.4.0", "6.3.3", "6.2.0"
|
||||
:header: "ROCm Version", "6.4.1", "6.4.0", "6.3.0"
|
||||
:stub-columns: 1
|
||||
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4"
|
||||
,"RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3"
|
||||
,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9"
|
||||
,"SLES 15 SP6","SLES 15 SP6, SP5","SLES 15 SP6, SP5"
|
||||
,"Oracle Linux 9, 8 [#mi300x]_",Oracle Linux 8.10 [#mi300x]_,Oracle Linux 8.9 [#mi300x]_
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5
|
||||
,"RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4"
|
||||
,RHEL 8.10,RHEL 8.10,RHEL 8.10
|
||||
,SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5"
|
||||
,"Oracle Linux 9, 8 [#mi300x]_","Oracle Linux 9, 8 [#mi300x]_",Oracle Linux 8.10 [#mi300x]_
|
||||
,Debian 12 [#single-node]_,Debian 12 [#single-node]_,
|
||||
,Azure Linux 3.0 [#mi300x]_,Azure Linux 3.0 [#mi300x]_,
|
||||
,.. _architecture-support-compatibility-matrix:,,
|
||||
@@ -43,106 +43,107 @@ compatibility and system requirements.
|
||||
,.. _gpu-support-compatibility-matrix:,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030
|
||||
,gfx942,gfx942,gfx942 [#mi300_620]_
|
||||
,gfx942,gfx942,gfx942
|
||||
,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908
|
||||
,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix:,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.3, 2.2, 2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.31,0.4.26
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.20,1.17.3,1.17.3
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.31
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.17.3
|
||||
,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix:,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0
|
||||
,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix:,,
|
||||
Thrust,2.5.0,2.3.2,2.2.0
|
||||
CUB,2.5.0,2.3.2,2.2.0
|
||||
Thrust,2.5.0,2.5.0,2.3.2
|
||||
CUB,2.5.0,2.5.0,2.3.2
|
||||
,,,
|
||||
KMD & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,,
|
||||
KMD versions,"6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x"
|
||||
KMD versions,"6.4.x, 6.3.x","6.4.x, 6.3.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
|
||||
,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.11.0,2.10.0
|
||||
:doc:`MIOpen <miopen:index>`,3.4.0,3.3.0,3.2.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.1.0,3.0.0
|
||||
:doc:`rocAL <rocal:index>`,2.2.0,2.1.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.8.0,0.6.0
|
||||
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.6.0,N/A
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.2.0,0.1.0
|
||||
:doc:`RPP <rpp:index>`,1.9.10,1.9.1,1.8.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.11.0
|
||||
:doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.1.0
|
||||
:doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.1.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.8.0
|
||||
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.6.0
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.2.0
|
||||
:doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.1
|
||||
,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix:,,
|
||||
:doc:`RCCL <rccl:index>`,2.22.3,2.21.5,2.20.5
|
||||
:doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.21.5
|
||||
`rocSHMEM <https://github.com/ROCm/rocSHMEM>`_ ,2.0.0,2.0.0,N/A
|
||||
,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix:,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.3.0,2.2.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.12.0,0.10.0,0.8.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.17,1.0.14
|
||||
:doc:`hipfort <hipfort:index>`,0.6.0,0.5.1,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.12.0,2.11.1,2.11.0
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.3.0,2.2.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.1.2,3.1.1
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.2,0.2.1
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.2,3.2.1,3.2.0
|
||||
:doc:`rocBLAS <rocblas:index>`,4.4.0,4.3.0,4.2.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.31,1.0.28
|
||||
:doc:`rocRAND <rocrand:index>`,3.3.0,3.2.0,3.1.0
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.27.0,3.26.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.3.0,3.2.0
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.6.0,1.5.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.43.0,4.42.0,4.41.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.3.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.0,0.10.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.17
|
||||
:doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.5.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.11.0
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.3.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.1.2
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.2
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.2,3.2.1
|
||||
:doc:`rocBLAS <rocblas:index>`,4.4.0,4.4.0,4.3.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.31
|
||||
:doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.2.0
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.28.0,3.28.0,3.27.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.6.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.42.0
|
||||
,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix:,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.4.0,3.3.0,3.2.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.4.0,1.3.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.4.0,3.3.0,3.2.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.0.1
|
||||
:doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.4.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0
|
||||
,,,
|
||||
SUPPORT LIBS,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.4.43482,6.3.42134,6.2.41133
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.0,6.3.3,6.2.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,20240607.1.4246
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43482,6.3.42131
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.1,6.4.0,6.3.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_
|
||||
,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix:,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,25.3.0,24.7.1,24.6.2
|
||||
:doc:`AMD SMI <amdsmi:index>`,25.4.2,25.3.0,24.7.1
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.4.0,7.3.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.0.60200
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.5.0,7.5.0,7.4.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0
|
||||
,,,
|
||||
PERFORMANCE TOOLS,,,
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.0.0,2.0.1
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.0,0.1.2,1.11.2
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60400,2.0.60303,2.0.60200
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.5.0,0.4.0
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60400,4.1.60303,4.1.60200
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.0,3.1.0,3.0.0
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.1,1.0.0,0.1.0
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60401,2.0.60400,2.0.60300
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.5.0
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60401,4.1.60400,4.1.60300
|
||||
,,,
|
||||
DEVELOPMENT TOOLS,,,
|
||||
:doc:`HIPIFY <hipify:index>`,19.0.0.25133,18.0.0.25012,18.0.0.24232
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.13.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.0,0.76.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,14.2.0
|
||||
:doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,18.0.0.24455
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.3,2.0.3
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.3
|
||||
,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix:,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1
|
||||
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25133,18.0.0.25012,18.0.0.24232
|
||||
:doc:`llvm-project <llvm-project:index>`,19.0.0.25133,18.0.0.25012,18.0.0.24232
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25133,18.0.0.25012,18.0.0.24232
|
||||
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25172,19.0.0.25133,18.0.0.24455
|
||||
:doc:`llvm-project <llvm-project:index>`,19.0.0.25172,19.0.0.25133,18.0.0.24491
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25172,19.0.0.25133,18.0.0.24491
|
||||
,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix:,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43482,6.3.42134,6.2.41133
|
||||
:doc:`HIP <hip:index>`,6.4.43482,6.3.42134,6.2.41133
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43483,6.4.43482,6.3.42131
|
||||
:doc:`HIP <hip:index>`,6.4.43483,6.4.43482,6.3.42131
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.14.0,1.13.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.14.0
|
||||
|
||||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
@@ -21,31 +21,68 @@ release cycles for PyTorch on ROCm:
|
||||
|
||||
- ROCm PyTorch release:
|
||||
|
||||
- Provides the latest version of ROCm but doesn't immediately support the latest stable PyTorch
|
||||
version.
|
||||
- Provides the latest version of ROCm but might not necessarily support the
|
||||
latest stable PyTorch version.
|
||||
|
||||
- Offers :ref:`Docker images <pytorch-docker-compat>` with ROCm and PyTorch
|
||||
pre-installed.
|
||||
preinstalled.
|
||||
|
||||
- ROCm PyTorch repository: `<https://github.com/ROCm/pytorch>`_
|
||||
|
||||
- See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>` to get started.
|
||||
- See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>`
|
||||
to get started.
|
||||
|
||||
- Official PyTorch release:
|
||||
|
||||
- Provides the latest stable version of PyTorch but doesn't immediately support the latest ROCm version.
|
||||
- Provides the latest stable version of PyTorch but might not necessarily
|
||||
support the latest ROCm version.
|
||||
|
||||
- Official PyTorch repository: `<https://github.com/pytorch/pytorch>`_
|
||||
|
||||
- See the `Nightly and latest stable version installation guide <https://pytorch.org/get-started/locally/>`_
|
||||
or `Previous versions <https://pytorch.org/get-started/previous-versions/>`_ to get started.
|
||||
or `Previous versions <https://pytorch.org/get-started/previous-versions/>`_
|
||||
to get started.
|
||||
|
||||
The upstream PyTorch includes an automatic HIPification solution that automatically generates HIP
|
||||
source code from the CUDA backend. This approach allows PyTorch to support ROCm without requiring
|
||||
manual code modifications.
|
||||
PyTorch includes tooling that generates HIP source code from the CUDA backend.
|
||||
This approach allows PyTorch to support ROCm without requiring manual code
|
||||
modifications. For more information, see :doc:`HIPIFY <hipify:index>`.
|
||||
|
||||
Development of ROCm is aligned with the stable release of PyTorch while upstream PyTorch testing uses
|
||||
the stable release of ROCm to maintain consistency.
|
||||
ROCm development is aligned with the stable release of PyTorch, while upstream
|
||||
PyTorch testing uses the stable release of ROCm to maintain consistency.
|
||||
|
||||
.. _pytorch-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
* :doc:`Using ROCm for AI: training a model </how-to/rocm-for-ai/training/benchmark-docker/pytorch-training>`
|
||||
guides how to leverage the ROCm platform for training AI models. It covers the
|
||||
steps, tools, and best practices for optimizing training workflows on AMD GPUs
|
||||
using PyTorch features.
|
||||
|
||||
* :doc:`Single-GPU fine-tuning and inference </how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference>`
|
||||
describes and demonstrates how to use the ROCm platform for the fine-tuning
|
||||
and inference of machine learning models, particularly large language models
|
||||
(LLMs), on systems with a single GPU. This topic provides a detailed guide for
|
||||
setting up, optimizing, and executing fine-tuning and inference workflows in
|
||||
such environments.
|
||||
|
||||
* :doc:`Multi-GPU fine-tuning and inference optimization </how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference>`
|
||||
describes and demonstrates the fine-tuning and inference of machine learning
|
||||
models on systems with multiple GPUs.
|
||||
|
||||
* The :doc:`Instinct MI300X workload optimization guide </how-to/rocm-for-ai/inference-optimization/workload>`
|
||||
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
|
||||
accelerator using ROCm. This guide helps users achieve optimal performance for
|
||||
deep learning and other high-performance computing tasks on the MI300X
|
||||
accelerator.
|
||||
|
||||
* The :doc:`Inception with PyTorch documentation </conceptual/ai-pytorch-inception>`
|
||||
describes how PyTorch integrates with ROCm for AI workloads It outlines the
|
||||
use of PyTorch on the ROCm platform and focuses on efficiently leveraging AMD
|
||||
GPU hardware for training and inference tasks in AI applications.
|
||||
|
||||
For more use cases and recommendations, see `ROCm PyTorch blog posts <https://rocm.blogs.amd.com/blog/tag/pytorch.html>`_.
|
||||
|
||||
.. _pytorch-docker-compat:
|
||||
|
||||
@@ -56,10 +93,10 @@ Docker image compatibility
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes ready-made `PyTorch images <https://hub.docker.com/r/rocm/pytorch>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and
|
||||
associated inventories are validated for `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_.
|
||||
Click the |docker-icon| icon to view the image on Docker Hub.
|
||||
AMD validates and publishes `PyTorch images <https://hub.docker.com/r/rocm/pytorch>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and associated
|
||||
inventories were tested on `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table:: PyTorch Docker image components
|
||||
:header-rows: 1
|
||||
@@ -212,13 +249,12 @@ Click the |docker-icon| icon to view the image on Docker Hub.
|
||||
- `4.0.3 <https://github.com/open-mpi/ompi/tree/v4.0.3>`_
|
||||
- `5.3-1.0.5.0 <https://content.mellanox.com/ofed/MLNX_OFED-5.3-1.0.5.0/MLNX_OFED_LINUX-5.3-1.0.5.0-ubuntu20.04-x86_64.tgz>`_
|
||||
|
||||
Critical ROCm libraries for PyTorch
|
||||
Key ROCm libraries for PyTorch
|
||||
================================================================================
|
||||
|
||||
The functionality of PyTorch with ROCm is determined by its underlying library
|
||||
dependencies. These critical ROCm components affect the capabilities,
|
||||
performance, and feature set available to developers. The versions described
|
||||
are available in ROCm :version:`rocm_version`.
|
||||
PyTorch functionality on ROCm is determined by its underlying library
|
||||
dependencies. These ROCm components affect the capabilities, performance, and
|
||||
feature set available to developers.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
@@ -238,24 +274,23 @@ are available in ROCm :version:`rocm_version`.
|
||||
- :version-ref:`hipBLAS rocm_version`
|
||||
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
|
||||
matrix and vector operations.
|
||||
- Supports operations like matrix multiplication, matrix-vector products,
|
||||
and tensor contractions. Utilized in both dense and batched linear
|
||||
algebra operations.
|
||||
- Supports operations such as matrix multiplication, matrix-vector
|
||||
products, and tensor contractions. Utilized in both dense and batched
|
||||
linear algebra operations.
|
||||
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
|
||||
- :version-ref:`hipBLASLt rocm_version`
|
||||
- hipBLASLt is an extension of the hipBLAS library, providing additional
|
||||
features like epilogues fused into the matrix multiplication kernel or
|
||||
use of integer tensor cores.
|
||||
- It accelerates operations like ``torch.matmul``, ``torch.mm``, and the
|
||||
- Accelerates operations such as ``torch.matmul``, ``torch.mm``, and the
|
||||
matrix multiplications used in convolutional and linear layers.
|
||||
* - `hipCUB <https://github.com/ROCm/hipCUB>`_
|
||||
- :version-ref:`hipCUB rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms for reduction,
|
||||
scan, sort and select.
|
||||
- Supports operations like ``torch.sum``, ``torch.cumsum``, ``torch.sort``
|
||||
and ``torch.topk``. Operations on sparse tensors or tensors with
|
||||
irregular shapes often involve scanning, sorting, and filtering, which
|
||||
hipCUB handles efficiently.
|
||||
- Supports operations such as ``torch.sum``, ``torch.cumsum``,
|
||||
``torch.sort`` irregular shapes often involve scanning, sorting, and
|
||||
filtering, which hipCUB handles efficiently.
|
||||
* - `hipFFT <https://github.com/ROCm/hipFFT>`_
|
||||
- :version-ref:`hipFFT rocm_version`
|
||||
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
|
||||
@@ -263,8 +298,8 @@ are available in ROCm :version:`rocm_version`.
|
||||
* - `hipRAND <https://github.com/ROCm/hipRAND>`_
|
||||
- :version-ref:`hipRAND rocm_version`
|
||||
- Provides fast random number generation for GPUs.
|
||||
- The ``torch.rand``, ``torch.randn`` and stochastic layers like
|
||||
``torch.nn.Dropout``.
|
||||
- The ``torch.rand``, ``torch.randn``, and stochastic layers like
|
||||
``torch.nn.Dropout`` rely on hipRAND.
|
||||
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
|
||||
- :version-ref:`hipSOLVER rocm_version`
|
||||
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and
|
||||
@@ -335,7 +370,7 @@ are available in ROCm :version:`rocm_version`.
|
||||
- :version-ref:`RPP rocm_version`
|
||||
- Speeds up data augmentation, transformation, and other preprocessing steps.
|
||||
- Easy to integrate into PyTorch's ``torch.utils.data`` and
|
||||
``torchvision`` data load workloads.
|
||||
``torchvision`` data load workloads to speed up data processing.
|
||||
* - `rocThrust <https://github.com/ROCm/rocThrust>`_
|
||||
- :version-ref:`rocThrust rocm_version`
|
||||
- Provides a C++ template library for parallel algorithms like sorting,
|
||||
@@ -352,11 +387,11 @@ are available in ROCm :version:`rocm_version`.
|
||||
involve matrix products, such as ``torch.matmul``, ``torch.bmm``, and
|
||||
more.
|
||||
|
||||
Supported and unsupported features
|
||||
Supported features
|
||||
================================================================================
|
||||
|
||||
The following section maps GPU-accelerated PyTorch features to their supported
|
||||
ROCm and PyTorch versions.
|
||||
This section maps GPU-accelerated PyTorch features to their supported ROCm and
|
||||
PyTorch versions.
|
||||
|
||||
torch
|
||||
--------------------------------------------------------------------------------
|
||||
@@ -364,23 +399,24 @@ torch
|
||||
`torch <https://pytorch.org/docs/stable/index.html>`_ is the central module of
|
||||
PyTorch, providing data structures for multi-dimensional tensors and
|
||||
implementing mathematical operations on them. It also includes utilities for
|
||||
efficient serialization of tensors and arbitrary data types, along with various
|
||||
other tools.
|
||||
efficient serialization of tensors and arbitrary data types and other tools.
|
||||
|
||||
Tensor data types
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The data type of a tensor is specified using the ``dtype`` attribute or argument, and PyTorch supports a wide range of data types for different use cases.
|
||||
The tensor data type is specified using the ``dtype`` attribute or argument.
|
||||
PyTorch supports many data types for different use cases.
|
||||
|
||||
The following table lists `torch.Tensor <https://pytorch.org/docs/stable/tensors.html>`_'s single data types:
|
||||
The following table lists `torch.Tensor <https://pytorch.org/docs/stable/tensors.html>`_
|
||||
single data types:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Data type
|
||||
- Description
|
||||
- Since PyTorch
|
||||
- Since ROCm
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - ``torch.float8_e4m3fn``
|
||||
- 8-bit floating point, e4m3
|
||||
- 2.3
|
||||
@@ -472,11 +508,11 @@ The following table lists `torch.Tensor <https://pytorch.org/docs/stable/tensors
|
||||
|
||||
.. note::
|
||||
|
||||
Unsigned types aside from ``uint8`` are currently only have limited support in
|
||||
eager mode (they primarily exist to assist usage with ``torch.compile``).
|
||||
Unsigned types except ``uint8`` have limited support in eager mode. They
|
||||
primarily exist to assist usage with ``torch.compile``.
|
||||
|
||||
The :doc:`ROCm precision support page <rocm:reference/precision-support>`
|
||||
collected the native HW support of different data types.
|
||||
See :doc:`ROCm precision support <rocm:reference/precision-support>` for the
|
||||
native hardware support of data types.
|
||||
|
||||
torch.cuda
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
@@ -491,8 +527,8 @@ leveraging ROCm and CUDA as the underlying frameworks.
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since PyTorch
|
||||
- Since ROCm
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - Device management
|
||||
- Utilities for managing and interacting with GPUs.
|
||||
- 0.4.0
|
||||
@@ -566,8 +602,8 @@ PyTorch interacts with the ROCm or CUDA environment.
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since PyTorch
|
||||
- Since ROCm
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - ``cufft_plan_cache``
|
||||
- Manages caching of GPU FFT plans to optimize repeated FFT computations.
|
||||
- 1.7.0
|
||||
@@ -615,8 +651,8 @@ Supported ``torch`` options include:
|
||||
|
||||
* - Option
|
||||
- Description
|
||||
- Since PyTorch
|
||||
- Since ROCm
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - ``allow_tf32``
|
||||
- TensorFloat-32 tensor cores may be used in cuDNN convolutions on NVIDIA
|
||||
Ampere or newer GPUs.
|
||||
@@ -631,28 +667,28 @@ Supported ``torch`` options include:
|
||||
Automatic mixed precision: torch.amp
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
PyTorch that automates the process of using both 16-bit (half-precision,
|
||||
float16) and 32-bit (single-precision, float32) floating-point types in model
|
||||
training and inference.
|
||||
PyTorch automates the process of using both 16-bit (half-precision, float16) and
|
||||
32-bit (single-precision, float32) floating-point types in model training and
|
||||
inference.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since PyTorch
|
||||
- Since ROCm
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - Autocasting
|
||||
- Instances of autocast serve as context managers or decorators that allow
|
||||
- Autocast instances serve as context managers or decorators that allow
|
||||
regions of your script to run in mixed precision.
|
||||
- 1.9
|
||||
- 2.5
|
||||
* - Gradient scaling
|
||||
- To prevent underflow, “gradient scaling” multiplies the network’s
|
||||
loss(es) by a scale factor and invokes a backward pass on the scaled
|
||||
loss(es). Gradients flowing backward through the network are then
|
||||
scaled by the same factor. In other words, gradient values have a
|
||||
larger magnitude, so they don’t flush to zero.
|
||||
loss by a scale factor and invokes a backward pass on the scaled
|
||||
loss. The same factor then scales gradients flowing backward through
|
||||
the network. In other words, gradient values have a larger magnitude so
|
||||
that they don’t flush to zero.
|
||||
- 1.9
|
||||
- 2.5
|
||||
* - CUDA op-specific behavior
|
||||
@@ -666,7 +702,7 @@ training and inference.
|
||||
Distributed library features
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The PyTorch distributed library includes a collective of parallelism modules, a
|
||||
PyTorch distributed library includes a collective of parallelism modules, a
|
||||
communications layer, and infrastructure for launching and debugging large
|
||||
training jobs. See :ref:`rocm-for-ai-pytorch-distributed` for more information.
|
||||
|
||||
@@ -680,13 +716,13 @@ of computational resources and scalability for large-scale tasks.
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since PyTorch
|
||||
- Since ROCm
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - TensorPipe
|
||||
- A point-to-point communication library integrated into
|
||||
PyTorch for distributed training. It is designed to handle tensor data
|
||||
transfers efficiently between different processes or devices, including
|
||||
those on separate machines.
|
||||
PyTorch for distributed training. It handles tensor data transfers
|
||||
efficiently between different processes or devices, including those on
|
||||
separate machines.
|
||||
- 1.8
|
||||
- 5.4
|
||||
* - Gloo
|
||||
@@ -705,8 +741,8 @@ torch.compiler
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since PyTorch
|
||||
- Since ROCm
|
||||
- As of PyTorch
|
||||
- As of ROCm
|
||||
* - ``torch.compiler`` (AOT Autograd)
|
||||
- Autograd captures not only the user-level code, but also backpropagation,
|
||||
which results in capturing the backwards pass “ahead-of-time”. This
|
||||
@@ -729,8 +765,8 @@ The `torchaudio <https://pytorch.org/audio/stable/index.html>`_ library provides
|
||||
utilities for processing audio data in PyTorch, such as audio loading,
|
||||
transformations, and feature extraction.
|
||||
|
||||
To ensure GPU-acceleration with ``torchaudio.transforms``, you need to move audio
|
||||
data (waveform tensor) explicitly to GPU using ``.to('cuda')``.
|
||||
To ensure GPU-acceleration with ``torchaudio.transforms``, you need to
|
||||
explicitly move audio data (waveform tensor) to GPU using ``.to('cuda')``.
|
||||
|
||||
The following ``torchaudio`` features are GPU-accelerated.
|
||||
|
||||
@@ -739,10 +775,10 @@ The following ``torchaudio`` features are GPU-accelerated.
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since torchaudio version
|
||||
- Since ROCm
|
||||
- As of torchaudio version
|
||||
- As of ROCm
|
||||
* - ``torchaudio.transforms.Spectrogram``
|
||||
- Generates spectrogram of an input waveform using STFT.
|
||||
- Generate a spectrogram of an input waveform using STFT.
|
||||
- 0.6.0
|
||||
- 4.5
|
||||
* - ``torchaudio.transforms.MelSpectrogram``
|
||||
@@ -762,7 +798,7 @@ torchvision
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
The `torchvision <https://pytorch.org/vision/stable/index.html>`_ library
|
||||
provide datasets, model architectures, and common image transformations for
|
||||
provides datasets, model architectures, and common image transformations for
|
||||
computer vision.
|
||||
|
||||
The following ``torchvision`` features are GPU-accelerated.
|
||||
@@ -772,8 +808,8 @@ The following ``torchvision`` features are GPU-accelerated.
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since torchvision version
|
||||
- Since ROCm
|
||||
- As of torchvision version
|
||||
- As of ROCm
|
||||
* - ``torchvision.transforms.functional``
|
||||
- Provides GPU-compatible transformations for image preprocessing like
|
||||
resize, normalize, rotate and crop.
|
||||
@@ -819,7 +855,7 @@ torchtune
|
||||
The `torchtune <https://pytorch.org/torchtune/stable/index.html>`_ library for
|
||||
authoring, fine-tuning and experimenting with LLMs.
|
||||
|
||||
* Usage: It works out-of-the-box, enabling developers to fine-tune ROCm PyTorch solutions.
|
||||
* Usage: Enabling developers to fine-tune ROCm PyTorch solutions.
|
||||
|
||||
* Only official release exists.
|
||||
|
||||
@@ -830,7 +866,8 @@ The `torchserve <https://pytorch.org/serve/>`_ is a PyTorch domain library
|
||||
for common sparsity and parallelism primitives needed for large-scale recommender
|
||||
systems.
|
||||
|
||||
* torchtext does not implement its own kernels. ROCm support is enabled by linking against ROCm libraries.
|
||||
* torchtext does not implement its own kernels. ROCm support is enabled by
|
||||
linking against ROCm libraries.
|
||||
|
||||
* Only official release exists.
|
||||
|
||||
@@ -841,14 +878,16 @@ The `torchrec <https://pytorch.org/torchrec/>`_ is a PyTorch domain library for
|
||||
common sparsity and parallelism primitives needed for large-scale recommender
|
||||
systems.
|
||||
|
||||
* torchrec does not implement its own kernels. ROCm support is enabled by linking against ROCm libraries.
|
||||
* torchrec does not implement its own kernels. ROCm support is enabled by
|
||||
linking against ROCm libraries.
|
||||
|
||||
* Only official release exists.
|
||||
|
||||
Unsupported PyTorch features
|
||||
----------------------------
|
||||
================================================================================
|
||||
|
||||
The following are GPU-accelerated PyTorch features not currently supported by ROCm.
|
||||
The following GPU-accelerated PyTorch features are not supported by ROCm for
|
||||
the listed supported PyTorch versions.
|
||||
|
||||
.. list-table::
|
||||
:widths: 30, 60, 10
|
||||
@@ -856,7 +895,7 @@ The following are GPU-accelerated PyTorch features not currently supported by RO
|
||||
|
||||
* - Feature
|
||||
- Description
|
||||
- Since PyTorch
|
||||
- As of PyTorch
|
||||
* - APEX batch norm
|
||||
- Use APEX batch norm instead of PyTorch batch norm.
|
||||
- 1.6.0
|
||||
@@ -912,31 +951,3 @@ The following are GPU-accelerated PyTorch features not currently supported by RO
|
||||
utilized effectively through custom CUDA extensions or advanced
|
||||
workflows.
|
||||
- Not a core feature
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
* :doc:`Using ROCm for AI: training a model </how-to/rocm-for-ai/training/train-a-model>` provides
|
||||
guidance on how to leverage the ROCm platform for training AI models. It covers the steps, tools, and best practices
|
||||
for optimizing training workflows on AMD GPUs using PyTorch features.
|
||||
|
||||
* :doc:`Single-GPU fine-tuning and inference </how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference>`
|
||||
describes and demonstrates how to use the ROCm platform for the fine-tuning and inference of
|
||||
machine learning models, particularly large language models (LLMs), on systems with a single AMD
|
||||
Instinct MI300X accelerator. This page provides a detailed guide for setting up, optimizing, and
|
||||
executing fine-tuning and inference workflows in such environments.
|
||||
|
||||
* :doc:`Multi-GPU fine-tuning and inference optimization </how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference>`
|
||||
describes and demonstrates the fine-tuning and inference of machine learning models on systems
|
||||
with multi MI300X accelerators.
|
||||
|
||||
* The :doc:`Instinct MI300X workload optimization guide </how-to/rocm-for-ai/inference-optimization/workload>` provides detailed
|
||||
guidance on optimizing workloads for the AMD Instinct MI300X accelerator using ROCm. This guide is aimed at helping
|
||||
users achieve optimal performance for deep learning and other high-performance computing tasks on the MI300X
|
||||
accelerator.
|
||||
|
||||
* The :doc:`Inception with PyTorch documentation </conceptual/ai-pytorch-inception>`
|
||||
describes how PyTorch integrates with ROCm for AI workloads It outlines the use of PyTorch on the ROCm platform and
|
||||
focuses on how to efficiently leverage AMD GPU hardware for training and inference tasks in AI applications.
|
||||
|
||||
For more use cases and recommendations, see `ROCm PyTorch blog posts <https://rocm.blogs.amd.com/blog/tag/pytorch.html>`_.
|
||||
|
||||
@@ -34,15 +34,15 @@ project = "ROCm Documentation"
|
||||
project_path = os.path.abspath(".").replace("\\", "/")
|
||||
author = "Advanced Micro Devices, Inc."
|
||||
copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved."
|
||||
version = "6.4.0"
|
||||
release = "6.4.0"
|
||||
version = "6.4.1"
|
||||
release = "6.4.1"
|
||||
setting_all_article_info = True
|
||||
all_article_info_os = ["linux", "windows"]
|
||||
all_article_info_author = ""
|
||||
|
||||
# pages with specific settings
|
||||
article_pages = [
|
||||
{"file": "about/release-notes", "os": ["linux"], "date": "2025-04-11"},
|
||||
{"file": "about/release-notes", "os": ["linux"], "date": "2025-05-07"},
|
||||
{"file": "release/changelog", "os": ["linux"],},
|
||||
{"file": "compatibility/compatibility-matrix", "os": ["linux"]},
|
||||
{"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]},
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
vllm_benchmark:
|
||||
unified_docker:
|
||||
latest:
|
||||
pull_tag: rocm/vllm:instinct_main
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.7.3_20250311/images/sha256-de0a2649b735f45b7ecab8813eb7b19778ae1f40591ca1196b07bc29c42ed4a3
|
||||
pull_tag: rocm/vllm:rocm6.3.1_instinct_vllm0.8.3_20250415
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.8.3_20250415/images/sha256-ad9062dea3483d59dedb17c67f7c49f30eebd6eb37c3fac0a171fb19696cc845
|
||||
rocm_version: 6.3.1
|
||||
vllm_version: 0.7.3
|
||||
vllm_version: 0.8.3
|
||||
pytorch_version: 2.7.0 (dev nightly)
|
||||
hipblaslt_version: 0.13
|
||||
model_groups:
|
||||
@@ -102,19 +102,12 @@ vllm_benchmark:
|
||||
model_repo: Qwen/Qwen2-72B-Instruct
|
||||
url: https://huggingface.co/Qwen/Qwen2-72B-Instruct
|
||||
precision: float16
|
||||
- group: JAIS
|
||||
tag: jais
|
||||
models:
|
||||
- model: JAIS 13B
|
||||
mad_tag: pyt_vllm_jais-13b
|
||||
model_repo: core42/jais-13b-chat
|
||||
url: https://huggingface.co/core42/jais-13b-chat
|
||||
precision: float16
|
||||
- model: JAIS 30B
|
||||
mad_tag: pyt_vllm_jais-30b
|
||||
model_repo: core42/jais-30b-chat-v3
|
||||
url: https://huggingface.co/core42/jais-30b-chat-v3
|
||||
- model: QwQ-32B
|
||||
mad_tag: pyt_vllm_qwq-32b
|
||||
model_repo: Qwen/QwQ-32B
|
||||
url: https://huggingface.co/Qwen/QwQ-32B
|
||||
precision: float16
|
||||
tunableop: true
|
||||
- group: DBRX
|
||||
tag: dbrx
|
||||
models:
|
||||
@@ -157,3 +150,11 @@ vllm_benchmark:
|
||||
model_repo: deepseek-ai/deepseek-moe-16b-chat
|
||||
url: https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat
|
||||
precision: float16
|
||||
- group: TII Falcon
|
||||
tag: falcon
|
||||
models:
|
||||
- model: Falcon 180B
|
||||
mad_tag: pyt_vllm_falcon-180b
|
||||
model_repo: tiiuae/falcon-180B
|
||||
url: https://huggingface.co/tiiuae/falcon-180B
|
||||
precision: float16
|
||||
|
||||
BIN
docs/data/rocm-software-stack-6_4_0.jpg
Normal file
BIN
docs/data/rocm-software-stack-6_4_0.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.2 MiB |
@@ -1,15 +1,178 @@
|
||||
.. meta::
|
||||
:description: How to use model quantization techniques to speed up inference.
|
||||
:keywords: ROCm, LLM, fine-tuning, usage, tutorial, quantization, GPTQ, transformers, bitsandbytes
|
||||
:keywords: ROCm, LLM, fine-tuning, usage, tutorial, quantization, Quark, GPTQ, transformers, bitsandbytes
|
||||
|
||||
*****************************
|
||||
Model quantization techniques
|
||||
*****************************
|
||||
|
||||
Quantization reduces the model size compared to its native full-precision version, making it easier to fit large models
|
||||
onto accelerators or GPUs with limited memory usage. This section explains how to perform LLM quantization using GPTQ
|
||||
onto accelerators or GPUs with limited memory usage. This section explains how to perform LLM quantization using AMD Quark, GPTQ
|
||||
and bitsandbytes on AMD Instinct hardware.
|
||||
|
||||
.. _quantize-llms-quark:
|
||||
|
||||
AMD Quark
|
||||
=========
|
||||
|
||||
`AMD Quark <https://quark.docs.amd.com/latest/>`_ offers the leading efficient and scalable quantization solution tailored to AMD Instinct GPUs. It supports ``FP8`` and ``INT8`` quantization for activations, weights, and KV cache,
|
||||
including ``FP8`` attention. For very large models, it employs a two-level ``INT4-FP8`` scheme—storing weights in ``INT4`` while computing with ``FP8``—for nearly 4× compression without sacrificing accuracy.
|
||||
Quark scales efficiently across multiple GPUs, efficiently handling ultra-large models like Llama-3.1-405B. Quantized ``FP8`` models like Llama, Mixtral, and Grok-1 are available under the `AMD organization on Hugging Face <https://huggingface.co/collections/amd/quark-quantized-ocp-fp8-models-66db7936d18fcbaf95d4405c>`_, and can be deployed directly via `vLLM <https://github.com/vllm-project/vllm/tree/main/vllm>`_.
|
||||
|
||||
Installing Quark
|
||||
-------------------
|
||||
|
||||
The latest release of Quark can be installed with pip
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
pip install amd-quark
|
||||
|
||||
For detailed installation instructions, refer to the `Quark documentation <https://quark.docs.amd.com/latest/install.html>`_.
|
||||
|
||||
|
||||
Using Quark for quantization
|
||||
-----------------------------
|
||||
|
||||
#. First, load the pre-trained model and its corresponding tokenizer using the Hugging Face ``transformers`` library.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||
|
||||
MODEL_ID = "meta-llama/Llama-2-70b-chat-hf"
|
||||
MAX_SEQ_LEN = 512
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
MODEL_ID, device_map="auto", torch_dtype="auto",
|
||||
)
|
||||
model.eval()
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, model_max_length=MAX_SEQ_LEN)
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
|
||||
#. Prepare the calibration DataLoader (static quantization requires calibration data).
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from datasets import load_dataset
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
BATCH_SIZE = 1
|
||||
NUM_CALIBRATION_DATA = 512
|
||||
|
||||
dataset = load_dataset("mit-han-lab/pile-val-backup", split="validation")
|
||||
text_data = dataset["text"][:NUM_CALIBRATION_DATA]
|
||||
|
||||
tokenized_outputs = tokenizer(
|
||||
text_data, return_tensors="pt", padding=True, truncation=True, max_length=MAX_SEQ_LEN
|
||||
)
|
||||
calib_dataloader = DataLoader(
|
||||
tokenized_outputs['input_ids'], batch_size=BATCH_SIZE, drop_last=True
|
||||
)
|
||||
|
||||
#. Define the quantization configuration. See the comments in the following code snippet for descriptions of each configuration option.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from quark.torch.quantization import (Config, QuantizationConfig,
|
||||
FP8E4M3PerTensorSpec)
|
||||
|
||||
# Define fp8/per-tensor/static spec.
|
||||
FP8_PER_TENSOR_SPEC = FP8E4M3PerTensorSpec(observer_method="min_max",
|
||||
is_dynamic=False).to_quantization_spec()
|
||||
|
||||
# Define global quantization config, input tensors and weight apply FP8_PER_TENSOR_SPEC.
|
||||
global_quant_config = QuantizationConfig(input_tensors=FP8_PER_TENSOR_SPEC,
|
||||
weight=FP8_PER_TENSOR_SPEC)
|
||||
|
||||
# Define quantization config for kv-cache layers, output tensors apply FP8_PER_TENSOR_SPEC.
|
||||
KV_CACHE_SPEC = FP8_PER_TENSOR_SPEC
|
||||
kv_cache_layer_names_for_llama = ["*k_proj", "*v_proj"]
|
||||
kv_cache_quant_config = {name :
|
||||
QuantizationConfig(input_tensors=global_quant_config.input_tensors,
|
||||
weight=global_quant_config.weight,
|
||||
output_tensors=KV_CACHE_SPEC)
|
||||
for name in kv_cache_layer_names_for_llama}
|
||||
layer_quant_config = kv_cache_quant_config.copy()
|
||||
|
||||
EXCLUDE_LAYERS = ["lm_head"]
|
||||
quant_config = Config(
|
||||
global_quant_config=global_quant_config,
|
||||
layer_quant_config=layer_quant_config,
|
||||
kv_cache_quant_config=kv_cache_quant_config,
|
||||
exclude=EXCLUDE_LAYERS)
|
||||
|
||||
#. Quantize the model and export
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import torch
|
||||
from quark.torch import ModelQuantizer, ModelExporter
|
||||
from quark.torch.export import ExporterConfig, JsonExporterConfig
|
||||
|
||||
# Apply quantization.
|
||||
quantizer = ModelQuantizer(quant_config)
|
||||
quant_model = quantizer.quantize_model(model, calib_dataloader)
|
||||
|
||||
# Freeze quantized model to export.
|
||||
freezed_model = quantizer.freeze(model)
|
||||
|
||||
# Define export config.
|
||||
LLAMA_KV_CACHE_GROUP = ["*k_proj", "*v_proj"]
|
||||
export_config = ExporterConfig(json_export_config=JsonExporterConfig())
|
||||
export_config.json_export_config.kv_cache_group = LLAMA_KV_CACHE_GROUP
|
||||
|
||||
EXPORT_DIR = MODEL_ID.split("/")[1] + "-w-fp8-a-fp8-kvcache-fp8-pertensor"
|
||||
exporter = ModelExporter(config=export_config, export_dir=EXPORT_DIR)
|
||||
with torch.no_grad():
|
||||
exporter.export_safetensors_model(freezed_model,
|
||||
quant_config=quant_config, tokenizer=tokenizer)
|
||||
|
||||
Evaluating the quantized model with vLLM
|
||||
----------------------------------------
|
||||
|
||||
The exported Quark-quantized model can be loaded directly by vLLM for inference. You need to specify the model path and inform vLLM about the quantization method (``quantization='quark'``) and the KV cache data type (``kv_cache_dtype='fp8'``).
|
||||
Use the ``LLM`` interface to load the model:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from vllm import LLM, SamplingParamsinterface
|
||||
|
||||
# Sample prompts.
|
||||
prompts = [
|
||||
"Hello, my name is",
|
||||
"The president of the United States is",
|
||||
"The capital of France is",
|
||||
"The future of AI is",
|
||||
]
|
||||
# Create a sampling params object.
|
||||
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
|
||||
|
||||
# Create an LLM.
|
||||
llm = LLM(model="Llama-2-70b-chat-hf-w-fp8-a-fp8-kvcache-fp8-pertensor",
|
||||
kv_cache_dtype='fp8',quantization='quark')
|
||||
# Generate texts from the prompts. The output is a list of RequestOutput objects
|
||||
# that contain the prompt, generated text, and other information.
|
||||
outputs = llm.generate(prompts, sampling_params)
|
||||
# Print the outputs.
|
||||
print("\nGenerated Outputs:\n" + "-" * 60)
|
||||
for output in outputs:
|
||||
prompt = output.prompt
|
||||
generated_text = output.outputs[0].text
|
||||
print(f"Prompt: {prompt!r}")
|
||||
print(f"Output: {generated_text!r}")
|
||||
print("-" * 60)
|
||||
|
||||
You can also evaluate the quantized model's accuracy on standard benchmarks using the `lm-evaluation-harness <https://github.com/EleutherAI/lm-evaluation-harness>`_. Pass the necessary vLLM arguments to ``lm_eval`` via ``--model_args``.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
lm_eval --model vllm \
|
||||
--model_args pretrained=Llama-2-70b-chat-hf-w-fp8-a-fp8-kvcache-fp8-pertensor,kv_cache_dtype='fp8',quantization='quark' \
|
||||
--tasks gsm8k
|
||||
|
||||
This provides a standardized way to measure the performance impact of quantization.
|
||||
.. _fine-tune-llms-gptq:
|
||||
|
||||
GPTQ
|
||||
@@ -33,7 +196,7 @@ The AutoGPTQ library implements the GPTQ algorithm.
|
||||
.. code-block:: shell
|
||||
|
||||
# This will install pre-built wheel for a specific ROCm version.
|
||||
|
||||
|
||||
pip install auto-gptq --no-build-isolation --extra-index-url https://huggingface.github.io/autogptq-index/whl/rocm573/
|
||||
|
||||
Or, install AutoGPTQ from source for the appropriate ROCm version (for example, ROCm 6.1).
|
||||
@@ -43,10 +206,10 @@ The AutoGPTQ library implements the GPTQ algorithm.
|
||||
# Clone the source code.
|
||||
git clone https://github.com/AutoGPTQ/AutoGPTQ.git
|
||||
cd AutoGPTQ
|
||||
|
||||
|
||||
# Speed up the compilation by specifying PYTORCH_ROCM_ARCH to target device.
|
||||
PYTORCH_ROCM_ARCH=gfx942 ROCM_VERSION=6.1 pip install .
|
||||
|
||||
|
||||
# Show the package after the installation
|
||||
|
||||
#. Run ``pip show auto-gptq`` to print information for the installed ``auto-gptq`` package. Its output should look like
|
||||
@@ -112,7 +275,7 @@ Using GPTQ with Hugging Face Transformers
|
||||
.. code-block:: python
|
||||
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
|
||||
|
||||
|
||||
base_model_name = " NousResearch/Llama-2-7b-hf"
|
||||
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
|
||||
gptq_config = GPTQConfig(bits=4, dataset="c4", tokenizer=tokenizer)
|
||||
@@ -212,10 +375,10 @@ To get started with bitsandbytes primitives, use the following code as reference
|
||||
.. code-block:: python
|
||||
|
||||
import bitsandbytes as bnb
|
||||
|
||||
|
||||
# Use Int8 Matrix Multiplication
|
||||
bnb.matmul(..., threshold=6.0)
|
||||
|
||||
|
||||
# Use bitsandbytes 8-bit Optimizers
|
||||
adam = bnb.optim.Adam8bit(model.parameters(), lr=0.001, betas=(0.9, 0.995))
|
||||
|
||||
@@ -227,14 +390,14 @@ To load a Transformers model in 4-bit, set ``load_in_4bit=true`` in ``BitsAndByt
|
||||
.. code-block:: python
|
||||
|
||||
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
|
||||
|
||||
|
||||
base_model_name = "NousResearch/Llama-2-7b-hf"
|
||||
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
|
||||
bnb_model_4bit = AutoModelForCausalLM.from_pretrained(
|
||||
base_model_name,
|
||||
device_map="auto",
|
||||
quantization_config=quantization_config)
|
||||
|
||||
|
||||
# Check the memory footprint with get_memory_footprint method
|
||||
print(bnb_model_4bit.get_memory_footprint())
|
||||
|
||||
@@ -243,9 +406,9 @@ To load a model in 8-bit for inference, use the ``load_in_8bit`` option.
|
||||
.. code-block:: python
|
||||
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
||||
|
||||
|
||||
base_model_name = "NousResearch/Llama-2-7b-hf"
|
||||
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
|
||||
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
|
||||
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
|
||||
@@ -253,7 +416,7 @@ To load a model in 8-bit for inference, use the ``load_in_8bit`` option.
|
||||
base_model_name,
|
||||
device_map="auto",
|
||||
quantization_config=quantization_config)
|
||||
|
||||
|
||||
prompt = "What is a large language model?"
|
||||
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
|
||||
generated_ids = model.generate(**inputs)
|
||||
|
||||
@@ -92,6 +92,10 @@ PyTorch inference performance testing
|
||||
|
||||
docker pull rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0_triton_llvm_reg_issue
|
||||
|
||||
.. note::
|
||||
|
||||
The Chai-1 benchmark uses a specifically selected Docker image using ROCm 6.2.3 and PyTorch 2.3.0 to address an accuracy issue.
|
||||
|
||||
.. container:: model-doc pyt_clip_inference
|
||||
|
||||
2. Use the following command to pull the `ROCm PyTorch Docker image <https://hub.docker.com/layers/rocm/pytorch/rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0_triton_llvm_reg_issue/images/sha256-b736a4239ab38a9d0e448af6d4adca83b117debed00bfbe33846f99c4540f79b>`_ from Docker Hub.
|
||||
|
||||
@@ -34,7 +34,7 @@ vLLM inference performance testing
|
||||
|
||||
.. _vllm-benchmark-available-models:
|
||||
|
||||
Available models
|
||||
Supported models
|
||||
================
|
||||
|
||||
.. raw:: html
|
||||
@@ -183,6 +183,25 @@ vLLM inference performance testing
|
||||
to collect latency and throughput performance data, you can also change the benchmarking
|
||||
parameters. See the standalone benchmarking tab for more information.
|
||||
|
||||
{% if model.tunableop %}
|
||||
|
||||
.. note::
|
||||
|
||||
For improved performance, consider enabling :ref:`PyTorch TunableOp <mi300x-tunableop>`.
|
||||
TunableOp automatically explores different implementations and configurations of certain PyTorch
|
||||
operators to find the fastest one for your hardware.
|
||||
|
||||
By default, ``{{model.mad_tag}}`` runs with TunableOp disabled
|
||||
(see
|
||||
`<https://github.com/ROCm/MAD/blob/develop/models.json>`__). To
|
||||
enable it, edit the default run behavior in the ``models.json``
|
||||
configuration before running inference -- update the model's run
|
||||
``args`` by changing ``--tunableop off`` to ``--tunableop on``.
|
||||
|
||||
Enabling TunableOp triggers a two-pass run -- a warm-up followed by the performance-collection run.
|
||||
|
||||
{% endif %}
|
||||
|
||||
.. tab-item:: Standalone benchmarking
|
||||
|
||||
Run the vLLM benchmark tool independently by starting the
|
||||
@@ -331,11 +350,18 @@ for benchmarking, see the version-specific documentation.
|
||||
- PyTorch version
|
||||
- Resources
|
||||
|
||||
* - 6.3.1
|
||||
- 0.7.3
|
||||
- 2.7.0
|
||||
-
|
||||
* `Documentation <https://rocm.docs.amd.com/en/docs-6.3.3/how-to/rocm-for-ai/inference/vllm-benchmark.html>`_
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.7.3_20250325/images/sha256-25245924f61750b19be6dcd8e787e46088a496c1fe17ee9b9e397f3d84d35640>`_
|
||||
|
||||
* - 6.3.1
|
||||
- 0.6.6
|
||||
- 2.7.0
|
||||
-
|
||||
* `Documentation <https://rocm.docs.amd.com/en/docs-6.3.2/how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.html>`_
|
||||
* `Documentation <https://rocm.docs.amd.com/en/docs-6.3.2/how-to/rocm-for-ai/inference/vllm-benchmark.html>`_
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_mi300_ubuntu22.04_py3.12_vllm_0.6.6/images/sha256-9a12ef62bbbeb5a4c30a01f702c8e025061f575aa129f291a49fbd02d6b4d6c9>`_
|
||||
|
||||
* - 6.2.1
|
||||
|
||||
@@ -12,7 +12,7 @@ ROCm is an optimized fork of the upstream
|
||||
`<https://github.com/AI-Hypercomputer/maxtext>`__ enabling efficient AI workloads
|
||||
on AMD MI300X series accelerators.
|
||||
|
||||
The MaxText for ROCm training Docker (``rocm/jax-training:maxtext-v25.4``) image
|
||||
The MaxText for ROCm training Docker (``rocm/jax-training:maxtext-v25.5``) image
|
||||
provides a prebuilt environment for training on AMD Instinct MI300X and MI325X accelerators,
|
||||
including essential components like JAX, XLA, ROCm libraries, and MaxText utilities.
|
||||
It includes the following software components:
|
||||
@@ -20,15 +20,15 @@ It includes the following software components:
|
||||
+--------------------------+--------------------------------+
|
||||
| Software component | Version |
|
||||
+==========================+================================+
|
||||
| ROCm | 6.3.0 |
|
||||
| ROCm | 6.3.4 |
|
||||
+--------------------------+--------------------------------+
|
||||
| JAX | 0.4.31 |
|
||||
| JAX | 0.4.35 |
|
||||
+--------------------------+--------------------------------+
|
||||
| Python | 3.10 |
|
||||
| Python | 3.10.12 |
|
||||
+--------------------------+--------------------------------+
|
||||
| Transformer Engine | 1.12.0.dev0+f81a3eb |
|
||||
| Transformer Engine | 1.12.0.dev0+b8b92dc |
|
||||
+--------------------------+--------------------------------+
|
||||
| hipBLASLt | git78ec8622 |
|
||||
| hipBLASLt | 0.13.0-ae9c477a |
|
||||
+--------------------------+--------------------------------+
|
||||
|
||||
Supported features and models
|
||||
@@ -48,6 +48,8 @@ MaxText provides the following key features to train large language models effic
|
||||
|
||||
The following models are pre-optimized for performance on AMD Instinct MI300X series accelerators.
|
||||
|
||||
* Llama 3.3 70B
|
||||
|
||||
* Llama 3.1 8B
|
||||
|
||||
* Llama 3.1 70B
|
||||
@@ -115,7 +117,7 @@ with RDMA, skip ahead to :ref:`amd-maxtext-download-docker`.
|
||||
|
||||
a. Master address
|
||||
|
||||
Change `localhost` to the master node's resolvable hostname or IP address:
|
||||
Change ``localhost`` to the master node's resolvable hostname or IP address:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
@@ -180,13 +182,15 @@ Download the Docker image
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull rocm/jax-training:maxtext-v25.4
|
||||
docker pull rocm/jax-training:maxtext-v25.5
|
||||
|
||||
2. Run the Docker container.
|
||||
2. Use the following command to launch the Docker container. Note that the benchmarking scripts
|
||||
used in the :ref:`following section <amd-maxtext-get-started>` automatically launch the Docker container
|
||||
and execute the benchmark.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it --device /dev/dri --device /dev/kfd --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME/.ssh:/root/.ssh --shm-size 128G --name maxtext_training rocm/jax-training:maxtext-v25.4
|
||||
docker run -it --device /dev/dri --device /dev/kfd --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME/.ssh:/root/.ssh --shm-size 128G --name maxtext_training rocm/jax-training:maxtext-v25.5
|
||||
|
||||
.. _amd-maxtext-get-started:
|
||||
|
||||
@@ -219,7 +223,9 @@ Single node training benchmarking examples
|
||||
|
||||
Run the single node training benchmark:
|
||||
|
||||
IMAGE="rocm/jax-training:maxtext-v25.4" bash ./llama2_7b.sh
|
||||
.. code-block:: shell
|
||||
|
||||
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama2_7b.sh
|
||||
|
||||
* Example 2: Single node training with Llama 2 70B
|
||||
|
||||
@@ -233,7 +239,7 @@ Single node training benchmarking examples
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
IMAGE="rocm/jax-training:maxtext-v25.4" bash ./llama2_70b.sh
|
||||
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama2_70b.sh
|
||||
|
||||
* Example 3: Single node training with Llama 3 8B
|
||||
|
||||
@@ -247,7 +253,7 @@ Single node training benchmarking examples
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
IMAGE="rocm/jax-training:maxtext-v25.4" bash ./llama3_8b.sh
|
||||
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama3_8b.sh
|
||||
|
||||
* Example 4: Single node training with Llama 3 70B
|
||||
|
||||
@@ -261,9 +267,23 @@ Single node training benchmarking examples
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
IMAGE="rocm/jax-training:maxtext-v25.4" bash ./llama3_70b.sh
|
||||
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama3_70b.sh
|
||||
|
||||
* Example 5: Single node training with DeepSeek V2 16B
|
||||
* Example 5: Single node training with Llama 3.3 70B
|
||||
|
||||
Download the benchmarking script:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
wget https://raw.githubusercontent.com/ROCm/maxtext/refs/heads/main/benchmarks/gpu-rocm/llama3.3_70b.sh
|
||||
|
||||
Run the single node training benchmark:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./llama3.3_70b.sh
|
||||
|
||||
* Example 6: Single node training with DeepSeek V2 16B
|
||||
|
||||
Download the benchmarking script:
|
||||
|
||||
@@ -275,7 +295,7 @@ Single node training benchmarking examples
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
IMAGE="rocm/jax-training:maxtext-v25.4" bash ./deepseek_v2_16b.sh
|
||||
IMAGE="rocm/jax-training:maxtext-v25.5" bash ./deepseek_v2_16b.sh
|
||||
|
||||
.. note::
|
||||
|
||||
@@ -343,3 +363,26 @@ own cluster setup.
|
||||
.. code-block:: shell
|
||||
|
||||
sbatch -N <num_nodes> llama3_70b_multinode.sh
|
||||
|
||||
Previous versions
|
||||
=================
|
||||
|
||||
This table lists previous versions of the ROCm JAX MaxText Docker image for training
|
||||
performance testing. For detailed information about available models for
|
||||
benchmarking, see the version-specific documentation.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:stub-columns: 1
|
||||
|
||||
* - Image version
|
||||
- ROCm version
|
||||
- JAX version
|
||||
- Resources
|
||||
|
||||
* - 25.4
|
||||
- 6.3.0
|
||||
- 0.4.31
|
||||
-
|
||||
* `Documentation <https://rocm.docs.amd.com/en/docs-6.3.3/how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.html>`_
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.4/images/sha256-fb3eb71cd74298a7b3044b7130cf84113f14d518ff05a2cd625c11ea5f6a7b01>`_
|
||||
|
||||
@@ -443,7 +443,7 @@ benchmarking, see the version-specific documentation.
|
||||
- 6.3.0
|
||||
- 2.7.0a0+git637433
|
||||
-
|
||||
* `Documentation <https://rocm.docs.amd.com/en/docs-6.3.4/how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.html>`_
|
||||
* `Documentation <https://rocm.docs.amd.com/en/docs-6.3.3/how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.html>`_
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-training/v25.4/images/sha256-fa98a9aa69968e654466c06f05aaa12730db79b48b113c1ab4f7a5fe6920a20b>`_
|
||||
|
||||
* - v25.3
|
||||
|
||||
@@ -45,6 +45,7 @@
|
||||
(communication-libraries)=
|
||||
|
||||
* {doc}`RCCL <rccl:index>`
|
||||
* [rocSHMEM](https://github.com/ROCm/rocSHMEM)
|
||||
:::
|
||||
|
||||
:::{grid-item-card} Math
|
||||
|
||||
@@ -12,14 +12,14 @@ subtrees:
|
||||
- file: compatibility/compatibility-matrix.rst
|
||||
title: Compatibility matrix
|
||||
entries:
|
||||
- url: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html
|
||||
- url: https://rocm.docs.amd.com/projects/install-on-linux-internal/en/latest/reference/system-requirements.html
|
||||
title: Linux system requirements
|
||||
- url: https://rocm.docs.amd.com/projects/install-on-windows/en/${branch}/reference/system-requirements.html
|
||||
title: Windows system requirements
|
||||
|
||||
- caption: Install
|
||||
entries:
|
||||
- url: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/
|
||||
- url: https://rocm.docs.amd.com/projects/install-on-linux-internal/en/latest/
|
||||
title: ROCm on Linux
|
||||
- url: https://rocm.docs.amd.com/projects/install-on-windows/en/${branch}/
|
||||
title: HIP SDK on Windows
|
||||
|
||||
@@ -10,7 +10,7 @@ ROCm is a software stack, composed primarily of open-source software, that
|
||||
provides the tools for programming AMD Graphics Processing Units (GPUs), from
|
||||
low-level kernels to high-level end-user applications.
|
||||
|
||||
.. image:: data/rocm-software-stack-6_3_2.jpg
|
||||
.. image:: data/rocm-software-stack-6_4_0.jpg
|
||||
:width: 800
|
||||
:alt: AMD's ROCm software stack and enabling technologies.
|
||||
:align: center
|
||||
@@ -52,6 +52,7 @@ Communication
|
||||
:header: "Component", "Description"
|
||||
|
||||
":doc:`RCCL <rccl:index>`", "Standalone library that provides multi-GPU and multi-node collective communication primitives"
|
||||
"`rocSHMEM <https://github.com/ROCm/rocSHMEM>`_", "Runtime that provides GPU-centric networking through an OpenSHMEM-like interface. This intra-kernel networking library simplifies application code complexity and enables more fine-grained communication/computation overlap than traditional host-driven networking."
|
||||
|
||||
Math
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Submodule submodule-srcs/AMDMIGraphX deleted from e8b735f65e
Submodule submodule-srcs/HIP deleted from a0a2dd7fc8
Submodule submodule-srcs/HIPIFY deleted from b803a5270b
Submodule submodule-srcs/MIOpen deleted from a583315f3e
Submodule submodule-srcs/MIVisionX deleted from a2b69e5b30
Submodule submodule-srcs/ROCK-Kernel-Driver deleted from e796ccd5f9
Submodule submodule-srcs/ROCR-Runtime deleted from 4264d016ec
Submodule submodule-srcs/ROCdbgapi deleted from 59be7ff0aa
Submodule submodule-srcs/ROCgdb deleted from 401bb21f2f
Submodule submodule-srcs/ROCmValidationSuite deleted from fb251886ed
Submodule submodule-srcs/Tensile deleted from be49885fce
Submodule submodule-srcs/TransferBench deleted from 3ea2f226ec
Submodule submodule-srcs/amdsmi deleted from ede62f2534
Submodule submodule-srcs/clr deleted from 0f2d602424
Submodule submodule-srcs/composable_kernel deleted from a8c5bd9b9a
Submodule submodule-srcs/half deleted from 1ddada2251
Submodule submodule-srcs/hip-tests deleted from 3573bde0c2
Submodule submodule-srcs/hipBLAS deleted from 0a335435e9
Submodule submodule-srcs/hipBLAS-common deleted from 7c1566ba46
Submodule submodule-srcs/hipBLASLt deleted from a999b0721d
Submodule submodule-srcs/hipCUB deleted from a6005943c5
Submodule submodule-srcs/hipFFT deleted from 396169c84a
Submodule submodule-srcs/hipRAND deleted from d2516cc199
Submodule submodule-srcs/hipSOLVER deleted from ca0de3c9c9
Submodule submodule-srcs/hipSPARSE deleted from a6c62e48eb
Submodule submodule-srcs/hipSPARSELt deleted from f3f4f590a4
Submodule submodule-srcs/hipTensor deleted from e5529b9291
Submodule submodule-srcs/hipfort deleted from f3d6aa3e86
Submodule submodule-srcs/hipother deleted from 49b1588f83
Submodule submodule-srcs/llvm-project deleted from c7fe45cf4b
Submodule submodule-srcs/openmp-extras/aomp deleted from 1cd9ec1017
Submodule submodule-srcs/openmp-extras/aomp-extras deleted from 97567952ae
Submodule submodule-srcs/rccl deleted from 7b86f83d84
Submodule submodule-srcs/rdc deleted from be34d624f6
Submodule submodule-srcs/rocAL deleted from 373ef865ac
Submodule submodule-srcs/rocALUTION deleted from 9713084af8
Submodule submodule-srcs/rocBLAS deleted from 80e5394d6a
Submodule submodule-srcs/rocDecode deleted from a2a7b63cad
Submodule submodule-srcs/rocFFT deleted from 058ba87fdc
Submodule submodule-srcs/rocJPEG deleted from 73d36d35d9
Submodule submodule-srcs/rocPRIM deleted from d8771ec18a
Submodule submodule-srcs/rocPyDecode deleted from 848e49d29d
Submodule submodule-srcs/rocRAND deleted from 4d5d3a88d1
Submodule submodule-srcs/rocSHMEM deleted from 7702b3c0f3
Submodule submodule-srcs/rocSOLVER deleted from db754e3f55
Submodule submodule-srcs/rocSPARSE deleted from 4953add0ae
Submodule submodule-srcs/rocThrust deleted from 6bf2777019
Submodule submodule-srcs/rocWMMA deleted from 1a5b623166
Submodule submodule-srcs/rocm-cmake deleted from ecc716b97c
Submodule submodule-srcs/rocm-core deleted from 73dae9c82a
Submodule submodule-srcs/rocm-examples deleted from 3bbd2987a3
Submodule submodule-srcs/rocm_bandwidth_test deleted from 84b8ddd268
Submodule submodule-srcs/rocm_smi_lib deleted from 03a4530b68
Submodule submodule-srcs/rocminfo deleted from 6ea2ba38c8
Submodule submodule-srcs/rocprofiler deleted from 40da7312a0
Submodule submodule-srcs/rocprofiler-compute deleted from a11d700e10
Submodule submodule-srcs/rocprofiler-register deleted from 7c6cd44f63
Submodule submodule-srcs/rocprofiler-sdk deleted from e8e49fe769
Submodule submodule-srcs/rocprofiler-systems deleted from 9c07bf3ab0
Submodule submodule-srcs/rocr_debug_agent deleted from 5c49ec91fd
Submodule submodule-srcs/roctracer deleted from f55a694381
Submodule submodule-srcs/rpp deleted from 5fb204ca70
Submodule submodule-srcs/spirv-llvm-translator deleted from 8ed662a93b
Reference in New Issue
Block a user