mirror of
https://github.com/ROCm/ROCm.git
synced 2026-01-09 22:58:17 -05:00
Compare commits
9 Commits
llvm2-upda
...
JeniferC99
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
099b646195 | ||
|
|
1db3919e2c | ||
|
|
9932cd4ac2 | ||
|
|
e8d104124f | ||
|
|
26f708da87 | ||
|
|
5a5e4dbb6e | ||
|
|
bab853a0d3 | ||
|
|
5c7ccb3c26 | ||
|
|
37faf170b1 |
@@ -40,6 +40,7 @@ parameters:
|
||||
- gfortran
|
||||
- libgfortran5
|
||||
- libopenblas-dev
|
||||
- liblapack-dev
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
@@ -125,10 +126,13 @@ jobs:
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
# NOTE: content between `---` is for transition support between old/new build systems
|
||||
# and should be removed once transition is complete.
|
||||
# -----------------------------
|
||||
# Build and install gtest and lapack
|
||||
# $(Pipeline.Workspace)/deps is a temporary folder for the build process
|
||||
# $(Pipeline.Workspace)/s/deps is part of the hipSPARSELt repo
|
||||
- script: mkdir $(Pipeline.Workspace)/deps
|
||||
- script: mkdir -p $(Pipeline.Workspace)/deps
|
||||
displayName: Create temp folder for external dependencies
|
||||
# hipSPARSELt already has a CMake script for external deps, so we can just run that
|
||||
# https://github.com/ROCm/hipSPARSELt/blob/develop/deps/CMakeLists.txt
|
||||
@@ -144,22 +148,35 @@ jobs:
|
||||
- script: sudo make install
|
||||
displayName: Install hipSPARSELt external dependencies
|
||||
workingDirectory: $(Pipeline.Workspace)/deps
|
||||
# -----------------------------
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
# NOTE: the following options are old build only
|
||||
# and can be removed after full transition to new build
|
||||
# -DAMDGPU_TARGETS=${{ job.target }}
|
||||
# -DCMAKE_Fortran_COMPILER=f95
|
||||
# -DTensile_LOGIC=
|
||||
# -DTensile_CPU_THREADS=
|
||||
# -DTensile_LIBRARY_FORMAT=msgpack
|
||||
# -DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
# -DBUILD_CLIENTS_TESTS=ON
|
||||
# -DBUILD_USE_LOCAL_TENSILE=OFF
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DCMAKE_Fortran_COMPILER=f95
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm"
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
-DCMAKE_Fortran_COMPILER=f95
|
||||
-DTensile_LOGIC=
|
||||
-DTensile_CPU_THREADS=
|
||||
-DTensile_LIBRARY_FORMAT=msgpack
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm"
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DBUILD_CLIENTS_TESTS=ON
|
||||
-DBUILD_USE_LOCAL_TENSILE=OFF
|
||||
-DHIPSPARSELT_ENABLE_FETCH=ON
|
||||
-GNinja
|
||||
${{ if ne(parameters.sparseCheckoutDir, '') }}:
|
||||
cmakeSourceDir: $(Build.SourcesDirectory)/projects/hipsparselt
|
||||
|
||||
@@ -72,6 +72,7 @@ CU
|
||||
CUDA
|
||||
CUs
|
||||
CXX
|
||||
CX
|
||||
Cavium
|
||||
CentOS
|
||||
ChatGPT
|
||||
@@ -118,6 +119,8 @@ Dependabot
|
||||
Deprecations
|
||||
DevCap
|
||||
DirectX
|
||||
Disaggregated
|
||||
disaggregated
|
||||
Dockerfile
|
||||
Dockerized
|
||||
Doxygen
|
||||
@@ -127,6 +130,7 @@ ENDPGM
|
||||
EPYC
|
||||
ESXi
|
||||
EoS
|
||||
etcd
|
||||
fas
|
||||
FBGEMM
|
||||
FIFOs
|
||||
@@ -178,6 +182,7 @@ GPUs
|
||||
Graphbolt
|
||||
GraphSage
|
||||
GRBM
|
||||
GRE
|
||||
GenAI
|
||||
GenZ
|
||||
GitHub
|
||||
@@ -301,6 +306,7 @@ MirroredStrategy
|
||||
Mixtral
|
||||
MosaicML
|
||||
MoEs
|
||||
Mooncake
|
||||
Mpops
|
||||
Multicore
|
||||
Multithreaded
|
||||
@@ -445,6 +451,7 @@ SKU
|
||||
SKUs
|
||||
SLES
|
||||
SLURM
|
||||
Slurm
|
||||
SMEM
|
||||
SMFMA
|
||||
SMI
|
||||
@@ -615,6 +622,7 @@ coalescable
|
||||
codename
|
||||
collater
|
||||
comgr
|
||||
compat
|
||||
completers
|
||||
composable
|
||||
concretization
|
||||
@@ -776,6 +784,7 @@ lossy
|
||||
macOS
|
||||
matchers
|
||||
maxtext
|
||||
megablocks
|
||||
megatron
|
||||
microarchitecture
|
||||
migraphx
|
||||
@@ -934,6 +943,7 @@ softmax
|
||||
spack
|
||||
spmm
|
||||
src
|
||||
stanford
|
||||
stochastically
|
||||
strided
|
||||
subcommand
|
||||
|
||||
37
RELEASE.md
37
RELEASE.md
@@ -61,7 +61,7 @@ for more information about operating system and hardware compatibility.
|
||||
|
||||
ROCm 7.0.0 introduces support for KVM Passthrough for AMD Instinct MI350X and MI355X GPUs.
|
||||
|
||||
All KVM-based SR-IOV supported configurations require the GIM SR-IOV driver version 8.4.0.K. Refer to [GIM Release note](https://github.com/amd/MxGPU-Virtualization/releases) for more details. In addition, support for VMware ESXi 8 has been introduced for AMD Instinct MI300X GPUs. For more information, see [Virtualization Support](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html#virtualization-support).
|
||||
All KVM-based SR-IOV supported configurations require the GIM SR-IOV driver version [8.4.0.K](https://github.com/amd/MxGPU-Virtualization/releases/tag/mainline%2F8.4.0.K). In addition, support for VMware ESXi 8 has been introduced for AMD Instinct MI300X GPUs. For more information, see [Virtualization Support](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html#virtualization-support).
|
||||
|
||||
### Deep learning and AI framework updates
|
||||
|
||||
@@ -172,7 +172,7 @@ Key compiler enhancements include:
|
||||
* Comgr:
|
||||
* Added support for an in-memory virtual file system (VFS) for storing temporary files generated during intermediate compilation steps. This is designed to improve performance by reducing on-disk file I/O. Currently, VFS is supported only for the device library link step, with plans for expanded support in future releases.
|
||||
* SPIR-V:
|
||||
* Improved [target-specific extensions](https://github.com/ROCm/llvm-project/blob/c2535466c6e40acd5ecf6ba1676a4e069c6245cc/clang/docs/LanguageExtensions.rst):
|
||||
* Improved [target-specific extensions](https://github.com/llvm/llvm-project/blob/main/clang/docs/LanguageExtensions.rst#target-specific-extensions):
|
||||
* Added a new target-specific builtin ``__builtin_amdgcn_processor_is`` for late or deferred queries of the current target processor.
|
||||
* Added a new target-specific builtin ``__builtin_amdgcn_is_invocable``, enabling fine-grained, per-builtin feature availability.
|
||||
* The compiler driver now uses parallel code generation by default when compiling using full LTO (including when using the `-fgpu-rdc` option) for HIP. This divides the optimized LLVM IR module into roughly equal partitions before instruction selection and lowering, which can help improve build times.
|
||||
@@ -335,8 +335,9 @@ ROCm documentation continues to be updated to provide clearer and more comprehen
|
||||
benchmarking guides have been updated with expanded model coverage and
|
||||
optimized Docker environments. Highlights include:
|
||||
|
||||
* The [Training a model with Primus and Megatron](https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/training/benchmark-docker/primus-megatron.html) benchmarking guide
|
||||
now leverages the unified AMD Primus framework with the Megatron backend. See [Primus: A Lightweight, Unified Training Framework for Large Models on AMD
|
||||
* The [Training a model with Primus and Megatron](https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/training/benchmark-docker/primus-megatron.html)
|
||||
and [Training a model with Primus and PyTorch](https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/training/benchmark-docker/primus-pytorch.html) benchmarking guides
|
||||
now leverage the unified AMD Primus framework with the Megatron and torchtitan backends. See [Primus: A Lightweight, Unified Training Framework for Large Models on AMD
|
||||
GPUs](https://rocm.blogs.amd.com/software-tools-optimization/primus/README.html) for an introduction to Primus.
|
||||
|
||||
* The [Training a model with PyTorch](https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.html) benchmarking guide
|
||||
@@ -345,6 +346,9 @@ ROCm documentation continues to be updated to provide clearer and more comprehen
|
||||
* The [Training a model with JAX MaxText](https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.html) benchmarking guide
|
||||
now supports [MAD](https://github.com/ROCm/MAD)-integrated benchmarking. The MaxText training environment now uses JAX 0.6.0 or 0.5.0. FP8 quantized training is supported with JAX 0.5.0.
|
||||
|
||||
* The [SGLang distributed inference](https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/inference/benchmark-docker/sglang-distributed.html?model=llama-3.1-8b-instruct) guide
|
||||
provides a recipe to get started with disaggregated prefill/decode inference.
|
||||
|
||||
* The [vLLM inference performance testing](https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/inference/benchmark-docker/vllm.html) documentation
|
||||
now features clearer serving and throughput benchmarking commands -- for improved transparency of model benchmarking configurations. The vLLM inference
|
||||
environment now uses vLLM 0.10.1 and includes improved default configurations.
|
||||
@@ -408,17 +412,16 @@ ROCm documentation continues to be updated to provide clearer and more comprehen
|
||||
|
||||
## User space, driver, and firmware dependent changes
|
||||
|
||||
The software for AMD Datacenter GPU products requires maintaining a hardware
|
||||
and software stack with interdependencies between the GPU and baseboard
|
||||
firmware, AMD GPU drivers, and the ROCm user space software.
|
||||
|
||||
Running GPU software on AMD data center GPUs requires maintaining a coordinated
|
||||
hardware and software stack. This stack has interdependencies between the GPU
|
||||
and baseboard firmware, AMD GPU drivers, and the ROCm user-space software.
|
||||
As of the ROCm 7.0.0 release, these interdependencies are publicly documented.
|
||||
Note that while AMD publishes drivers and ROCm user space, your server or
|
||||
While AMD publishes drivers and ROCm user space components, your server or
|
||||
infrastructure provider publishes the GPU and baseboard firmware by bundling
|
||||
AMD’s firmware releases via AMD’s Platform Level Data Model (PLDM) bundle,
|
||||
which includes Integrated Firmware Image (IFWI).
|
||||
|
||||
GPU and baseboard firmware versioning might differ across GPU families. With the
|
||||
GPU and baseboard firmware versioning might differ across GPU families. Note that with the
|
||||
ROCm 7.0.0 release, the AMD GPU driver (amdgpu) is now versioned separately
|
||||
from ROCm. See [AMD GPU Driver/ROCm packaging separation](#amd-gpu-driver-rocm-packaging-separation).
|
||||
|
||||
@@ -2552,31 +2555,31 @@ issues related to individual components, review the [Detailed component changes]
|
||||
|
||||
### A memory error in the kernel might lead to applications using the ROCr library becoming unresponsive
|
||||
|
||||
Applications using the ROCr library might become unresponsive if a memory error occurs in the launched kernel when the queue from which it was launched is destroyed. The application is unable to receive further signal, resulting in the stall condition. The issue will be fixed in a future ROCm release.
|
||||
Applications using the ROCr library might become unresponsive if a memory error occurs in the launched kernel when the queue from which it was launched is destroyed. The application is unable to receive further signal, resulting in the stall condition. The issue will be fixed in a future ROCm release. See [GitHub issue #5334](https://github.com/ROCm/ROCm/issues/5334).
|
||||
|
||||
### Applications using stream capture APIs might fail during stream capture
|
||||
|
||||
Applications using ``hipLaunchHostFunc`` with stream capture APIs might fail to capture graphs during stream capture, and return `hipErrorStreamCaptureUnsupported`. This issue resulted from an update in ``hipStreamAddCallback``. This issue will be fixed in a future ROCm release.
|
||||
Applications using ``hipLaunchHostFunc`` with stream capture APIs might fail to capture graphs during stream capture, and return `hipErrorStreamCaptureUnsupported`. This issue resulted from an update in ``hipStreamAddCallback``. This issue will be fixed in a future ROCm release. See [GitHub issue #5337](https://github.com/ROCm/ROCm/issues/5337).
|
||||
|
||||
### Compilation failure via hipRTC when compiling with std=c++11
|
||||
|
||||
Applications compiling kernels using `hipRTC` might fail while passing the `std=c++11` compiler option. This issue will be fixed in a future ROCm release.
|
||||
Applications compiling kernels using `hipRTC` might fail while passing the `std=c++11` compiler option. This issue will be fixed in a future ROCm release. See [GitHub issue #5341](https://github.com/ROCm/ROCm/issues/5341).
|
||||
|
||||
### Compilation failure when referencing std::array if _GLIBCXX_ASSERTIONS is defined
|
||||
|
||||
Compiling from a device kernel or function results in failure when attempting to reference `std::array` if `_GLIBCXX_ASSERTIONS` is defined. The issue occurs because there's no device definition for `std::__glibcxx_asert_fail()`. This issue will be resolved in a future ROCm release with the implementation of `std::__glibcxx_assert_fail()`.
|
||||
Compiling from a device kernel or function results in failure when attempting to reference `std::array` if `_GLIBCXX_ASSERTIONS` is defined. The issue occurs because there's no device definition for `std::__glibcxx_asert_fail()`. This issue will be resolved in a future ROCm release with the implementation of `std::__glibcxx_assert_fail()`. See [GitHub issue #5342](https://github.com/ROCm/ROCm/issues/5342).
|
||||
|
||||
### Segmentation fault in ROCprofiler-SDK due to ABI mismatch affecting std::regex
|
||||
|
||||
Starting with GCC 5.1, GNU `libstdc++` introduced a dual Application Binary Interface (ABI) to adopt `C++11`, primarily affecting the `std::string` and its dependencies, including `std::regex`. If your code is compiled against headers expecting one ABI but linked or run with the other, it can cause problems with `std::string` and `std::regex`, leading to a segmentation fault in ROCprofiler-SDK, which uses `std::regex`. This issue is resolved in the [ROCm Systems `develop` branch](https://github.com/ROCm/rocm-systems) and will be part of a future ROCm release.
|
||||
Starting with GCC 5.1, GNU `libstdc++` introduced a dual Application Binary Interface (ABI) to adopt `C++11`, primarily affecting the `std::string` and its dependencies, including `std::regex`. If your code is compiled against headers expecting one ABI but linked or run with the other, it can cause problems with `std::string` and `std::regex`, leading to a segmentation fault in ROCprofiler-SDK, which uses `std::regex`. This issue is resolved in the [ROCm Systems `develop` branch](https://github.com/ROCm/rocm-systems) and will be part of a future ROCm release. See [GitHub issue #5343](https://github.com/ROCm/ROCm/issues/5343).
|
||||
|
||||
### Decline in performance of batched GEMM operation for applications using hipBLASLT kernels
|
||||
|
||||
Default batched General Matrix Multiplications (GEMM) operations for rocBLAS and hipBLAS on gfx1200 and gfx1201 may have a decline in performance in comparison with non-batched and strided_batched GEMM operations. By default, the batched GEMM uses hipBLASLT kernels, and switching to the Tensile kernel resolves the performance decline issue. The issue will be fixed in a future ROCm release. As a workaround, you can set the environment variable `ROCBLAS_USE_HIPBLASLT=0` before the batched GEMM operation is performed on gfx1200 and gfx1201. After completing the batched operation, reset the variable to `ROCBLAS_USE_HIPBLASLT=1` before calling non-batched or strided_batched operations.
|
||||
Default batched General Matrix Multiplications (GEMM) operations for rocBLAS and hipBLAS on gfx1200 and gfx1201 may have a decline in performance in comparison with non-batched and strided_batched GEMM operations. By default, the batched GEMM uses hipBLASLT kernels, and switching to the Tensile kernel resolves the performance decline issue. The issue will be fixed in a future ROCm release. As a workaround, you can set the environment variable `ROCBLAS_USE_HIPBLASLT=0` before the batched GEMM operation is performed on gfx1200 and gfx1201. After completing the batched operation, reset the variable to `ROCBLAS_USE_HIPBLASLT=1` before calling non-batched or strided_batched operations. See [GitHub issue #5344](https://github.com/ROCm/ROCm/issues/5344).
|
||||
|
||||
### Failure to declare out-of-bound CPERs for bad memory page
|
||||
|
||||
Exceeding bad memory page threshold fails to declare Out-Of-Band Common Platform Error Records (CPERs). This issue affects all AMD Instinct MI300 Series and MI350 Series GPUs, and will be fixed in a future AMD GPU Driver release.
|
||||
Exceeding bad memory page threshold fails to declare Out-Of-Band Common Platform Error Records (CPERs). This issue affects all AMD Instinct MI300 Series and MI350 Series GPUs, and will be fixed in a future AMD GPU Driver release. See [GitHub issue #5345](https://github.com/ROCm/ROCm/issues/5345).
|
||||
|
||||
## ROCm resolved issues
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<manifest>
|
||||
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
|
||||
<default revision="refs/tags/rocm-7.0.0"
|
||||
<default revision="refs/tags/rocm-7.0.1"
|
||||
remote="rocm-org"
|
||||
sync-c="true"
|
||||
sync-j="4" />
|
||||
|
||||
@@ -3,13 +3,13 @@ ROCm Version,7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
|
||||
,,,,,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,SLES 15 SP7,"SLES 15 SP7, SP6","SLES 15 SP7, SP6",SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,RHEL 8.10 [#rhel-700-past-60]_,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,SLES 15 SP7 [#sles-db-700-past-60]_,"SLES 15 SP7, SP6","SLES 15 SP7, SP6",SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,,,,,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
|
||||
,"Oracle Linux 9, 8 [#ol-700-mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
|
||||
,Debian 12,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
|
||||
,Debian 12 [#sles-db-700-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
|
||||
,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-630-past-60]_,Azure Linux 3.0 [#az-mi300x-630-past-60]_,,,,,,,,,,,,
|
||||
,Rocky Linux 9,,,,,,,,,,,,,,,,,,
|
||||
,Rocky Linux 9 [#rl-700-past-60]_,,,,,,,,,,,,,,,,,,
|
||||
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA4,,,,,,,,,,,,,,,,,,
|
||||
,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
|
||||
@@ -19,25 +19,27 @@ ROCm Version,7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6
|
||||
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx950,,,,,,,,,,,,,,,,,,
|
||||
,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
|
||||
,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
|
||||
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx950 [#mi350x-os-past-60]_,,,,,,,,,,,,,,,,,,
|
||||
,gfx1201 [#RDNA-OS-700-past-60]_,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1200 [#RDNA-OS-700-past-60]_,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1101 [#RDNA-OS-700-past-60]_ [#rd-v710-past-60]_,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1100 [#RDNA-OS-700-past-60]_,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030 [#RDNA-OS-700-past-60]_ [#rd-v620-past-60]_,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
|
||||
,gfx942 [#mi325x-os-past-60]_ [#mi300x-os-past-60]_ [#mi300A-os-past-60]_,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
|
||||
,gfx90a [#mi200x-os-past-60]_,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
|
||||
,gfx908 [#mi100-os-past-60]_,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.7, 2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.19.1, 2.18.1","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.6.0,0.4.35,0.4.35,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
|
||||
:doc:`verl <../compatibility/ml-compatibility/verl-compatibility>` [#verl_compat]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.3.0.post0,N/A,N/A,N/A,N/A,N/A,
|
||||
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>`,N/A,N/A,N/A,N/A,N/A,85f95ae,85f95ae,85f95ae,85f95ae,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,
|
||||
:doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat]_,N/A,N/A,N/A,N/A,2.4.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,
|
||||
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>`,N/A,N/A,N/A,N/A,N/A,0.7.0,0.7.0,0.7.0,0.7.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,
|
||||
:doc:`Taichi <../compatibility/ml-compatibility/taichi-compatibility>` [#taichi_compat]_,N/A,N/A,N/A,N/A,N/A,N/A,1.8.0b1,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,
|
||||
:doc:`verl <../compatibility/ml-compatibility/verl-compatibility>` [#verl_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.3.0.post0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>` [#stanford-megatron-lm_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,85f95ae,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat-past-60]_,N/A,N/A,N/A,N/A,2.4.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>` [#megablocks_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.7.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Taichi <../compatibility/ml-compatibility/taichi-compatibility>` [#taichi_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,1.8.0b1,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Ray <../compatibility/ml-compatibility/ray-compatibility>` [#ray_compat-past-60]_,N/A,N/A,N/A,2.48.0.post0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat-past-60]_,N/A,N/A,N/A,N/A,b5997,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.22.0,1.20.0,1.20.0,1.20.0,1.20.0,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
|
||||
|
@@ -58,11 +58,8 @@ compatibility and system requirements.
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.7, 2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.19.1, 2.18.1","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.6.0,0.4.35,0.4.31
|
||||
:doc:`verl <../compatibility/ml-compatibility/verl-compatibility>` [#verl_compat]_,N/A,N/A,N/A
|
||||
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>`,N/A,N/A,85f95ae
|
||||
:doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat]_,N/A,N/A,N/A
|
||||
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>`,N/A,N/A,0.7.0
|
||||
:doc:`Taichi <../compatibility/ml-compatibility/taichi-compatibility>` [#taichi_compat]_,N/A,N/A,N/A
|
||||
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>` [#stanford-megatron-lm_compat]_,N/A,N/A,85f95ae
|
||||
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>` [#megablocks_compat]_,N/A,N/A,0.7.0
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.22.0,1.20.0,1.17.3
|
||||
,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix:,,
|
||||
@@ -166,16 +163,18 @@ compatibility and system requirements.
|
||||
.. [#rl-700] Rocky Linux 9 is only supported on AMD Instinct MI300X and MI300A GPUs.
|
||||
.. [#single-node] **Prior to ROCm 7.0.0** - Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
|
||||
.. [#mi350x-os] AMD Instinct MI355X (gfx950) and MI350X(gfx950) GPUs are only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.4, and Oracle Linux 9.
|
||||
.. [#RDNA-OS-700] **For ROCm 7.0.0** AMD Radeon PRO AI PRO R9700 (gfx1201), AMD Radeon RX 9070 XT (gfx1201), AMD Radeon RX 9070 GRE (gfx1201), AMD Radeon RX 9070 (gfx1201), AMD Radeon RX 9060 XT (gfx1200), AMD Radeon RX 7800 XT (gfx1101), AMD Radeon RX 7700 XT (gfx1101), AMD Radeon PRO W7700 (gfx1101), AMD Radeon PRO W6800 (gfx1030) are only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, and RHEL 9.6.
|
||||
.. [#RDNA-OS] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#rd-v710] AMD Radeon PRO V710 (gfx1101) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and Azure Linux 3.0.
|
||||
.. [#rd-v620] AMD Radeon PRO V620 (gfx1030) is only supported on Ubuntu 24.04.3 and Ubuntu 22.04.5.
|
||||
.. [#mi325x-os] AMD Instinct MI325X GPU (gfx942) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#mi300x-os] AMD Instinct MI300X GPU (gfx942) is supported on all listed :ref:`supported_distributions`.
|
||||
.. [#mi300A-os] AMD Instinct MI300A GPU (gfx942) is supported only on Ubuntu 24.04, Ubuntu 22.04, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, Debian 12, and Rocky Linux 9.
|
||||
.. [#mi200x-os] AMD Instinct MI200 Series GPUs (gfx90a) are supported only on Ubuntu 24.04, Ubuntu 22.04, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, and Debian 12.
|
||||
.. [#mi100-os] AMD Instinct MI100 GPU (gfx908) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.4, and RHEL 8.10.
|
||||
.. [#7700XT-OS] Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
|
||||
.. [#RDNA-OS-700] **For ROCm 7.0.0** - AMD Radeon PRO AI PRO R9700 (gfx1201), AMD Radeon RX 9070 XT (gfx1201), AMD Radeon RX 9070 GRE (gfx1201), AMD Radeon RX 9070 (gfx1201), AMD Radeon RX 9060 XT (gfx1200), AMD Radeon RX 7800 XT (gfx1101), AMD Radeon RX 7700 XT (gfx1101), AMD Radeon PRO W7700 (gfx1101), and AMD Radeon PRO W6800 (gfx1030) are only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, and RHEL 9.6.
|
||||
.. [#RDNA-OS] **Prior ROCm 7.0.0** - Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#rd-v710] **For ROCm 7.0.0** - AMD Radeon PRO V710 (gfx1101) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and Azure Linux 3.0.
|
||||
.. [#rd-v620] **For ROCm 7.0.0** - AMD Radeon PRO V620 (gfx1030) is only supported on Ubuntu 24.04.3 and Ubuntu 22.04.5.
|
||||
.. [#mi325x-os] **For ROCm 7.0.0** - AMD Instinct MI325X GPU (gfx942) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#mi300x-os] **For ROCm 7.0.0** - AMD Instinct MI300X GPU (gfx942) is supported on all listed :ref:`supported_distributions`.
|
||||
.. [#mi300A-os] **For ROCm 7.0.0** - AMD Instinct MI300A GPU (gfx942) is supported only on Ubuntu 24.04, Ubuntu 22.04, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, Debian 12, and Rocky Linux 9.
|
||||
.. [#mi200x-os] **For ROCm 7.0.0** - AMD Instinct MI200 Series GPUs (gfx90a) are supported only on Ubuntu 24.04, Ubuntu 22.04, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, and Debian 12.
|
||||
.. [#mi100-os] **For ROCm 7.0.0** - AMD Instinct MI100 GPU (gfx908) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.4, and RHEL 8.10.
|
||||
.. [#7700XT-OS] **Prior ROCm 7.0.0** - Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
|
||||
.. [#stanford-megatron-lm_compat] Stanford Megatron-LM is only supported on ROCm 6.3.0.
|
||||
.. [#megablocks_compat] Megablocks is only supported on ROCm 6.3.0.
|
||||
.. [#kfd_support] As of ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The supported user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
|
||||
.. [#ROCT-rocr] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
|
||||
|
||||
@@ -248,12 +247,24 @@ Expand for full historical view of:
|
||||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#rhel-700-past-60] **For ROCm 7.0.0** - RHEL 8.10 is only supported on AMD Instinct MI300X, MI300A, MI250X, MI250, MI210, and MI100 GPUs.
|
||||
.. [#ol-700-mi300x-past-60] **For ROCm 7.0.0** - Oracle Linux 9 is supported only on AMD Instinct MI300X, MI350X, and MI355X. Oracle Linux 8 is only supported on AMD Instinct MI300X.
|
||||
.. [#mi300x-past-60] **Prior to ROCm 7.0.0** - Oracle Linux is supported only on AMD Instinct MI300X.
|
||||
.. [#single-node-past-60] **Prior to ROCm 7.0.0 ** - Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
|
||||
.. [#mi300x-past-60] **Prior ROCm 7.0.0** - Oracle Linux is supported only on AMD Instinct MI300X.
|
||||
.. [#sles-db-700-past-60] **For ROCm 7.0.0** - SLES 15 SP7 and Debian 12 are only supported on AMD Instinct MI300X, MI300A, MI250X, MI250, and MI210 GPUs.
|
||||
.. [#single-node-past-60] **Prior to ROCm 7.0.0** - Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
|
||||
.. [#az-mi300x-past-60] Starting from ROCm 6.4.0, Azure Linux 3.0 is supported only on AMD Instinct MI300X and AMD Radeon PRO V710.
|
||||
.. [#az-mi300x-630-past-60] **Prior ROCm 6.4.0**- Azure Linux 3.0 is supported only on AMD Instinct MI300X.
|
||||
.. [#RDNA-OS-past-60] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#rl-700-past-60] Rocky Linux 9 is only supported on AMD Instinct MI300X and MI300A GPUs.
|
||||
.. [#mi350x-os-past-60] AMD Instinct MI355X (gfx950) and MI350X(gfx950) GPUs are only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.4, and Oracle Linux 9.
|
||||
.. [#RDNA-OS-700-past-60] **For ROCm 7.0.0** AMD Radeon PRO AI PRO R9700 (gfx1201), AMD Radeon RX 9070 XT (gfx1201), AMD Radeon RX 9070 GRE (gfx1201), AMD Radeon RX 9070 (gfx1201), AMD Radeon RX 9060 XT (gfx1200), AMD Radeon RX 7800 XT (gfx1101), AMD Radeon RX 7700 XT (gfx1101), AMD Radeon PRO W7700 (gfx1101), and AMD Radeon PRO W6800 (gfx1030) are only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, and RHEL 9.6.
|
||||
.. [#RDNA-OS-past-60] **Prior ROCm 7.0.0** - Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#rd-v710-past-60] **For ROCm 7.0.0** - AMD Radeon PRO V710 (gfx1101) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and Azure Linux 3.0.
|
||||
.. [#rd-v620-past-60] **For ROCm 7.0.0** - AMD Radeon PRO V620 (gfx1030) is only supported on Ubuntu 24.04.3 and Ubuntu 22.04.5.
|
||||
.. [#mi325x-os-past-60] **For ROCm 7.0.0** - AMD Instinct MI325X GPU (gfx942) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#mi300x-os-past-60] **For ROCm 7.0.0** - AMD Instinct MI300X GPU (gfx942) is supported on all listed :ref:`supported_distributions`.
|
||||
.. [#mi300A-os-past-60] **For ROCm 7.0.0** - AMD Instinct MI300A GPU (gfx942) is supported only on Ubuntu 24.04, Ubuntu 22.04, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, Debian 12, and Rocky Linux 9.
|
||||
.. [#mi200x-os-past-60] **For ROCm 7.0.0** - AMD Instinct MI200 Series GPUs (gfx90a) are supported only on Ubuntu 24.04, Ubuntu 22.04, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, and Debian 12.
|
||||
.. [#mi100-os-past-60] **For ROCm 7.0.0** - AMD Instinct MI100 GPU (gfx908) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.4, and RHEL 8.10.
|
||||
.. [#7700XT-OS-past-60] Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
|
||||
.. [#mi300_624-past-60] **For ROCm 6.2.4** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_622-past-60] **For ROCm 6.2.2** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
@@ -264,13 +275,13 @@ Expand for full historical view of:
|
||||
.. [#mi300_610-past-60] **For ROCm 6.1.0** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
|
||||
.. [#mi300_602-past-60] **For ROCm 6.0.2** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
|
||||
.. [#mi300_600-past-60] **For ROCm 6.0.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
|
||||
.. [#verl_compat] verl is only supported on ROCm 6.2.0.
|
||||
.. [#stanford-megatron-lm_compat] Stanford Megatron-LM is only supported on ROCm 6.3.0.
|
||||
.. [#dgl_compat] DGL is only supported on ROCm 6.4.0.
|
||||
.. [#megablocks_compat] Megablocks is only supported on ROCm 6.3.0.
|
||||
.. [#taichi_compat] Taichi is only supported on ROCm 6.3.2.
|
||||
.. [#ray_compat] Ray is only supported on ROCm 6.4.1.
|
||||
.. [#llama-cpp_compat] llama.cpp is only supported on ROCm 6.4.0.
|
||||
.. [#kfd_support-past-60] As of ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The tested user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
|
||||
.. [#verl_compat-past-60] verl is only supported on ROCm 6.2.0.
|
||||
.. [#stanford-megatron-lm_compat-past-60] Stanford Megatron-LM is only supported on ROCm 6.3.0.
|
||||
.. [#dgl_compat-past-60] DGL is only supported on ROCm 6.4.0.
|
||||
.. [#megablocks_compat-past-60] Megablocks is only supported on ROCm 6.3.0.
|
||||
.. [#taichi_compat-past-60] Taichi is only supported on ROCm 6.3.2.
|
||||
.. [#ray_compat-past-60] Ray is only supported on ROCm 6.4.1.
|
||||
.. [#llama-cpp_compat-past-60] llama.cpp is only supported on ROCm 6.4.0.
|
||||
.. [#kfd_support-past-60] As of ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The supported user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
|
||||
.. [#ROCT-rocr-past-60] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
|
||||
|
||||
|
||||
@@ -72,7 +72,7 @@ the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.18-dev/images/sha256-96754ce2d30f729e19b497279915b5212ba33d5e408e7e5dd3f2304d87e3441e"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
|
||||
- 24.04
|
||||
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
@@ -81,7 +81,7 @@ the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.18-dev/images/sha256-fa741508d383858e86985a9efac85174529127408102558ae2e3a4ac894eea1e"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
|
||||
- 22.04
|
||||
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
@@ -90,7 +90,7 @@ the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.17-dev/images/sha256-3a0aef09f2a8833c2b64b85874dd9449ffc2ad257351857338ff5b706c03a418"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
|
||||
- 24.04
|
||||
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`__
|
||||
@@ -99,7 +99,7 @@ the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.17-dev/images/sha256-bc7341a41ebe7ab261aa100732874507c452421ef733e408ac4f05ed453b0bc5"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
|
||||
- 22.04
|
||||
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`__
|
||||
@@ -108,7 +108,7 @@ the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.16-dev/images/sha256-4841a8df7c340dab79bf9362dad687797649a00d594e0832eb83ea6880a40d3b"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.16.2-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
|
||||
- 24.04
|
||||
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`__
|
||||
@@ -117,7 +117,7 @@ the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.16-dev/images/sha256-883fa95aba960c58a3e46fceaa18f03ede2c7df89b8e9fd603ab2d47e0852897"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.16.2-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
|
||||
- 22.04
|
||||
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`__
|
||||
|
||||
@@ -150,6 +150,15 @@ model_groups:
|
||||
url: https://huggingface.co/Qwen/Qwen2-7B
|
||||
precision: BF16
|
||||
training_modes: [finetune_fw, finetune_lora]
|
||||
- group: Stable Diffusion
|
||||
tag: sd
|
||||
models:
|
||||
- model: Stable Diffusion XL
|
||||
mad_tag: pyt_huggingface_stable_diffusion_xl_2k_lora_finetuning
|
||||
model_repo: SDXL
|
||||
url: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
|
||||
precision: BF16
|
||||
training_modes: [finetune_lora]
|
||||
- group: Flux
|
||||
tag: flux
|
||||
models:
|
||||
@@ -159,3 +168,11 @@ model_groups:
|
||||
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
|
||||
precision: BF16
|
||||
training_modes: [pretrain]
|
||||
- group: NCF
|
||||
tag: ncf
|
||||
models:
|
||||
- model: NCF
|
||||
mad_tag: pyt_ncf_training
|
||||
model_repo:
|
||||
url: https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Recommendation/NCF
|
||||
precision: FP32
|
||||
|
||||
@@ -122,8 +122,8 @@ drivers.
|
||||
git clone https://github.com/ROCm/MAD.git
|
||||
cd MAD/docker
|
||||
docker build \
|
||||
-t sglang_dissag_pd_image \
|
||||
-f sglang_dissag_inference.ubuntu.amd.Dockerfile .
|
||||
-t sglang_disagg_pd_image \
|
||||
-f sglang_disagg_inference.ubuntu.amd.Dockerfile .
|
||||
|
||||
Benchmarking
|
||||
============
|
||||
@@ -132,16 +132,16 @@ The `<https://github.com/ROCm/MAD/tree/develop/scripts/sglang_dissag>`__
|
||||
repository contains scripts to launch SGLang inference with prefill/decode
|
||||
disaggregation via Mooncake for supported models.
|
||||
|
||||
* `scripts/sglang_dissag/run_xPyD_models.slurm <https://github.com/ROCm/MAD/blob/develop/scripts/sglang_dissag/run_xPyD_models.slurm>`__
|
||||
* `scripts/sglang_dissag/run_xPyD_models.slurm <https://github.com/ROCm/MAD/blob/develop/scripts/sglang_disagg/run_xPyD_models.slurm>`__
|
||||
-- the main Slurm batch script to launch Docker containers on all nodes using ``sbatch`` or ``salloc``.
|
||||
|
||||
* `scripts/sglang_dissag/sglang_disagg_server.sh <https://github.com/ROCm/MAD/blob/develop/scripts/sglang_dissag/sglang_disagg_server.sh>`__
|
||||
* `scripts/sglang_dissag/sglang_disagg_server.sh <https://github.com/ROCm/MAD/blob/develop/scripts/sglang_disagg/sglang_disagg_server.sh>`__
|
||||
-- the entrypoint script that runs inside each container to start the correct service -- proxy, prefill, or decode.
|
||||
|
||||
* `scripts/sglang_dissag/benchmark_xPyD.sh <https://github.com/ROCm/MAD/blob/develop/scripts/sglang_dissag/benchmark_xPyD.sh>`__
|
||||
* `scripts/sglang_dissag/benchmark_xPyD.sh <https://github.com/ROCm/MAD/blob/develop/scripts/sglang_disagg/benchmark_xPyD.sh>`__
|
||||
-- the benchmark script to run the GSM8K accuracy benchmark and the SGLang benchmarking tool for performance measurement.
|
||||
|
||||
* `scripts/sglang_dissag/benchmark_parser.py <https://github.com/ROCm/MAD/blob/develop/scripts/sglang_dissag/benchmark_parser.py>`__
|
||||
* `scripts/sglang_dissag/benchmark_parser.py <https://github.com/ROCm/MAD/blob/develop/scripts/sglang_disagg/benchmark_parser.py>`__
|
||||
-- the log parser script to be run on the concurrency benchmark log file to generate tabulated data.
|
||||
|
||||
Launch the service
|
||||
@@ -163,10 +163,10 @@ allocated nodes.
|
||||
# Clone the MAD repo if you haven't already and
|
||||
# navigate to the scripts directory
|
||||
git clone https://github.com/ROCm/MAD.git
|
||||
cd MAD/scripts/sglang_dissag/
|
||||
cd MAD/scripts/sglang_disagg/
|
||||
|
||||
# Slurm sbatch run command
|
||||
export DOCKER_IMAGE_NAME=sglang_dissag_pd_image
|
||||
export DOCKER_IMAGE_NAME=sglang_disagg_pd_image
|
||||
export xP=<num_prefill_nodes>
|
||||
export yD=<num_decode_nodes>
|
||||
export MODEL_NAME={{ model.model_repo }}
|
||||
|
||||
@@ -406,8 +406,6 @@ benchmark results:
|
||||
Further reading
|
||||
===============
|
||||
|
||||
- See the ROCm/maxtext benchmarking README at `<https://github.com/ROCm/maxtext/blob/main/benchmarks/gpu-rocm/readme.md>`__.
|
||||
|
||||
- To learn more about MAD and the ``madengine`` CLI, see the `MAD usage guide <https://github.com/ROCm/MAD?tab=readme-ov-file#usage-guide>`__.
|
||||
|
||||
- To learn more about system settings and management practices to configure your system for
|
||||
|
||||
@@ -8,6 +8,11 @@
|
||||
Training a model with PyTorch on ROCm
|
||||
**************************************
|
||||
|
||||
.. note::
|
||||
|
||||
Primus with the PyTorch torchtitan backend is intended to supersede the :doc:`ROCm PyTorch training <pytorch-training>` workflow.
|
||||
See :doc:`primus-pytorch` for details.
|
||||
|
||||
PyTorch is an open-source machine learning framework that is widely used for
|
||||
model training with GPU-optimized components for transformer-based models.
|
||||
|
||||
@@ -51,7 +56,7 @@ vary by model -- select one to get started.
|
||||
<div class="col-2 me-1 px-2 model-param-head">Model</div>
|
||||
<div class="row col-10 pe-0">
|
||||
{% for model_group in model_groups %}
|
||||
<div class="col-3 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
|
||||
<div class="col-4 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
@@ -89,9 +94,11 @@ vary by model -- select one to get started.
|
||||
{% for model_group in model_groups %}
|
||||
{% set models = model_group.models %}
|
||||
{% for model in models %}
|
||||
{% if model.training_modes %}
|
||||
* - {{ model.model }}
|
||||
- ``{{ model.training_modes | join('``, ``') }}``
|
||||
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
@@ -154,20 +161,23 @@ Run training
|
||||
|
||||
.. tab-item:: MAD-integrated benchmarking
|
||||
|
||||
1. Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
|
||||
directory and install the required packages on the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD
|
||||
pip install -r requirements.txt
|
||||
|
||||
{% for model_group in model_groups %}
|
||||
{% for model in model_group.models %}
|
||||
|
||||
.. container:: model-doc {{ model.mad_tag }}
|
||||
|
||||
The following run command is tailored to {{ model.model }}.
|
||||
See :ref:`amd-pytorch-training-model-support` to switch to another available model.
|
||||
|
||||
1. Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
|
||||
directory and install the required packages on the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD
|
||||
pip install -r requirements.txt
|
||||
|
||||
2. For example, use this command to run the performance benchmark test on the {{ model.model }} model
|
||||
using one node with the {{ model.precision }} data type on the host machine.
|
||||
|
||||
@@ -189,6 +199,17 @@ Run training
|
||||
|
||||
.. tab-item:: Standalone benchmarking
|
||||
|
||||
{% for model_group in model_groups %}
|
||||
{% for model in model_group.models %}
|
||||
|
||||
.. container:: model-doc {{ model.mad_tag }}
|
||||
|
||||
The following commands are tailored to {{ model.model }}.
|
||||
See :ref:`amd-pytorch-training-model-support` to switch to another available model.
|
||||
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
.. rubric:: Download the Docker image and required packages
|
||||
|
||||
1. Use the following command to pull the Docker image from Docker Hub.
|
||||
|
||||
70
tools/rocm-build/rocm-7.0.1.xml
Normal file
70
tools/rocm-build/rocm-7.0.1.xml
Normal file
@@ -0,0 +1,70 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<manifest>
|
||||
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
|
||||
<default revision="refs/tags/rocm-7.0.1"
|
||||
remote="rocm-org"
|
||||
sync-c="true"
|
||||
sync-j="4" />
|
||||
<!--list of projects for ROCm-->
|
||||
<project name="ROCm" revision="roc-7.0.x" />
|
||||
<project name="ROCK-Kernel-Driver" />
|
||||
<project name="ROCR-Runtime" />
|
||||
<project name="amdsmi" />
|
||||
<project name="aqlprofile" />
|
||||
<project name="rdc" />
|
||||
<project name="rocm_bandwidth_test" />
|
||||
<project name="rocm_smi_lib" />
|
||||
<project name="rocm-core" />
|
||||
<project name="rocm-examples" />
|
||||
<project name="rocminfo" />
|
||||
<project name="rocprofiler" />
|
||||
<project name="rocprofiler-register" />
|
||||
<project name="rocprofiler-sdk" />
|
||||
<project name="rocprofiler-compute" />
|
||||
<project name="rocprofiler-systems" />
|
||||
<project name="roctracer" />
|
||||
<!--HIP Projects-->
|
||||
<project name="hip" />
|
||||
<project name="hip-tests" />
|
||||
<project name="HIPIFY" />
|
||||
<project name="clr" />
|
||||
<project name="hipother" />
|
||||
<!-- The following projects are all associated with the AMDGPU LLVM compiler -->
|
||||
<project name="half" />
|
||||
<project name="llvm-project" />
|
||||
<project name="spirv-llvm-translator" />
|
||||
<!-- gdb projects -->
|
||||
<project name="ROCdbgapi" />
|
||||
<project name="ROCgdb" />
|
||||
<project name="rocr_debug_agent" />
|
||||
<!-- ROCm Libraries -->
|
||||
<project groups="mathlibs" name="AMDMIGraphX" />
|
||||
<project groups="mathlibs" name="MIVisionX" />
|
||||
<project groups="mathlibs" name="ROCmValidationSuite" />
|
||||
<project groups="mathlibs" name="composable_kernel" />
|
||||
<project groups="mathlibs" name="hipSOLVER" />
|
||||
<project groups="mathlibs" name="hipTensor" />
|
||||
<project groups="mathlibs" name="hipfort" />
|
||||
<project groups="mathlibs" name="rccl" />
|
||||
<project groups="mathlibs" name="rocAL" />
|
||||
<project groups="mathlibs" name="rocALUTION" />
|
||||
<project groups="mathlibs" name="rocDecode" />
|
||||
<project groups="mathlibs" name="rocJPEG" />
|
||||
<!-- The following components have been migrated to rocm-libraries:
|
||||
hipBLAS-common hipBLAS hipBLASLt hipCUB
|
||||
hipFFT hipRAND hipSPARSE hipSPARSELt
|
||||
MIOpen rocBLAS rocFFT rocPRIM rocRAND
|
||||
rocSPARSE rocThrust Tensile -->
|
||||
<project groups="mathlibs" name="rocm-libraries" />
|
||||
<project groups="mathlibs" name="rocPyDecode" />
|
||||
<project groups="mathlibs" name="rocSHMEM" />
|
||||
<project groups="mathlibs" name="rocSOLVER" />
|
||||
<project groups="mathlibs" name="rocWMMA" />
|
||||
<project groups="mathlibs" name="rocm-cmake" />
|
||||
<project groups="mathlibs" name="rpp" />
|
||||
<project groups="mathlibs" name="TransferBench" />
|
||||
<!-- Projects for OpenMP-Extras -->
|
||||
<project name="aomp" path="openmp-extras/aomp" />
|
||||
<project name="aomp-extras" path="openmp-extras/aomp-extras" />
|
||||
<project name="flang" path="openmp-extras/flang" />
|
||||
</manifest>
|
||||
Reference in New Issue
Block a user