Compare commits

...

2 Commits

Author      SHA1        Message                                            Date
Alex Xu     a4b1b2cc67  rocm-docs-core experiment                          2025-06-26 15:58:18 -04:00
Peter Park  4f592f8949  [docs/7.0.0-alpha] Add docs for 7.0 alpha (#4978)  2025-06-26 15:47:42 -04:00
11 changed files with 1137 additions and 255 deletions

View File

@@ -1,3 +1,18 @@
Datacenter
GST
IET
LTO
MX
Microscaling
NANOO
ROCprof
affinitization
amdclang
benefitting
demangled
inlined
microscaling
roofline
AAC
ABI
ACE

View File

@@ -34,69 +34,86 @@ project = "ROCm Documentation"
project_path = os.path.abspath(".").replace("\\", "/")
author = "Advanced Micro Devices, Inc."
copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved."
version = "6.4.1"
release = "6.4.1"
version = "7.0 Alpha"
release = "7.0 Alpha"
setting_all_article_info = True
all_article_info_os = ["linux", "windows"]
all_article_info_author = ""
# pages with specific settings
article_pages = [
{"file": "about/release-notes", "os": ["linux"], "date": "2025-05-07"},
{"file": "release/changelog", "os": ["linux"],},
{"file": "compatibility/compatibility-matrix", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/tensorflow-compatibility", "os": ["linux"]},
{"file": "compatibility/ml-compatibility/jax-compatibility", "os": ["linux"]},
{"file": "how-to/deep-learning-rocm", "os": ["linux"]},
{"file": "preview/index", "os": ["linux"],},
{"file": "preview/release", "os": ["linux"],},
{"file": "preview/install/index", "os": ["linux"],},
{"file": "preview/install/instinct-driver", "os": ["linux"],},
{"file": "preview/install/rocm", "os": ["linux"],},
{"file": "preview/benchmark-docker/index", "os": ["linux"],},
{"file": "preview/benchmark-docker/training", "os": ["linux"],},
{"file": "preview/benchmark-docker/pre-training-megatron-lm-llama-3-8b", "os": ["linux"],},
{"file": "preview/benchmark-docker/pre-training-torchtitan-llama-3-70b", "os": ["linux"],},
{"file": "preview/benchmark-docker/fine-tuning-lora-llama-2-70b", "os": ["linux"],},
{"file": "preview/benchmark-docker/inference", "os": ["linux"],},
{"file": "preview/benchmark-docker/inference-vllm-llama-3.1-405b-fp4", "os": ["linux"],},
{"file": "preview/benchmark-docker/inference-sglang-deepseek-r1-fp4", "os": ["linux"],},
{"file": "how-to/rocm-for-ai/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/install", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/system-health-check", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/train-a-model", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/prerequisite-system-validation", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/megatron-lm", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/pytorch-training", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/training/scale-model-training", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/fine-tuning/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/fine-tuning/overview", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/hugging-face-models", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/llm-inference-frameworks", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/vllm", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.5-20250513", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/index", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/model-quantization", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/model-acceleration-libraries", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/optimizing-with-composable-kernel", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/optimizing-triton-kernel", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/profiling-and-debugging", "os": ["linux"]},
{"file": "how-to/rocm-for-ai/inference-optimization/workload", "os": ["linux"]},
{"file": "how-to/system-optimization/index", "os": ["linux"]},
{"file": "how-to/system-optimization/mi300x", "os": ["linux"]},
{"file": "how-to/system-optimization/mi200", "os": ["linux"]},
{"file": "how-to/system-optimization/mi100", "os": ["linux"]},
{"file": "how-to/system-optimization/w6000-v620", "os": ["linux"]},
{"file": "how-to/tuning-guides/mi300x/index", "os": ["linux"]},
{"file": "how-to/tuning-guides/mi300x/system", "os": ["linux"]},
{"file": "how-to/tuning-guides/mi300x/workload", "os": ["linux"]},
{"file": "how-to/system-debugging", "os": ["linux"]},
{"file": "how-to/gpu-enabled-mpi", "os": ["linux"]},
# {"file": "about/release-notes", "os": ["linux"], "date": "2025-06-26"},
# {"file": "release/changelog", "os": ["linux"],},
# {"file": "compatibility/compatibility-matrix", "os": ["linux"]},
# {"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]},
# {"file": "compatibility/ml-compatibility/tensorflow-compatibility", "os": ["linux"]},
# {"file": "compatibility/ml-compatibility/jax-compatibility", "os": ["linux"]},
# {"file": "how-to/deep-learning-rocm", "os": ["linux"]},
#
# {"file": "how-to/rocm-for-ai/index", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/install", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/system-health-check", "os": ["linux"]},
#
# {"file": "how-to/rocm-for-ai/training/index", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/training/train-a-model", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/training/prerequisite-system-validation", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/training/benchmark-docker/megatron-lm", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/training/benchmark-docker/pytorch-training", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/training/scale-model-training", "os": ["linux"]},
#
# {"file": "how-to/rocm-for-ai/fine-tuning/index", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/fine-tuning/overview", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference", "os": ["linux"]},
#
# {"file": "how-to/rocm-for-ai/inference/index", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/inference/hugging-face-models", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/inference/llm-inference-frameworks", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/inference/benchmark-docker/vllm", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.5-20250513", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]},
#
# {"file": "how-to/rocm-for-ai/inference-optimization/index", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/inference-optimization/model-quantization", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/inference-optimization/model-acceleration-libraries", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/inference-optimization/optimizing-with-composable-kernel", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/inference-optimization/optimizing-triton-kernel", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/inference-optimization/profiling-and-debugging", "os": ["linux"]},
# {"file": "how-to/rocm-for-ai/inference-optimization/workload", "os": ["linux"]},
#
# {"file": "how-to/system-optimization/index", "os": ["linux"]},
# {"file": "how-to/system-optimization/mi300x", "os": ["linux"]},
# {"file": "how-to/system-optimization/mi200", "os": ["linux"]},
# {"file": "how-to/system-optimization/mi100", "os": ["linux"]},
# {"file": "how-to/system-optimization/w6000-v620", "os": ["linux"]},
# {"file": "how-to/tuning-guides/mi300x/index", "os": ["linux"]},
# {"file": "how-to/tuning-guides/mi300x/system", "os": ["linux"]},
# {"file": "how-to/tuning-guides/mi300x/workload", "os": ["linux"]},
# {"file": "how-to/system-debugging", "os": ["linux"]},
# {"file": "how-to/gpu-enabled-mpi", "os": ["linux"]},
]
external_toc_path = "./sphinx/_toc.yml"
# Options to improve documentation build time for preview release documentation
external_toc_exclude_missing = True # don't build files that aren't in the TOC
external_projects_remote_repository = "" # don't fetch data to resolve intersphinx xrefs
# Add the extension directory to Python's search path
sys.path.append(str(Path(__file__).parent / 'extension'))
@@ -122,7 +139,7 @@ html_static_path = ["sphinx/static/css", "extension/how-to/rocm-for-ai/inference
html_css_files = ["rocm_custom.css", "rocm_rn.css", "vllm-benchmark.css"]
html_js_files = ["vllm-benchmark.js"]
html_title = "ROCm Documentation"
html_title = "ROCm 7.0 Alpha documentation"
html_theme_options = {"link_main_doc": False}

View File

@@ -1,5 +1,5 @@
.. meta::
:description: How to install ROCm and popular machine learning frameworks.
:description: How to install the ROCm 7.0 preview
:keywords: ROCm, AI, LLM, train, fine-tune, FSDP, DeepSpeed, LLaMA, tutorial
.. _rocm-for-ai-install:

docs/preview/index.md Normal file
View File

@@ -0,0 +1,26 @@
---
myst:
html_meta:
"description": "AMD ROCm 7.0 Alpha documentation"
"keywords": "Radeon, open, compute, platform, install, how, conceptual, reference, home, docs"
---
# AMD ROCm 7.0 Alpha documentation
AMD ROCm is an open-source software platform optimized to extract HPC and AI
workload performance from AMD Instinct™ accelerators while maintaining
compatibility with industry software frameworks.
This documentation provides early access information about the ROCm 7.0
Alpha release. The preview gives users early access to features still under
development so they can test them and provide feedback.
It is not recommended for production use.
```{note}
See [ROCm documentation](https://rocm.docs.amd.com/en/latest/) for the latest stable release for use in production.
```
The documentation includes:
- [ROCm 7.0 Alpha release notes](release.rst) with feature details and support matrix
- [Installation instructions](install/index.rst) for the ROCm 7.0 Alpha and the Instinct Driver

View File

@@ -0,0 +1,28 @@
.. meta::
:description: Installation via native package manager
:keywords: ROCm install, installation instructions, package manager, native package manager, AMD,
ROCm
****************************************
ROCm 7.0 Alpha installation instructions
****************************************
The ROCm 7.0 Alpha must be installed using your Linux distribution's native
package manager. This release supports specific hardware and software
configurations. Before installing, see the :ref:`supported OSes and hardware
<alpha-system-requirements>` outlined in the Alpha release notes.
.. important::
Upgrades and downgrades are not supported. You must uninstall any existing
ROCm installation before installing the Alpha build.
.. grid:: 2
.. grid-item-card:: Install ROCm
See :doc:`Install the ROCm 7.0 Alpha via package manager <rocm>`.
.. grid-item-card:: Install Instinct Driver
See :doc:`Install the Instinct Driver via package manager <instinct-driver>`.

View File

@@ -0,0 +1,212 @@
***********************************************
Install the Instinct Driver via package manager
***********************************************
This section describes how to install the Instinct Driver using ``apt`` on
Ubuntu 22.04 or 24.04, or ``dnf`` on Red Hat Enterprise Linux 9.6.
.. important::
Upgrades and downgrades are not supported. You must uninstall any existing
ROCm installation before installing the preview build.
Prerequisites
=============
Before installing, complete the following prerequisites.
.. tab-set::
.. tab-item:: Ubuntu 22.04
:sync: ubuntu-22
Install kernel headers.
.. code-block:: shell
sudo apt install "linux-headers-$(uname -r)" "linux-modules-extra-$(uname -r)"
.. tab-item:: Ubuntu 24.04
:sync: ubuntu-24
Install kernel headers.
.. code-block:: shell
sudo apt install "linux-headers-$(uname -r)" "linux-modules-extra-$(uname -r)"
.. tab-item:: RHEL 9.6
:sync: rhel-96
1. Register your Enterprise Linux.
.. code-block:: shell
subscription-manager register --username <username> --password <password>
subscription-manager attach --auto
2. Update your Enterprise Linux.
.. code-block:: shell
sudo dnf update --releasever=9.6 --exclude=\*release\*
3. Install kernel headers.
.. code-block:: shell
sudo dnf install "kernel-headers-$(uname -r)" "kernel-devel-$(uname -r)" "kernel-devel-matched-$(uname -r)"
Register ROCm repositories
==========================
.. tab-set::
.. tab-item:: Ubuntu 22.04
:sync: ubuntu-22
1. Add the package signing key.
.. code-block:: shell
# Make the directory if it doesn't exist yet.
# This location is recommended by the distribution maintainers.
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
# Download the key, convert the signing-key to a full
# keyring required by apt and store in the keyring directory.
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
2. Register the kernel mode driver.
.. code-block:: shell
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/30.10_alpha/ubuntu jammy main" \
| sudo tee /etc/apt/sources.list.d/amdgpu.list
sudo apt update
.. tab-item:: Ubuntu 24.04
:sync: ubuntu-24
1. Add the package signing key.
.. code-block:: shell
# Make the directory if it doesn't exist yet.
# This location is recommended by the distribution maintainers.
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
# Download the key, convert the signing-key to a full
# keyring required by apt and store in the keyring directory.
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
2. Register the kernel mode driver.
.. code-block:: shell
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/30.10_alpha/ubuntu noble main" \
| sudo tee /etc/apt/sources.list.d/amdgpu.list
sudo apt update
.. tab-item:: RHEL 9.6
:sync: rhel-96
.. code-block:: shell
sudo tee /etc/yum.repos.d/amdgpu.repo <<EOF
[amdgpu]
name=amdgpu
baseurl=https://repo.radeon.com/amdgpu/30.10_alpha/rhel/9.6/main/x86_64/
enabled=1
priority=50
gpgcheck=1
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
EOF
sudo dnf clean all
Install the kernel driver
=========================
.. tab-set::
.. tab-item:: Ubuntu 22.04
:sync: ubuntu-22
.. code-block:: shell
sudo apt install amdgpu-dkms
.. tab-item:: Ubuntu 24.04
:sync: ubuntu-24
.. code-block:: shell
sudo apt install amdgpu-dkms
.. tab-item:: RHEL 9.6
:sync: rhel-96
.. code-block:: shell
sudo dnf install amdgpu-dkms
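A reboot is typically required before the newly installed kernel module is used. The following verification is a hedged sketch (not part of the official steps); ``dkms`` and ``modinfo`` are standard tools on all three distributions:
.. code-block:: shell
sudo reboot
# After logging back in, verify the DKMS module was built for your kernel.
dkms status                          # should report amdgpu as installed
modinfo amdgpu | grep -i ^version    # shows the installed module version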
Uninstalling
============
.. tab-set::
.. tab-item:: Ubuntu 22.04
:sync: ubuntu-22
1. Uninstall the kernel mode driver.
.. code-block:: shell
sudo apt autoremove amdgpu-dkms
2. Remove AMDGPU repositories.
.. code-block:: shell
sudo rm /etc/apt/sources.list.d/amdgpu.list
# Clear the cache and clean the system
sudo rm -rf /var/cache/apt/*
sudo apt clean all
sudo apt update
.. tab-item:: Ubuntu 24.04
:sync: ubuntu-24
1. Uninstall the kernel mode driver.
.. code-block:: shell
sudo apt autoremove amdgpu-dkms
2. Remove AMDGPU repositories.
.. code-block:: shell
sudo rm /etc/apt/sources.list.d/amdgpu.list
# Clear the cache and clean the system
sudo rm -rf /var/cache/apt/*
sudo apt clean all
sudo apt update
.. tab-item:: RHEL 9.6
:sync: rhel-96
1. Uninstall the kernel mode driver.
.. code-block:: shell
sudo dnf remove amdgpu-dkms
2. Remove AMDGPU repositories.
.. code-block:: shell
sudo rm /etc/yum.repos.d/amdgpu.repo
# Clear the cache and clean the system
sudo rm -rf /var/cache/dnf
sudo dnf clean all

View File

@@ -0,0 +1,288 @@
**********************************************
Install the ROCm 7.0 Alpha via package manager
**********************************************
This page describes how to install the ROCm 7.0 Alpha build using ``apt`` on
Ubuntu 22.04 or 24.04, or ``dnf`` on Red Hat Enterprise Linux 9.6.
.. important::
Upgrades and downgrades are not supported. You must uninstall any existing
ROCm installation before installing the preview build.
Prerequisites
=============
Before installing, complete the following prerequisites.
.. tab-set::
.. tab-item:: Ubuntu 22.04
:sync: ubuntu-22
1. Install development packages.
.. code-block:: shell
sudo apt install python3-setuptools python3-wheel
2. Configure user permissions for GPU access.
.. code-block:: shell
sudo usermod -a -G render,video $LOGNAME
.. tab-item:: Ubuntu 24.04
:sync: ubuntu-24
1. Install development packages.
.. code-block:: shell
sudo apt install python3-setuptools python3-wheel
2. Configure user permissions for GPU access.
.. code-block:: shell
sudo usermod -a -G render,video $LOGNAME
.. tab-item:: RHEL 9.6
:sync: rhel-96
1. Register your Enterprise Linux.
.. code-block:: shell
subscription-manager register --username <username> --password <password>
subscription-manager attach --auto
2. Update your Enterprise Linux.
.. code-block:: shell
sudo dnf update --releasever=9.6 --exclude=\*release\*
3. Install additional package repositories.
Add the EPEL repository:
.. code-block:: shell
wget https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
sudo rpm -ivh epel-release-latest-9.noarch.rpm
Enable the CodeReady Linux Builder (CRB) repository.
.. code-block:: shell
sudo dnf install dnf-plugin-config-manager
sudo crb enable
4. Install development packages.
.. code-block:: shell
sudo dnf install python3-setuptools python3-wheel
5. Configure user permissions for GPU access.
.. code-block:: shell
sudo usermod -a -G render,video $LOGNAME
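Group membership changes take effect at the next login. As an editorial sanity check (not an official step), verify after logging back in that your user picked up the new groups:
.. code-block:: shell
# Should list both "render" and "video" among the user's groups.
groups $LOGNAME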
Register ROCm repositories
==========================
.. tab-set::
.. tab-item:: Ubuntu 22.04
:sync: ubuntu-22
1. Add the package signing key.
.. code-block:: shell
# Make the directory if it doesn't exist yet.
# This location is recommended by the distribution maintainers.
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
# Download the key, convert the signing-key to a full
# keyring required by apt and store in the keyring directory.
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
2. Register ROCm packages.
.. code-block:: shell
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/7.0_alpha jammy main" \
| sudo tee /etc/apt/sources.list.d/rocm.list
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/graphics/7.0_alpha/ubuntu jammy main" \
| sudo tee /etc/apt/sources.list.d/rocm-graphics.list
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \
| sudo tee /etc/apt/preferences.d/rocm-pin-600
sudo apt update
.. tab-item:: Ubuntu 24.04
:sync: ubuntu-24
1. Add the package signing key.
.. code-block:: shell
# Make the directory if it doesn't exist yet.
# This location is recommended by the distribution maintainers.
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
# Download the key, convert the signing-key to a full
# keyring required by apt and store in the keyring directory.
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
2. Register ROCm packages.
.. code-block:: shell
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/7.0_alpha noble main" \
| sudo tee /etc/apt/sources.list.d/rocm.list
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/graphics/7.0_alpha/ubuntu noble main" \
| sudo tee /etc/apt/sources.list.d/rocm-graphics.list
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \
| sudo tee /etc/apt/preferences.d/rocm-pin-600
sudo apt update
.. tab-item:: RHEL 9.6
:sync: rhel-96
.. code-block:: shell
sudo tee /etc/yum.repos.d/rocm.repo <<EOF
[ROCm-7.0.0]
name=ROCm7.0.0
baseurl=https://repo.radeon.com/rocm/el9/7.0_alpha/main
enabled=1
priority=50
gpgcheck=1
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
EOF
sudo tee /etc/yum.repos.d/rocm-graphics.repo <<EOF
[ROCm-7.0.0-Graphics]
name=ROCm7.0.0-Graphics
baseurl=https://repo.radeon.com/graphics/7.0_alpha/rhel/9/main/x86_64/
enabled=1
priority=50
gpgcheck=1
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
EOF
sudo dnf clean all
Install ROCm
============
.. tab-set::
.. tab-item:: Ubuntu 22.04
:sync: ubuntu-22
.. code-block:: shell
sudo apt install rocm
.. tab-item:: Ubuntu 24.04
:sync: ubuntu-24
.. code-block:: shell
sudo apt install rocm
.. tab-item:: RHEL 9.6
:sync: rhel-96
.. code-block:: shell
sudo dnf install rocm
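As a hedged post-install check (an editorial suggestion, not an official step), verify that ROCm can enumerate your accelerators; ``rocminfo`` is installed with the ``rocm`` meta package:
.. code-block:: shell
# GPU agents appear with a gfx* name and a marketing name.
/opt/rocm/bin/rocminfo | grep -E 'Marketing Name|gfx'
# Print the installed ROCm version.
cat /opt/rocm/.info/version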
.. _uninstall-rocm:
Uninstalling
============
.. tab-set::
.. tab-item:: Ubuntu 22.04
:sync: ubuntu-22
1. Uninstall specific meta packages.
.. code-block:: shell
sudo apt autoremove rocm
2. Uninstall ROCm packages.
.. code-block:: shell
sudo apt autoremove rocm-core
3. Remove ROCm repositories.
.. code-block:: shell
sudo rm /etc/apt/sources.list.d/rocm*.list
# Clear the cache and clean the system
sudo rm -rf /var/cache/apt/*
sudo apt clean all
sudo apt update
.. tab-item:: Ubuntu 24.04
:sync: ubuntu-24
1. Uninstall specific meta packages.
.. code-block:: shell
sudo apt autoremove rocm
2. Uninstall ROCm packages.
.. code-block:: shell
sudo apt autoremove rocm-core
3. Remove ROCm repositories.
.. code-block:: shell
sudo rm /etc/apt/sources.list.d/rocm*.list
# Clear the cache and clean the system
sudo rm -rf /var/cache/apt/*
sudo apt clean all
sudo apt update
.. tab-item:: RHEL 9.6
:sync: rhel-96
1. Uninstall specific meta packages.
.. code-block:: shell
sudo dnf remove rocm
2. Uninstall ROCm packages.
.. code-block:: shell
sudo dnf remove rocm-core amdgpu-core
3. Remove ROCm repositories.
.. code-block:: shell
sudo rm /etc/yum.repos.d/rocm*.repo*
# Clear the cache and clean the system
sudo rm -rf /var/cache/dnf
sudo dnf clean all

docs/preview/release.rst Normal file
View File

@@ -0,0 +1,270 @@
****************************
ROCm 7.0 Alpha release notes
****************************
The ROCm 7.0 Alpha is an early look into the upcoming ROCm 7.0 major release,
which introduces functional support for AMD Instinct™ MI355X and MI350X
on bare-metal, single-node systems. It also includes new features for current-generation
MI300X, MI200, and MI100 series accelerators. This is an alpha-quality release;
expect issues and limitations that will be addressed in upcoming previews.
.. important::
This Alpha release is not intended for performance evaluation.
For the latest stable release for production-level functionality,
see `ROCm documentation <https://rocm.docs.amd.com/en/latest/>`_.
This page provides a high-level summary of supported systems, key changes to the ROCm software
stack, developments related to AI frameworks, current known limitations, and installation
information.
.. _alpha-system-requirements:
Operating system and hardware support
=====================================
Only the accelerators and operating systems listed here are supported. Multi-node systems,
virtualized environments, and GPU partitioning are not supported in this Alpha.
* AMD accelerator: Instinct MI355X, MI350X, MI325X [#mi325x]_, MI300X, MI300A, MI250X, MI250, MI210, MI100
* Operating system: Ubuntu 22.04, Ubuntu 24.04, or RHEL 9.6
* System type: Bare metal, single node only
* Partitioning: Not supported
.. [#mi325x] MI325X is only supported with Ubuntu 22.04.
.. _alpha-highlights:
Alpha release highlights
========================
This section highlights key features enabled in the ROCm 7.0 Alpha.
AI frameworks
-------------
PyTorch
~~~~~~~
The ROCm 7.0 Alpha enables the following PyTorch features:
* Support for PyTorch 2.7
* Integrated fused RoPE (rotary positional embedding) kernels in APEX
* Compilation of Python C++ extensions using amdclang++
* Support for channels-last NHWC format for convolutions via MIOpen
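As an illustration, a quick functional check from the shell (a hedged sketch; ``torch.version.hip`` is the attribute exposed by ROCm builds of PyTorch):
.. code-block:: shell
# Prints the PyTorch version, the HIP version it was built against,
# and whether an accelerator is visible.
python3 -c 'import torch; print(torch.__version__, torch.version.hip, torch.cuda.is_available())'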
TensorFlow
~~~~~~~~~~
This Alpha enables support for TensorFlow 2.19.
vLLM
~~~~
* Support for Open Compute Project (OCP) ``FP8`` data type
* ``FP4`` precision for Llama 3.1 405B
Libraries
---------
.. _alpha-new-data-type-support:
New data type support
~~~~~~~~~~~~~~~~~~~~~
MX-compliant data types bring microscaling support to ROCm. For more information, see the `OCP
Microscaling (MX) Formats Specification
<https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf>`_. The ROCm
7.0 Alpha enables functional support for MX data types ``FP4``, ``FP6``, and ``FP8`` on MI355X
systems in these ROCm libraries:
* Composable Kernel (``FP4`` and ``FP8`` only)
* hipBLASLt
* MIGraphX (``FP4`` only)
The following libraries are updated to support the Open Compute Project (OCP) floating-point ``FP8``
format on MI355X instead of the NANOO ``FP8`` format:
* Composable Kernel
* hipBLASLt
* hipSPARSELt
* MIGraphX
* rocWMMA
MIGraphX now also supports ``BF16``.
RCCL support
~~~~~~~~~~~~
RCCL is supported for single-node functional usage only. Multi-node communication capabilities will
be supported in future preview releases.
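For a single-node functional check, one option is the separate `rccl-tests <https://github.com/ROCm/rccl-tests>`_ suite. This is a hedged sketch, not an official step; the build command and binary path follow the upstream README, and the GPU count (``-g``) must match your system:
.. code-block:: shell
git clone https://github.com/ROCm/rccl-tests.git
cd rccl-tests && make
# All-reduce from 8 bytes to 128 MB across 8 GPUs on one node.
./build/all_reduce_perf -b 8 -e 128M -f 2 -g 8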
MIGraphX
~~~~~~~~
* Support for OCP ``FP8`` and MX ``FP4`` data types on MI355X
* Support for ``BF16`` on all hardware
* Support for PyTorch 2.7 via Torch-MIGraphX
Tools
-----
AMD SMI
~~~~~~~
* The default output of the ``amd-smi`` CLI now displays a simple table view.
* New APIs: CPU affinity reporting shows each GPU's affinitization to the CPUs in a system.
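For illustration (a hedged example; the table view is per the first bullet, and ``monitor`` is an existing AMD SMI subcommand):
.. code-block:: shell
amd-smi           # default output: a simple table view of detected GPUs
amd-smi monitor   # live utilization, power, and temperature metrics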
ROCgdb
~~~~~~
* MX data types support: ``FP4``, ``FP6``, and ``FP8``
ROCprof Compute Viewer
~~~~~~~~~~~~~~~~~~~~~~
* Initial release: ``rocprof-compute-viewer`` visualizes the thread trace output of
``rocprofv3``.
ROCprof Trace Decoder
~~~~~~~~~~~~~~~~~~~~~
* Initial release: ``rocprof-trace-decoder``, a plugin API for decoding thread traces
ROCm Compute Profiler
~~~~~~~~~~~~~~~~~~~~~
* MX data types support: ``FP4``, ``FP6``, and ``FP8``
* MI355X and MI350X performance counters: CPC, SPI, SQ, TA/TD/TCP, and TCC
* Enhanced roofline analysis with support for ``INT8``, ``INT32``, ``FP8``, ``FP16``, and ``BF16``
data types
* Roofline distinction for ``FP32`` and ``FP64`` data types
* Selective kernel profiling
ROCm Systems Profiler
~~~~~~~~~~~~~~~~~~~~~
* Trace support for computer vision APIs: H264, H265, AV1, VP9, and JPEG
* Trace support for computer vision engine activity
* OpenMP for C++ language and kernel activity support
ROCm Validation Suite
~~~~~~~~~~~~~~~~~~~~~
* MI355X and MI350X accelerator support in the IET (Integrated Execution Test), GST (GPU Stress Test), and Babel (memory bandwidth test) modules.
ROCprofiler-SDK
~~~~~~~~~~~~~~~
* Program counter (PC) sampling (host trap-based)
* API for profiling applications using thread traces (beta)
* Support in ``rocprofv3`` CLI tool for thread trace service
HIP
---
The HIP runtime includes support for:
* Open Compute Project (OCP) MX floating-point ``FP4``, ``FP6``, and ``FP8`` data types and APIs
* Improved logging by adding more precise pointer information and launch arguments for better
tracking and debugging in dispatch methods
In addition, the HIP runtime includes the following functional improvements to runtime
performance and user experience:
* Optimized HIP runtime lock contention in some events and kernel handling APIs. Event processing
and memory object look-ups now use the shared mutex implementation. Kernel object look-up during
C++ kernel launch can now avoid a global lock. These changes improve performance in certain
applications with high usage, particularly for multiple GPUs, multiple threads, and HIP streams
per GPU.
* Programmatic support for the scratch buffer limit on GPU devices. Developers can now change
the default allocation size to match the expected scratch limit.
* Unified managed buffer and kernel argument buffers so the HIP runtime no longer needs to create
and load a separate kernel argument buffer.
* Refactored memory validation to create a unique function to validate a variety of memory copy
operations.
* Shader names are now demangled for more readable kernel logs.
See :ref:`HIP compatibility <hip-known-limitation>`.
Compilers
---------
* The compiler driver now uses parallel code generation by default when compiling using full LTO
(including when using the ``-fgpu-rdc`` option) for HIP. This divides the optimized LLVM IR module
into roughly equal partitions before instruction selection and lowering, which can help improve
build times.
Each kernel in the linked LTO module may be put in a separate partition, and any non-inlined
function it depends on may be copied alongside it. Thus, while parallel code generation can
improve build time, it can duplicate non-inlined, non-kernel functions across multiple partitions,
potentially increasing the binary size of the final object file.
* New compiler option ``-flto-partitions=<num>``.
This option is equivalent to the ``--lto-partitions=<num>`` LLD option. It controls the number of
partitions used for parallel code generation when using full LTO (including when using
``-fgpu-rdc``). The number of partitions must be greater than 0, and a value of 1 disables the
feature. The default value is 8.
Developers are encouraged to experiment with different numbers of partitions using the
``-flto-partitions`` Clang command-line option, as shown in the sketch after this list.
Recommended values are 1 to 16 partitions; especially large projects containing many kernels
may benefit from up to 64 partitions. Using a value greater than the number of hardware
threads on the build machine is not recommended. Smaller projects, or projects that contain
only a few kernels, may not benefit from partitioning at all and may even see a slight
increase in build time due to the small overhead of analyzing and partitioning the modules.
* HIPIFY now supports NVIDIA CUDA 12.8.0 APIs. See
`<https://github.com/ROCm/HIPIFY/blob/amd-develop/docs/reference/supported_apis.md>`_ for more
information.
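A minimal sketch of experimenting with partition counts, as referenced above (``app.hip`` and the ``--offload-arch`` target are placeholder assumptions, not from the release notes):
.. code-block:: shell
# Full LTO with device code split across 16 partitions (default is 8).
hipcc -fgpu-rdc -flto-partitions=16 --offload-arch=gfx942 app.hip -o app
# Disable partitioning to compare build time and binary size.
hipcc -fgpu-rdc -flto-partitions=1 --offload-arch=gfx942 app.hip -o app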
Instinct Driver / ROCm packaging separation
-------------------------------------------
The Instinct Driver is now distributed separately from the ROCm software stack. It is stored
in its own location in the package repository at `<repo.radeon.com>`_ under ``/amdgpu/``.
The first release is designated as Instinct Driver version 30.10. See `ROCm Gets Modular: Meet the
Instinct Datacenter GPU Driver
<https://rocm.blogs.amd.com/ecosystems-and-partners/instinct-gpu-driver/README.html>`_ for more
information.
Forward and backward compatibility between the Instinct Driver and ROCm is not supported in this
Alpha release. See the :doc:`installation instructions <install/index>`.
Known limitations
=================
.. _hip-known-limitation:
HIP compatibility
-----------------
HIP runtime APIs in the ROCm 7.0 Alpha do not yet include the backward-incompatible changes
planned for HIP 7.0. See `HIP 7.0 Is Coming: What You Need to Know to Stay Ahead
<https://rocm.blogs.amd.com/ecosystems-and-partners/transition-to-hip-7.0:-guidance-on-upcoming-compatibility-changes/README.html>`_ for more information.

View File

@@ -3,195 +3,206 @@
defaults:
numbered: False
maxdepth: 6
root: index
root: preview/index
subtrees:
- entries:
- file: what-is-rocm.rst
- file: about/release-notes.md
title: Release notes
- file: compatibility/compatibility-matrix.rst
title: Compatibility matrix
entries:
- url: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html
title: Linux system requirements
- url: https://rocm.docs.amd.com/projects/install-on-windows/en/${branch}/reference/system-requirements.html
title: Windows system requirements
- caption: Install
entries:
- url: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/
title: ROCm on Linux
- url: https://rocm.docs.amd.com/projects/install-on-windows/en/${branch}/
title: HIP SDK on Windows
- url: https://rocm.docs.amd.com/projects/radeon/en/latest/index.html
title: ROCm on Radeon GPUs
- file: how-to/deep-learning-rocm.md
title: Deep learning frameworks
- file: how-to/build-rocm.rst
title: Build ROCm from source
- caption: How to
entries:
- file: how-to/rocm-for-ai/index.rst
title: Use ROCm for AI
- file: preview/release.rst
title: Alpha release notes
- file: preview/install/index.rst
title: Installation
subtrees:
- entries:
- file: how-to/rocm-for-ai/install.rst
title: Installation
- file: how-to/rocm-for-ai/system-health-check.rst
title: System health benchmarks
- file: how-to/rocm-for-ai/training/index.rst
title: Training
subtrees:
- entries:
- file: how-to/rocm-for-ai/training/benchmark-docker/megatron-lm.rst
title: Train a model with Megatron-LM
- file: how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.rst
title: Train a model with PyTorch
- file: how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.rst
title: Train a model with JAX MaxText
- file: how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry
title: Train a model with LLM Foundry
- file: how-to/rocm-for-ai/training/scale-model-training.rst
title: Scale model training
- file: how-to/rocm-for-ai/fine-tuning/index.rst
title: Fine-tuning LLMs
subtrees:
- entries:
- file: how-to/rocm-for-ai/fine-tuning/overview.rst
title: Conceptual overview
- file: how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference.rst
title: Fine-tuning
subtrees:
- entries:
- file: how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference.rst
title: Use a single accelerator
- file: how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference.rst
title: Use multiple accelerators
- file: how-to/rocm-for-ai/inference/index.rst
title: Inference
subtrees:
- entries:
- file: how-to/rocm-for-ai/inference/hugging-face-models.rst
title: Run models from Hugging Face
- file: how-to/rocm-for-ai/inference/llm-inference-frameworks.rst
title: LLM inference frameworks
- file: how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst
title: vLLM inference performance testing
- file: how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference.rst
title: PyTorch inference performance testing
- file: how-to/rocm-for-ai/inference/deploy-your-model.rst
title: Deploy your model
- file: how-to/rocm-for-ai/inference-optimization/index.rst
title: Inference optimization
subtrees:
- entries:
- file: how-to/rocm-for-ai/inference-optimization/model-quantization.rst
- file: how-to/rocm-for-ai/inference-optimization/model-acceleration-libraries.rst
- file: how-to/rocm-for-ai/inference-optimization/optimizing-with-composable-kernel.md
title: Optimize with Composable Kernel
- file: how-to/rocm-for-ai/inference-optimization/optimizing-triton-kernel.rst
title: Optimize Triton kernels
- file: how-to/rocm-for-ai/inference-optimization/profiling-and-debugging.rst
title: Profile and debug
- file: how-to/rocm-for-ai/inference-optimization/workload.rst
title: Workload optimization
- url: https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/
title: AI tutorials
- file: how-to/rocm-for-hpc/index.rst
title: Use ROCm for HPC
- file: how-to/system-optimization/index.rst
title: System optimization
- file: how-to/gpu-performance/mi300x.rst
title: AMD Instinct MI300X performance guides
- file: how-to/system-debugging.md
- file: conceptual/compiler-topics.md
title: Use advanced compiler features
subtrees:
- entries:
- url: https://rocm.docs.amd.com/projects/llvm-project/en/latest/index.html
title: ROCm compiler infrastructure
- url: https://rocm.docs.amd.com/projects/llvm-project/en/latest/conceptual/using-gpu-sanitizer.html
title: Use AddressSanitizer
- url: https://rocm.docs.amd.com/projects/llvm-project/en/latest/conceptual/openmp.html
title: OpenMP support
- file: how-to/setting-cus
title: Set the number of CUs
- file: how-to/Bar-Memory.rst
title: Troubleshoot BAR access limitation
- url: https://github.com/amd/rocm-examples
title: ROCm examples
- caption: Conceptual
entries:
- file: conceptual/gpu-arch.md
title: GPU architecture overview
subtrees:
- entries:
- file: conceptual/gpu-arch/mi300.md
title: MI300 microarchitecture
subtrees:
- entries:
- url: https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf
title: AMD Instinct MI300/CDNA3 ISA
- url: https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf
title: White paper
- file: conceptual/gpu-arch/mi300-mi200-performance-counters.rst
title: MI300 and MI200 Performance counter
- file: conceptual/gpu-arch/mi250.md
title: MI250 microarchitecture
subtrees:
- entries:
- url: https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf
title: AMD Instinct MI200/CDNA2 ISA
- url: https://www.amd.com/content/dam/amd/en/documents/instinct-business-docs/white-papers/amd-cdna2-white-paper.pdf
title: White paper
- file: conceptual/gpu-arch/mi100.md
title: MI100 microarchitecture
subtrees:
- entries:
- url: https://www.amd.com/system/files/TechDocs/instinct-mi100-cdna1-shader-instruction-set-architecture%C2%A0.pdf
title: AMD Instinct MI100/CDNA1 ISA
- url: https://www.amd.com/content/dam/amd/en/documents/instinct-business-docs/white-papers/amd-cdna-white-paper.pdf
title: White paper
- file: conceptual/file-reorg.md
title: File structure (Linux FHS)
- file: conceptual/gpu-isolation.md
title: GPU isolation techniques
- file: conceptual/cmake-packages.rst
title: Using CMake
- file: conceptual/ai-pytorch-inception.md
title: Inception v3 with PyTorch
- caption: Reference
entries:
- file: reference/api-libraries.md
title: ROCm libraries
- file: reference/rocm-tools.md
title: ROCm tools, compilers, and runtimes
- file: reference/gpu-arch-specs.rst
- file: reference/gpu-atomics-operation.rst
- file: reference/precision-support.rst
title: Precision support
- file: reference/graph-safe-support.rst
title: Graph safe support
- caption: Contribute
entries:
- file: contribute/contributing.md
title: Contributing to the ROCm documentation
subtrees:
- entries:
- file: contribute/toolchain.md
title: ROCm documentation toolchain
- file: contribute/building.md
- file: contribute/feedback.md
title: Providing feedback about the ROCm documentation
- file: about/license.md
title: ROCm licenses
- file: preview/install/rocm
title: Install ROCm
- file: preview/install/instinct-driver
title: Install Instinct Driver
# - entries:
# - file: what-is-rocm.rst
# - file: about/release-notes.md
# title: Release notes
# - file: compatibility/compatibility-matrix.rst
# title: Compatibility matrix
# entries:
# - url: https://rocm.docs.amd.com/projects/install-on-linux-internal/en/latest/reference/system-requirements.html
# title: Linux system requirements
# - url: https://rocm.docs.amd.com/projects/install-on-windows/en/${branch}/reference/system-requirements.html
# title: Windows system requirements
#
# - caption: Install
# entries:
# - url: https://rocm.docs.amd.com/projects/install-on-linux-internal/en/latest/
# title: ROCm on Linux
# - url: https://rocm.docs.amd.com/projects/install-on-windows/en/${branch}/
# title: HIP SDK on Windows
# - url: https://rocm.docs.amd.com/projects/radeon/en/latest/index.html
# title: ROCm on Radeon GPUs
# - file: how-to/deep-learning-rocm.md
# title: Deep learning frameworks
# - file: how-to/build-rocm.rst
# title: Build ROCm from source
#
# - caption: How to
# entries:
# - file: how-to/rocm-for-ai/index.rst
# title: Use ROCm for AI
# subtrees:
# - entries:
# - file: how-to/rocm-for-ai/install.rst
# title: Installation
# - file: how-to/rocm-for-ai/system-health-check.rst
# title: System health benchmarks
# - file: how-to/rocm-for-ai/training/index.rst
# title: Training
# subtrees:
# - entries:
# - file: how-to/rocm-for-ai/training/benchmark-docker/megatron-lm.rst
# title: Train a model with Megatron-LM
# - file: how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.rst
# title: Train a model with PyTorch
# - file: how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.rst
# title: Train a model with JAX MaxText
# - file: how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry
# title: Train a model with LLM Foundry
# - file: how-to/rocm-for-ai/training/scale-model-training.rst
# title: Scale model training
#
# - file: how-to/rocm-for-ai/fine-tuning/index.rst
# title: Fine-tuning LLMs
# subtrees:
# - entries:
# - file: how-to/rocm-for-ai/fine-tuning/overview.rst
# title: Conceptual overview
# - file: how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference.rst
# title: Fine-tuning
# subtrees:
# - entries:
# - file: how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference.rst
# title: Use a single accelerator
# - file: how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference.rst
# title: Use multiple accelerators
#
# - file: how-to/rocm-for-ai/inference/index.rst
# title: Inference
# subtrees:
# - entries:
# - file: how-to/rocm-for-ai/inference/hugging-face-models.rst
# title: Run models from Hugging Face
# - file: how-to/rocm-for-ai/inference/llm-inference-frameworks.rst
# title: LLM inference frameworks
# - file: how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst
# title: vLLM inference performance testing
# - file: how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference.rst
# title: PyTorch inference performance testing
# - file: how-to/rocm-for-ai/inference/deploy-your-model.rst
# title: Deploy your model
#
# - file: how-to/rocm-for-ai/inference-optimization/index.rst
# title: Inference optimization
# subtrees:
# - entries:
# - file: how-to/rocm-for-ai/inference-optimization/model-quantization.rst
# - file: how-to/rocm-for-ai/inference-optimization/model-acceleration-libraries.rst
# - file: how-to/rocm-for-ai/inference-optimization/optimizing-with-composable-kernel.md
# title: Optimize with Composable Kernel
# - file: how-to/rocm-for-ai/inference-optimization/optimizing-triton-kernel.rst
# title: Optimize Triton kernels
# - file: how-to/rocm-for-ai/inference-optimization/profiling-and-debugging.rst
# title: Profile and debug
# - file: how-to/rocm-for-ai/inference-optimization/workload.rst
# title: Workload optimization
#
# - url: https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/
# title: AI tutorials
#
# - file: how-to/rocm-for-hpc/index.rst
# title: Use ROCm for HPC
# - file: how-to/system-optimization/index.rst
# title: System optimization
# - file: how-to/gpu-performance/mi300x.rst
# title: AMD Instinct MI300X performance guides
# - file: how-to/system-debugging.md
# - file: conceptual/compiler-topics.md
# title: Use advanced compiler features
# subtrees:
# - entries:
# - url: https://rocm.docs.amd.com/projects/llvm-project/en/latest/index.html
# title: ROCm compiler infrastructure
# - url: https://rocm.docs.amd.com/projects/llvm-project/en/latest/conceptual/using-gpu-sanitizer.html
# title: Use AddressSanitizer
# - url: https://rocm.docs.amd.com/projects/llvm-project/en/latest/conceptual/openmp.html
# title: OpenMP support
# - file: how-to/setting-cus
# title: Set the number of CUs
# - file: how-to/Bar-Memory.rst
# title: Troubleshoot BAR access limitation
# - url: https://github.com/amd/rocm-examples
# title: ROCm examples
#
#
# - caption: Conceptual
# entries:
# - file: conceptual/gpu-arch.md
# title: GPU architecture overview
# subtrees:
# - entries:
# - file: conceptual/gpu-arch/mi300.md
# title: MI300 microarchitecture
# subtrees:
# - entries:
# - url: https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf
# title: AMD Instinct MI300/CDNA3 ISA
# - url: https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf
# title: White paper
# - file: conceptual/gpu-arch/mi300-mi200-performance-counters.rst
# title: MI300 and MI200 Performance counter
# - file: conceptual/gpu-arch/mi250.md
# title: MI250 microarchitecture
# subtrees:
# - entries:
# - url: https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf
# title: AMD Instinct MI200/CDNA2 ISA
# - url: https://www.amd.com/content/dam/amd/en/documents/instinct-business-docs/white-papers/amd-cdna2-white-paper.pdf
# title: White paper
# - file: conceptual/gpu-arch/mi100.md
# title: MI100 microarchitecture
# subtrees:
# - entries:
# - url: https://www.amd.com/system/files/TechDocs/instinct-mi100-cdna1-shader-instruction-set-architecture%C2%A0.pdf
# title: AMD Instinct MI100/CDNA1 ISA
# - url: https://www.amd.com/content/dam/amd/en/documents/instinct-business-docs/white-papers/amd-cdna-white-paper.pdf
# title: White paper
# - file: conceptual/file-reorg.md
# title: File structure (Linux FHS)
# - file: conceptual/gpu-isolation.md
# title: GPU isolation techniques
# - file: conceptual/cmake-packages.rst
# title: Using CMake
# - file: conceptual/ai-pytorch-inception.md
# title: Inception v3 with PyTorch
#
# - caption: Reference
# entries:
# - file: reference/api-libraries.md
# title: ROCm libraries
# - file: reference/rocm-tools.md
# title: ROCm tools, compilers, and runtimes
# - file: reference/gpu-arch-specs.rst
# - file: reference/gpu-atomics-operation.rst
# - file: reference/precision-support.rst
# title: Precision support
# - file: reference/graph-safe-support.rst
# title: Graph safe support
#
# - caption: Contribute
# entries:
# - file: contribute/contributing.md
# title: Contributing to the ROCm documentation
# subtrees:
# - entries:
# - file: contribute/toolchain.md
# title: ROCm documentation toolchain
# - file: contribute/building.md
# - file: contribute/feedback.md
# title: Providing feedback about the ROCm documentation
# - file: about/license.md
# title: ROCm licenses

View File

@@ -1,4 +1,4 @@
rocm-docs-core==1.20.1
sphinx-reredirects
sphinx-sitemap
sphinxcontrib.datatemplates==0.11.0
git+https://github.com/ROCm/rocm-docs-core.git@alexxu12/header-cap-space#egg=rocm-docs-core

View File

@@ -21,9 +21,11 @@ babel==2.17.0
# sphinx
beautifulsoup4==4.13.4
# via pydata-sphinx-theme
blinker==1.9.0
# via flask
breathe==4.36.0
# via rocm-docs-core
certifi==2025.4.26
certifi==2025.6.15
# via requests
cffi==1.17.1
# via
@@ -33,11 +35,12 @@ charset-normalizer==3.4.2
# via requests
click==8.2.1
# via
# flask
# jupyter-cache
# sphinx-external-toc
comm==0.2.2
# via ipykernel
cryptography==45.0.3
cryptography==45.0.4
# via pyjwt
debugpy==1.8.14
# via ipykernel
@@ -60,6 +63,8 @@ fastjsonschema==2.21.1
# via
# nbformat
# rocm-docs-core
flask==3.1.1
# via sphinx-sitemap
gitdb==4.0.12
# via gitpython
gitpython==3.1.44
@@ -80,10 +85,13 @@ ipython==8.37.0
# via
# ipykernel
# myst-nb
itsdangerous==2.2.0
# via flask
jedi==0.19.2
# via ipython
jinja2==3.1.6
# via
# flask
# myst-parser
# sphinx
jsonschema==4.24.0
@@ -107,7 +115,10 @@ markdown-it-py==3.0.0
# mdit-py-plugins
# myst-parser
markupsafe==3.0.2
# via jinja2
# via
# flask
# jinja2
# werkzeug
matplotlib-inline==0.1.7
# via
# ipykernel
@@ -134,7 +145,6 @@ nest-asyncio==1.6.0
packaging==25.0
# via
# ipykernel
# pydata-sphinx-theme
# sphinx
parso==0.8.4
# via jedi
@@ -152,13 +162,13 @@ pure-eval==0.2.3
# via stack-data
pycparser==2.22
# via cffi
pydata-sphinx-theme==0.15.4
pydata-sphinx-theme==0.16.1
# via
# rocm-docs-core
# sphinx-book-theme
pygithub==2.6.1
# via rocm-docs-core
pygments==2.19.1
pygments==2.19.2
# via
# accessible-pygments
# ipython
@@ -178,7 +188,7 @@ pyyaml==6.0.2
# rocm-docs-core
# sphinx-external-toc
# sphinxcontrib-datatemplates
pyzmq==26.4.0
pyzmq==27.0.0
# via
# ipykernel
# jupyter-client
@@ -190,7 +200,8 @@ requests==2.32.4
# via
# pygithub
# sphinx
rocm-docs-core==1.20.1
# sphinx-sitemap
rocm-docs-core @ git+https://github.com/ROCm/rocm-docs-core.git@alexxu12/header-cap-space
# via -r requirements.in
rpds-py==0.25.1
# via
@@ -215,12 +226,12 @@ sphinx==8.1.3
# sphinx-copybutton
# sphinx-design
# sphinx-external-toc
# sphinx-last-updated-by-git
# sphinx-notfound-page
# sphinx-reredirects
# sphinx-sitemap
# sphinxcontrib-datatemplates
# sphinxcontrib-runcmd
sphinx-book-theme==1.1.4
sphinx-book-theme==1.1.3
# via rocm-docs-core
sphinx-copybutton==0.5.2
# via rocm-docs-core
@@ -228,11 +239,13 @@ sphinx-design==0.6.1
# via rocm-docs-core
sphinx-external-toc==1.0.1
# via rocm-docs-core
sphinx-last-updated-by-git==0.3.8
# via sphinx-sitemap
sphinx-notfound-page==1.1.0
# via rocm-docs-core
sphinx-reredirects==0.1.6
# via -r requirements.in
sphinx-sitemap==2.6.0
sphinx-sitemap==2.7.1
# via -r requirements.in
sphinxcontrib-applehelp==2.0.0
# via sphinx
@@ -288,6 +301,8 @@ urllib3==2.5.0
# requests
wcwidth==0.2.13
# via prompt-toolkit
werkzeug==3.1.3
# via flask
wrapt==1.17.2
# via deprecated
zipp==3.23.0