Compare commits

6 Commits

Author SHA1 Message Date
Istvan Kiss
d946fbeaee Precision support page update 2025-02-04 15:03:31 +01:00
Daniel Su
7179f2a72f Ex CI: add REPO_RADEON_VERSION as a global variable, clean up other variables (#4334) 2025-02-03 16:04:07 -05:00
Daniel Su
0df0f74312 Ex CI: rocprof-sdk & rocprof-systems VCN tracing dependencies (#4332) 2025-02-03 11:00:52 -05:00
Pratik Basyal
f885b5df6e Updated ROCm install on Linux installation method link (#4313) 2025-01-31 16:48:33 -05:00
dependabot[bot]
ee70cb0bb5 Build(deps): Bump rocm-docs-core from 1.13.0 to 1.15.0 in /docs/sphinx (#4315)
Bumps [rocm-docs-core](https://github.com/ROCm/rocm-docs-core) from 1.13.0 to 1.15.0.
- [Release notes](https://github.com/ROCm/rocm-docs-core/releases)
- [Changelog](https://github.com/ROCm/rocm-docs-core/blob/develop/CHANGELOG.md)
- [Commits](https://github.com/ROCm/rocm-docs-core/compare/v1.13.0...v1.15.0)

---
updated-dependencies:
- dependency-name: rocm-docs-core
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-01-29 17:14:55 -07:00
Jeffrey Novotny
d401b5f152 Add ToC and index links to the AI Developer Tutorials (#4312)
* Add ToC and index links to the AI Developer Tutorials

* Change link positioning

* Change wording
2025-01-29 14:43:32 -05:00
23 changed files with 267 additions and 124 deletions

View File

@@ -49,21 +49,10 @@ jobs:
workspace:
clean: all
steps:
# Since mesa-amdgpu-multimedia-devel is not directly available from apt, register it
- task: Bash@3
displayName: 'Register ROCm packages'
inputs:
targetType: inline
script: |
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/${{ variables.KEYRING_VERSION }}/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ variables.KEYRING_VERSION }} jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
sudo apt update
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
@@ -104,21 +93,10 @@ jobs:
JOB_GPU_TARGET: gfx942
JOB_TEST_POOL: ${{ variables.GFX942_TEST_POOL }}
steps:
# Since mesa-amdgpu-multimedia-devel is not directly available from apt, register it
- task: Bash@3
displayName: 'Register ROCm packages'
inputs:
targetType: inline
script: |
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/${{ variables.KEYRING_VERSION }}/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ variables.KEYRING_VERSION }} jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
sudo apt update
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml

View File

@@ -48,21 +48,10 @@ jobs:
gfx942:
JOB_GPU_TARGET: gfx942
steps:
# Since mesa-amdgpu-multimedia-devel is not directly available from apt, register it
- task: Bash@3
displayName: 'Register ROCm packages'
inputs:
targetType: inline
script: |
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/${{ variables.KEYRING_VERSION }}/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ variables.KEYRING_VERSION }} jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
sudo apt update
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
@@ -106,21 +95,10 @@ jobs:
JOB_GPU_TARGET: gfx942
JOB_TEST_POOL: ${{ variables.GFX942_TEST_POOL }}
steps:
# Since mesa-amdgpu-multimedia-devel is not directly available from apt, register it
- task: Bash@3
displayName: 'Register ROCm packages'
inputs:
targetType: inline
script: |
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/${{ variables.KEYRING_VERSION }}/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ variables.KEYRING_VERSION }} jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
sudo apt update
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml

View File

@@ -36,7 +36,7 @@ jobs:
-DCPACK_GENERATOR=DEB
-DCPACK_DEBIAN_PACKAGE_RELEASE="local.9999~99.99"
-DCPACK_RPM_PACKAGE_RELEASE="local.9999"
-DROCM_VERSION="$(next-release)"
-DROCM_VERSION="$(NEXT_RELEASE_VERSION)"
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
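For illustration only, the changed flag resolves at queue time roughly as follows, assuming NEXT_RELEASE_VERSION is 6.4.0 as set in the pipeline variables change later in this compare; the cmake invocation and source directory are placeholders, not part of this diff:

# Hypothetical expansion with NEXT_RELEASE_VERSION=6.4.0 substituted by Azure Pipelines;
# '.' stands in for the real source directory used by the build step.
cmake . \
  -DCPACK_GENERATOR=DEB \
  -DCPACK_DEBIAN_PACKAGE_RELEASE="local.9999~99.99" \
  -DCPACK_RPM_PACKAGE_RELEASE="local.9999" \
  -DROCM_VERSION="6.4.0"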

View File

@@ -35,7 +35,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
registerRadeon: true
registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:

View File

@@ -37,6 +37,8 @@ parameters:
- clr
- llvm-project
- rccl
- rocDecode
- rocJPEG
- rocm-cmake
- rocm-core
- rocminfo
@@ -60,7 +62,7 @@ jobs:
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
registerRadeon: true
registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
@@ -99,6 +101,7 @@ jobs:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
gpuTarget: $(JOB_GPU_TARGET)
registerROCmPackages: true
- job: rocprofiler_sdk_testing
dependsOn: rocprofiler_sdk
@@ -119,7 +122,7 @@ jobs:
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
registerRadeon: true
registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
@@ -155,3 +158,4 @@ jobs:
pipModules: ${{ parameters.pipModules }}
environment: test
gpuTarget: $(JOB_GPU_TARGET)
registerROCmPackages: true

View File

@@ -46,13 +46,14 @@ parameters:
- name: rocmDependencies
type: object
default:
- amdsmi
- aomp
- clr
- llvm-project
- rccl
- rocDecode
- rocJPEG
- rocm-core
- rocm_smi_lib
- rocminfo
- ROCR-Runtime
- rocprofiler-register
@@ -75,7 +76,7 @@ jobs:
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
registerRadeon: true
registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
@@ -159,7 +160,7 @@ jobs:
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
registerRadeon: true
registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:

View File

@@ -19,7 +19,7 @@ jobs:
pool:
vmImage: ${{ variables.BASE_BUILD_POOL }}
container:
image: ${{ variables.DOCKER_IMAGE_NAME }}:${{ variables.LATEST_DOCKER_VERSION }}
image: rocm/dev-ubuntu-22.04:${{ variables.LATEST_RELEASE_VERSION }}
workspace:
clean: all
steps:
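With LATEST_RELEASE_VERSION set to 6.3.2 in the variables change later in this compare, the container line above resolves to rocm/dev-ubuntu-22.04:6.3.2. A sketch of pulling the same image locally, assuming that tag is published on Docker Hub:

# Fetch the image this job now runs in (tag assumes LATEST_RELEASE_VERSION=6.3.2)
docker pull rocm/dev-ubuntu-22.04:6.3.2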

View File

@@ -4,13 +4,13 @@ steps:
inputs:
targetType: inline
script: |
export packageName=$(curl -s https://repo.radeon.com/rocm/apt/latest/pool/main/h/hsa-amd-aqlprofile/ | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb")
export packageName=$(curl -s https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/ | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb")
echo "##vso[task.setvariable variable=packageName;isreadonly=true]$packageName"
- task: Bash@3
displayName: 'Download aqlprofile'
inputs:
targetType: inline
script: wget -nv https://repo.radeon.com/rocm/apt/latest/pool/main/h/hsa-amd-aqlprofile/$(packageName)
script: wget -nv https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/$(packageName)
workingDirectory: '$(Pipeline.Workspace)'
- task: Bash@3
displayName: 'Extract aqlprofile'
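For reference, a sketch of what the updated download step expands to, assuming REPO_RADEON_VERSION resolves to 6.3.2 (per the variables change later in this compare) on an Ubuntu 22.04 agent, where lsb_release -rs prints 22.04:

# Hypothetical expansion with REPO_RADEON_VERSION=6.3.2 substituted by Azure Pipelines;
# $(lsb_release -rs) is left for bash to evaluate on the agent.
packageName=$(curl -s https://repo.radeon.com/rocm/apt/6.3.2/pool/main/h/hsa-amd-aqlprofile/ \
  | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb")
wget -nv "https://repo.radeon.com/rocm/apt/6.3.2/pool/main/h/hsa-amd-aqlprofile/${packageName}"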

View File

@@ -6,21 +6,21 @@ parameters:
- name: pipModules
type: object
default: []
- name: registerRadeon
- name: registerROCmPackages
type: boolean
default: false
steps:
- ${{ if eq(parameters.registerRadeon, true) }}:
- ${{ if eq(parameters.registerROCmPackages, true) }}:
- task: Bash@3
displayName: 'Register repo.radeon packages'
displayName: 'Register AMDGPU & ROCm repos'
inputs:
targetType: inline
script: |
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/latest/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/latest jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/$(REPO_RADEON_VERSION)/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION) jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
sudo apt update
# firefox takes time to upgrade and is not needed for CI workloads, hold version
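A sketch of a manual sanity check, separate from the template itself, to confirm that the registered repositories and the repo.radeon.com pin are in effect on an agent; rocm-core is only an example package name:

# Configured sources and priorities; repo.radeon.com entries should list priority 600
apt-cache policy | grep repo.radeon.com
# Confirm an example ROCm package would install from repo.radeon.com
apt-cache policy rocm-core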

View File

@@ -154,8 +154,8 @@ steps:
script: |
echo "RUN mkdir --parents --mode=0755 /etc/apt/keyrings" >> Dockerfile
echo "RUN wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null" >> Dockerfile
echo "RUN echo \"deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/latest/ubuntu jammy main\" | tee /etc/apt/sources.list.d/amdgpu.list" >> Dockerfile
echo "RUN echo \"deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/latest jammy main\" | tee --append /etc/apt/sources.list.d/rocm.list" >> Dockerfile
echo "RUN echo \"deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/$(REPO_RADEON_VERSION)/ubuntu jammy main\" | tee /etc/apt/sources.list.d/amdgpu.list" >> Dockerfile
echo "RUN echo \"deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION) jammy main\" | tee --append /etc/apt/sources.list.d/rocm.list" >> Dockerfile
echo "RUN printf 'Package: *\\nPin: release o=repo.radeon.com\\nPin-Priority: 600' > /etc/apt/preferences.d/rocm-pin-600" >> Dockerfile
echo "RUN DEBIAN_FRONTEND=noninteractive apt-get --yes update" >> Dockerfile
- ${{ if eq(parameters.registerCUDAPackages, true) }}:
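For reference, a sketch of what these echo commands append to the generated Dockerfile, assuming REPO_RADEON_VERSION resolves to 6.3.2; shown here as a shell check rather than as the pipeline step:

# Inspect the generated Dockerfile; with REPO_RADEON_VERSION=6.3.2 the two repository
# lines should read:
#   RUN echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/6.3.2/ubuntu jammy main" | tee /etc/apt/sources.list.d/amdgpu.list
#   RUN echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.3.2 jammy main" | tee --append /etc/apt/sources.list.d/rocm.list
tail -n 6 Dockerfile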

View File

@@ -27,14 +27,14 @@ variables:
value: rocm-ci_larger_base_disk_pool
- name: GFX942_TEST_POOL
value: gfx942_test_pool
- name: LATEST_RELEASE_VERSION
value: 6.3.2
- name: REPO_RADEON_VERSION
value: 6.3.2
- name: NEXT_RELEASE_VERSION
value: 6.4.0
- name: LATEST_RELEASE_TAG
value: rocm-6.1.0
- name: DOCKER_IMAGE_NAME
value: rocm/dev-ubuntu-22.04
- name: LATEST_DOCKER_VERSION
value: 6.1
- name: KEYRING_VERSION
value: 6.3
value: rocm-6.3.2
- name: AMDMIGRAPHX_GFX942_TEST_PIPELINE_ID
value: 197
- name: AMDMIGRAPHX_PIPELINE_ID
@@ -151,8 +151,6 @@ variables:
value: 105
- name: HIPTENSOR_TAGGED_PIPELINE_ID
value: 56
- name: LAST_RELEASE
value: 6.1.0
- name: LLVM_PROJECT_PIPELINE_ID
value: 2
- name: LLVM_PROJECT_TAGGED_PIPELINE_ID
@@ -183,10 +181,6 @@ variables:
value: 100
- name: RDC_TAGGED_PIPELINE_ID
value: 59
- name: REIMAGE_ORG
value: AGS-ROCm-CI
- name: REIMAGE_REPO
value: cirrascale-reimage-automation
- name: ROCAL_PIPELINE_ID
value: 151
- name: ROCALUTION_GFX942_TEST_PIPELINE_ID

View File

@@ -16,6 +16,9 @@ Throughout the following topics, this guide discusses the goals and :ref:`challe
model <fine-tuning-llms-concept-challenge>` like Llama 2. In the
sections that follow, you'll find practical guides on libraries and tools to accelerate your fine-tuning.
The AI Developer Hub contains `AMD ROCm tutorials <https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/>`_ for
training, fine-tuning, and inference. It leverages popular machine learning frameworks on AMD GPUs.
- :doc:`Conceptual overview of fine-tuning LLMs <overview>`
- :doc:`Fine-tuning and inference <fine-tuning-and-inference>` using a

View File

@@ -12,6 +12,9 @@ You can use ROCm to perform distributed training, which enables you to train mod
Overall, ROCm can be used to improve the performance and efficiency of your AI applications. With its training, fine-tuning, and inference support, ROCm provides a complete solution for optimizing AI workflows and achieving the optimum results possible on AMD GPUs.
The AI Developer Hub contains `AMD ROCm tutorials <https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/>`_ for
training, fine-tuning, and inference. It leverages popular machine learning frameworks on AMD GPUs.
In this guide, you'll learn how to use ROCm for AI:
- :doc:`Training <training/index>`

View File

@@ -277,7 +277,7 @@ Installing FBGEMM_GPU
Installing FBGEMM_GPU consists of the following steps:
* Set up an isolated Miniconda environment
* Install ROCm using Docker or the :doc:`package manager <rocm-install-on-linux:install/native-install/index>`
* Install ROCm using Docker or the :doc:`package manager <rocm-install-on-linux:install/install-methods/package-manager-index>`
* Install the nightly `PyTorch <https://pytorch.org/>`_ build
* Complete the pre-build and build tasks

View File

@@ -11,6 +11,9 @@ Understanding the ROCm™ software platform's architecture and capabilities is
Throughout the following topics, this section provides a comprehensive guide to setting up and deploying AI inference on AMD GPUs. This includes instructions on how to install ROCm, how to use Hugging Face Transformers to manage pre-trained models for natural language processing (NLP) tasks, how to validate vLLM on AMD Instinct™ MI300X accelerators and illustrate how to deploy trained models in production environments.
The AI Developer Hub contains `AMD ROCm tutorials <https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/>`_ for
training, fine-tuning, and inference. It leverages popular machine learning frameworks on AMD GPUs.
- :doc:`Installing ROCm and machine learning frameworks <install>`
- :doc:`Running models from Hugging Face <hugging-face-models>`

View File

@@ -26,7 +26,7 @@ If you're using a Radeon GPU for graphics-accelerated applications, refer to t
ROCm supports multiple :doc:`installation methods <rocm-install-on-linux:install/install-overview>`:
* :doc:`Using your Linux distribution's package manager <rocm-install-on-linux:install/native-install/index>`
* :doc:`Using your Linux distribution's package manager <rocm-install-on-linux:install/install-methods/package-manager-index>`
* :doc:`Using the AMDGPU installer <rocm-install-on-linux:install/amdgpu-install>`

View File

@@ -14,6 +14,9 @@ Training models on AMD GPUs with the ROCm™ software platform allows you to use
The ROCm software platform makes it easier to train models on AMD GPUs while maintaining compatibility with existing code and tools. The platform also provides features like multi-GPU support, allowing for scaling and parallelization of model training across multiple GPUs to enhance performance.
The AI Developer Hub contains `AMD ROCm tutorials <https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/>`_ for
training, fine-tuning, and inference. It leverages popular machine learning frameworks on AMD GPUs.
In this guide, you'll learn about:
- :doc:`Training a model <train-a-model>`

View File

@@ -12,7 +12,7 @@ myst:
This chapter reviews system settings that are required to configure the system
for ROCm virtualization on RDNA2-based AMD Radeon™ PRO GPUs. Installing ROCm on
Bare Metal follows the routine ROCm
{doc}`installation procedure<rocm-install-on-linux:install/native-install/index>`.
{doc}`installation procedure<rocm-install-on-linux:install/install-methods/package-manager-index>`.
To enable ROCm virtualization on V620, one has to setup Single Root I/O
Virtualization (SR-IOV) in the BIOS via setting found in the following
@@ -166,4 +166,4 @@ First, assign GPU virtual function (VF) to VM using the following steps.
Then start the VM.
Finally install ROCm on the virtual machine (VM). For detailed instructions,
refer to the {doc}`Linux install guide<rocm-install-on-linux:install/native-install/index>`.
refer to the {doc}`Linux install guide<rocm-install-on-linux:install/install-methods/package-manager-index>`.

View File

@@ -38,6 +38,7 @@ ROCm documentation is organized into the following categories:
:class-body: rocm-card-banner rocm-hue-12
* [Use ROCm for AI](./how-to/rocm-for-ai/index.rst)
* [AI tutorials](https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/)
* [Use ROCm for HPC](./how-to/rocm-for-hpc/index.rst)
* [System optimization](./how-to/system-optimization/index.rst)
* [AMD Instinct MI300X performance validation and tuning](./how-to/tuning-guides/mi300x/index.rst)

View File

@@ -1,19 +1,23 @@
.. meta::
:description: Supported data types in ROCm
:keywords: int8, float8, float8 (E4M3), float8 (E5M2), bfloat8, float16, half, bfloat16, tensorfloat32, float,
float32, float64, double, AMD, ROCm, AMDGPU
:description: Supported data types of AMD GPUs and libraries in ROCm.
:keywords: precision, data types, HIP types, int8, float8, float8 (E4M3),
float8 (E5M2), bfloat8, float16, half, bfloat16, tensorfloat32,
float, float32, float64, double, AMD data types, HIP data types,
ROCm precision, ROCm data types
*************************************************************
Precision support
Data types and precision support
*************************************************************
Use the following sections to identify data types and HIP types ROCm™ supports.
This topic lists the supported data types of AMD GPUs and ROCm libraries.
Corresponding :doc:`HIP <hip:index>` data types are also noted.
Integral types
==========================================
The signed and unsigned integral types that are supported by ROCm are listed in the following table,
together with their corresponding HIP type and a short description.
The signed and unsigned integral types supported by ROCm are listed in
the following table, along with their corresponding HIP type and a short
description.
.. list-table::
@@ -46,8 +50,8 @@ together with their corresponding HIP type and a short description.
Floating-point types
==========================================
The floating-point types that are supported by ROCm are listed in the following table, together with
their corresponding HIP type and a short description.
The floating-point types supported by ROCm are listed in the following
table, along with their corresponding HIP type and a short description.
.. image:: ../data/about/compatibility/floating-point-data-types.png
:alt: Supported floating-point types
@@ -63,43 +67,62 @@ their corresponding HIP type and a short description.
*
- float8 (E4M3)
- ``-``
- An 8-bit floating-point number that mostly follows IEEE-754 conventions and **S1E4M3** bit layout, as described in `8-bit Numerical Formats for Deep Neural Networks <https://arxiv.org/abs/2206.02915>`_ , with expanded range and with no infinity or signed zero. NaN is represented as negative zero.
- An 8-bit floating-point number that mostly follows IEEE-754 conventions
and **S1E4M3** bit layout, as described in `8-bit Numerical Formats for Deep Neural Networks <https://arxiv.org/abs/2206.02915>`_ ,
with expanded range and no infinity or signed zero. NaN is
represented as negative zero.
*
- float8 (E5M2)
- ``-``
- An 8-bit floating-point number mostly following IEEE-754 conventions and **S1E5M2** bit layout, as described in `8-bit Numerical Formats for Deep Neural Networks <https://arxiv.org/abs/2206.02915>`_ , with expanded range and with no infinity or signed zero. NaN is represented as negative zero.
- An 8-bit floating-point number mostly following IEEE-754 conventions and
**S1E5M2** bit layout, as described in `8-bit Numerical Formats for Deep Neural Networks <https://arxiv.org/abs/2206.02915>`_ ,
with expanded range and no infinity or signed zero. NaN is
represented as negative zero.
*
- float16
- ``half``
- A 16-bit floating-point number that conforms to the IEEE 754-2008 half-precision storage format.
- A 16-bit floating-point number that conforms to the IEEE 754-2008
half-precision storage format.
*
- bfloat16
- ``bfloat16``
- A shortened 16-bit version of the IEEE 754 single-precision storage format.
- A shortened 16-bit version of the IEEE 754 single-precision storage
format.
*
- tensorfloat32
- ``-``
- A floating-point number that occupies 32 bits or less of storage, providing improved range compared to half (16-bit) format, at (potentially) greater throughput than single-precision (32-bit) formats.
- A floating-point number that occupies 32 bits or less of storage,
providing improved range compared to half (16-bit) format, at
(potentially) greater throughput than single-precision (32-bit) formats.
*
- float32
- ``float``
- A 32-bit floating-point number that conforms to the IEEE 754 single-precision storage format.
- A 32-bit floating-point number that conforms to the IEEE 754
single-precision storage format.
*
- float64
- ``double``
- A 64-bit floating-point number that conforms to the IEEE 754 double-precision storage format.
- A 64-bit floating-point number that conforms to the IEEE 754
double-precision storage format.
.. note::
* The float8 and tensorfloat32 types are internal types used in calculations in Matrix Cores and can be stored in any type of the same size.
* The encodings for FP8 (E5M2) and FP8 (E4M3) that are natively supported by MI300 differ from the FP8 (E5M2) and FP8 (E4M3) encodings used in H100 (`FP8 Formats for Deep Learning <https://arxiv.org/abs/2209.05433>`_).
* The float8 and tensorfloat32 types are internal types used in calculations
in Matrix Cores and can be stored in any type of the same size.
* The encodings for FP8 (E5M2) and FP8 (E4M3) that the
MI300 series natively supports differ from the FP8 (E5M2) and FP8 (E4M3)
encodings used in NVIDIA H100
(`FP8 Formats for Deep Learning <https://arxiv.org/abs/2209.05433>`_).
* In some AMD documents and articles, float8 (E5M2) is referred to as bfloat8.
ROCm support icons
==========================================
In the following sections, we use icons to represent the level of support. These icons, described in the
following table, are also used on the library data type support pages.
In the following sections, icons represent the level of support. These
icons, described in the following table, are also used in the library data type
support pages.
.. list-table::
:header-rows: 1
@@ -121,14 +144,27 @@ following table, are also used on the library data type support pages.
.. note::
* Full support means that the type is supported natively or with hardware emulation.
* Native support means that the operations for that type are implemented in hardware. Types that are not natively supported are emulated with the available hardware. The performance of non-natively supported types can differ from the full instruction throughput rate. For example, 16-bit integer operations can be performed on the 32-bit integer ALUs at full rate; however, 64-bit integer operations might need several instructions on the 32-bit integer ALUs.
* Any type can be emulated by software, but this page does not cover such cases.
* Full support means that the type is supported natively or with hardware
emulation.
Hardware type support
* Native support means that the operations for that type are implemented in
hardware. Types that are not natively supported are emulated with the
available hardware. The performance of non-natively supported types can
differ from the full instruction throughput rate. For example, 16-bit
integer operations can be performed on the 32-bit integer ALUs at full rate;
however, 64-bit integer operations might need several instructions on the
32-bit integer ALUs.
* Any type can be emulated by software, but this page does not cover such
cases.
Hardware data type support
==========================================
AMD GPU hardware support for data types is listed in the following tables.
The following tables provide information about AMD Instinct accelerators support
for various data types. The MI200 series GPUs, which include MI210, MI250, and
MI250X, are based on the CDNA2 architecture. The MI300 series GPUs, consisting
of MI300A, MI300X, and MI325X, are built on the CDNA3 architecture.
Compute units support
-------------------------------------------------------------------------------
@@ -375,21 +411,23 @@ The following table lists data type support for atomic operations.
.. note::
For cases that are not natively supported, you can emulate atomic operations using software.
Software-emulated atomic operations have high negative performance impact when they frequently
access the same memory address.
You can emulate atomic operations using software for cases that are not
natively supported. Software-emulated atomic operations have a high negative
performance impact when they frequently access the same memory address.
Data Type support in ROCm Libraries
Data type support in ROCm libraries
==========================================
ROCm library support for int8, float8 (E4M3), float8 (E5M2), int16, float16, bfloat16, int32,
tensorfloat32, float32, int64, and float64 is listed in the following tables.
ROCm library support for int8, float8 (E4M3), float8 (E5M2), int16, float16,
bfloat16, int32, tensorfloat32, float32, int64, and float64 is listed in the
following tables.
Libraries input/output type support
-------------------------------------------------------------------------------
The following tables list ROCm library support for specific input and output data types. For a detailed
description, refer to the corresponding library data type support page.
The following tables list ROCm library support for specific input and output
data types. Refer to the corresponding library data type support page for a
detailed description.
.. tab-set::
@@ -516,8 +554,9 @@ description, refer to the corresponding library data type support page.
Libraries internal calculations type support
-------------------------------------------------------------------------------
The following tables list ROCm library support for specific internal data types. For a detailed
description, refer to the corresponding library data type support page.
The following tables list ROCm library support for specific internal data types.
Refer to the corresponding library data type support page for a detailed
description.
.. tab-set::

View File

@@ -89,7 +89,10 @@ subtrees:
title: Profile and debug
- file: how-to/rocm-for-ai/inference-optimization/workload.rst
title: Workload tuning
- url: https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/
title: AI tutorials
- file: how-to/rocm-for-hpc/index.rst
title: Use ROCm for HPC
- file: how-to/system-optimization/index.rst
@@ -126,6 +129,7 @@ subtrees:
- url: https://github.com/amd/rocm-examples
title: ROCm examples
- caption: Conceptual
entries:
- file: conceptual/gpu-arch.md

View File

@@ -1,3 +1,3 @@
rocm-docs-core==1.13.0
rocm-docs-core==1.15.0
sphinx-reredirects
sphinx-sitemap

View File

@@ -8,6 +8,13 @@ accessible-pygments==0.0.5
# via pydata-sphinx-theme
alabaster==1.0.0
# via sphinx
asttokens==3.0.0
# via stack-data
attrs==25.1.0
# via
# jsonschema
# jupyter-cache
# referencing
babel==2.16.0
# via
# pydata-sphinx-theme
@@ -25,9 +32,17 @@ cffi==1.17.1
charset-normalizer==3.4.0
# via requests
click==8.1.7
# via sphinx-external-toc
# via
# jupyter-cache
# sphinx-external-toc
comm==0.2.2
# via ipykernel
cryptography==43.0.3
# via pyjwt
debugpy==1.8.12
# via ipykernel
decorator==5.1.1
# via ipython
deprecated==1.2.15
# via pygithub
docutils==0.21.2
@@ -36,34 +51,103 @@ docutils==0.21.2
# myst-parser
# pydata-sphinx-theme
# sphinx
exceptiongroup==1.2.2
# via ipython
executing==2.2.0
# via stack-data
fastjsonschema==2.20.0
# via rocm-docs-core
# via
# nbformat
# rocm-docs-core
gitdb==4.0.11
# via gitpython
gitpython==3.1.43
# via rocm-docs-core
greenlet==3.1.1
# via sqlalchemy
idna==3.10
# via requests
imagesize==1.4.1
# via sphinx
importlib-metadata==8.6.1
# via
# jupyter-cache
# myst-nb
ipykernel==6.29.5
# via myst-nb
ipython==8.31.0
# via
# ipykernel
# myst-nb
jedi==0.19.2
# via ipython
jinja2==3.1.5
# via
# myst-parser
# sphinx
jsonschema==4.23.0
# via nbformat
jsonschema-specifications==2024.10.1
# via jsonschema
jupyter-cache==1.0.1
# via myst-nb
jupyter-client==8.6.3
# via
# ipykernel
# nbclient
jupyter-core==5.7.2
# via
# ipykernel
# jupyter-client
# nbclient
# nbformat
markdown-it-py==3.0.0
# via
# mdit-py-plugins
# myst-parser
markupsafe==3.0.2
# via jinja2
matplotlib-inline==0.1.7
# via
# ipykernel
# ipython
mdit-py-plugins==0.4.2
# via myst-parser
mdurl==0.1.2
# via markdown-it-py
myst-parser==4.0.0
myst-nb==1.1.2
# via rocm-docs-core
myst-parser==4.0.0
# via myst-nb
nbclient==0.10.2
# via
# jupyter-cache
# myst-nb
nbformat==5.10.4
# via
# jupyter-cache
# myst-nb
# nbclient
nest-asyncio==1.6.0
# via ipykernel
packaging==24.2
# via sphinx
# via
# ipykernel
# sphinx
parso==0.8.4
# via jedi
pexpect==4.9.0
# via ipython
platformdirs==4.3.6
# via jupyter-core
prompt-toolkit==3.0.50
# via ipython
psutil==6.1.1
# via ipykernel
ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.3
# via stack-data
pycparser==2.22
# via cffi
pydata-sphinx-theme==0.16.0
@@ -75,23 +159,42 @@ pygithub==2.5.0
pygments==2.18.0
# via
# accessible-pygments
# ipython
# pydata-sphinx-theme
# sphinx
pyjwt[crypto]==2.10.0
# via pygithub
pynacl==1.5.0
# via pygithub
python-dateutil==2.9.0.post0
# via jupyter-client
pyyaml==6.0.2
# via
# jupyter-cache
# myst-nb
# myst-parser
# rocm-docs-core
# sphinx-external-toc
pyzmq==26.2.0
# via
# ipykernel
# jupyter-client
referencing==0.36.2
# via
# jsonschema
# jsonschema-specifications
requests==2.32.3
# via
# pygithub
# sphinx
rocm-docs-core==1.13.0
rocm-docs-core==1.15.0
# via -r requirements.in
rpds-py==0.22.3
# via
# jsonschema
# referencing
six==1.17.0
# via python-dateutil
smmap==5.0.1
# via gitdb
snowballstemmer==2.2.0
@@ -101,6 +204,7 @@ soupsieve==2.6
sphinx==8.1.3
# via
# breathe
# myst-nb
# myst-parser
# pydata-sphinx-theme
# rocm-docs-core
@@ -137,15 +241,43 @@ sphinxcontrib-qthelp==2.0.0
# via sphinx
sphinxcontrib-serializinghtml==2.0.0
# via sphinx
sqlalchemy==2.0.37
# via jupyter-cache
stack-data==0.6.3
# via ipython
tabulate==0.9.0
# via jupyter-cache
tomli==2.1.0
# via sphinx
tornado==6.4.2
# via
# ipykernel
# jupyter-client
traitlets==5.14.3
# via
# comm
# ipykernel
# ipython
# jupyter-client
# jupyter-core
# matplotlib-inline
# nbclient
# nbformat
typing-extensions==4.12.2
# via
# ipython
# myst-nb
# pydata-sphinx-theme
# pygithub
# referencing
# sqlalchemy
urllib3==2.2.3
# via
# pygithub
# requests
wcwidth==0.2.13
# via prompt-toolkit
wrapt==1.17.0
# via deprecated
zipp==3.21.0
# via importlib-metadata