mirror of
https://github.com/ROCm/ROCm.git
synced 2026-01-09 22:58:17 -05:00
Compare commits
6 Commits
docs/6.3.2
...
precision_
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d946fbeaee | ||
|
|
7179f2a72f | ||
|
|
0df0f74312 | ||
|
|
f885b5df6e | ||
|
|
ee70cb0bb5 | ||
|
|
d401b5f152 |
@@ -49,21 +49,10 @@ jobs:
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
# Since mesa-amdgpu-multimedia-devel is not directly available from apt, register it
|
||||
- task: Bash@3
|
||||
displayName: 'Register ROCm packages'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
|
||||
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/${{ variables.KEYRING_VERSION }}/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ variables.KEYRING_VERSION }} jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
|
||||
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
|
||||
sudo apt update
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
@@ -104,21 +93,10 @@ jobs:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
JOB_TEST_POOL: ${{ variables.GFX942_TEST_POOL }}
|
||||
steps:
|
||||
# Since mesa-amdgpu-multimedia-devel is not directly available from apt, register it
|
||||
- task: Bash@3
|
||||
displayName: 'Register ROCm packages'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
|
||||
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/${{ variables.KEYRING_VERSION }}/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ variables.KEYRING_VERSION }} jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
|
||||
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
|
||||
sudo apt update
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
|
||||
@@ -48,21 +48,10 @@ jobs:
|
||||
gfx942:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
steps:
|
||||
# Since mesa-amdgpu-multimedia-devel is not directly available from apt, register it
|
||||
- task: Bash@3
|
||||
displayName: 'Register ROCm packages'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
|
||||
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/${{ variables.KEYRING_VERSION }}/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ variables.KEYRING_VERSION }} jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
|
||||
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
|
||||
sudo apt update
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
@@ -106,21 +95,10 @@ jobs:
|
||||
JOB_GPU_TARGET: gfx942
|
||||
JOB_TEST_POOL: ${{ variables.GFX942_TEST_POOL }}
|
||||
steps:
|
||||
# Since mesa-amdgpu-multimedia-devel is not directly available from apt, register it
|
||||
- task: Bash@3
|
||||
displayName: 'Register ROCm packages'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
|
||||
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/${{ variables.KEYRING_VERSION }}/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ variables.KEYRING_VERSION }} jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
|
||||
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
|
||||
sudo apt update
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
|
||||
@@ -36,7 +36,7 @@ jobs:
|
||||
-DCPACK_GENERATOR=DEB
|
||||
-DCPACK_DEBIAN_PACKAGE_RELEASE="local.9999~99.99"
|
||||
-DCPACK_RPM_PACKAGE_RELEASE="local.9999"
|
||||
-DROCM_VERSION="$(next-release)"
|
||||
-DROCM_VERSION="$(NEXT_RELEASE_VERSION)"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
registerRadeon: true
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
|
||||
@@ -37,6 +37,8 @@ parameters:
|
||||
- clr
|
||||
- llvm-project
|
||||
- rccl
|
||||
- rocDecode
|
||||
- rocJPEG
|
||||
- rocm-cmake
|
||||
- rocm-core
|
||||
- rocminfo
|
||||
@@ -60,7 +62,7 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
registerRadeon: true
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
@@ -99,6 +101,7 @@ jobs:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
registerROCmPackages: true
|
||||
|
||||
- job: rocprofiler_sdk_testing
|
||||
dependsOn: rocprofiler_sdk
|
||||
@@ -119,7 +122,7 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
registerRadeon: true
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
@@ -155,3 +158,4 @@ jobs:
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: $(JOB_GPU_TARGET)
|
||||
registerROCmPackages: true
|
||||
|
||||
@@ -46,13 +46,14 @@ parameters:
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- amdsmi
|
||||
- aomp
|
||||
- clr
|
||||
- llvm-project
|
||||
- rccl
|
||||
- rocDecode
|
||||
- rocJPEG
|
||||
- rocm-core
|
||||
- rocm_smi_lib
|
||||
- rocminfo
|
||||
- ROCR-Runtime
|
||||
- rocprofiler-register
|
||||
@@ -75,7 +76,7 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
registerRadeon: true
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
@@ -159,7 +160,7 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
registerRadeon: true
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
|
||||
@@ -19,7 +19,7 @@ jobs:
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
container:
|
||||
image: ${{ variables.DOCKER_IMAGE_NAME }}:${{ variables.LATEST_DOCKER_VERSION }}
|
||||
image: rocm/dev-ubuntu-22.04:${{ variables.LATEST_RELEASE_VERSION }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
|
||||
@@ -4,13 +4,13 @@ steps:
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
export packageName=$(curl -s https://repo.radeon.com/rocm/apt/latest/pool/main/h/hsa-amd-aqlprofile/ | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb")
|
||||
export packageName=$(curl -s https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/ | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb")
|
||||
echo "##vso[task.setvariable variable=packageName;isreadonly=true]$packageName"
|
||||
- task: Bash@3
|
||||
displayName: 'Download aqlprofile'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: wget -nv https://repo.radeon.com/rocm/apt/latest/pool/main/h/hsa-amd-aqlprofile/$(packageName)
|
||||
script: wget -nv https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/$(packageName)
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
- task: Bash@3
|
||||
displayName: 'Extract aqlprofile'
|
||||
|
||||
@@ -6,21 +6,21 @@ parameters:
|
||||
- name: pipModules
|
||||
type: object
|
||||
default: []
|
||||
- name: registerRadeon
|
||||
- name: registerROCmPackages
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
steps:
|
||||
- ${{ if eq(parameters.registerRadeon, true) }}:
|
||||
- ${{ if eq(parameters.registerROCmPackages, true) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'Register repo.radeon packages'
|
||||
displayName: 'Register AMDGPU & ROCm repos'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
|
||||
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/latest/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/latest jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/$(REPO_RADEON_VERSION)/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION) jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
|
||||
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
|
||||
sudo apt update
|
||||
# firefox takes time to upgrade and is not needed for CI workloads, hold version
|
||||
|
||||
@@ -154,8 +154,8 @@ steps:
|
||||
script: |
|
||||
echo "RUN mkdir --parents --mode=0755 /etc/apt/keyrings" >> Dockerfile
|
||||
echo "RUN wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null" >> Dockerfile
|
||||
echo "RUN echo \"deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/latest/ubuntu jammy main\" | tee /etc/apt/sources.list.d/amdgpu.list" >> Dockerfile
|
||||
echo "RUN echo \"deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/latest jammy main\" | tee --append /etc/apt/sources.list.d/rocm.list" >> Dockerfile
|
||||
echo "RUN echo \"deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/$(REPO_RADEON_VERSION)/ubuntu jammy main\" | tee /etc/apt/sources.list.d/amdgpu.list" >> Dockerfile
|
||||
echo "RUN echo \"deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION) jammy main\" | tee --append /etc/apt/sources.list.d/rocm.list" >> Dockerfile
|
||||
echo "RUN printf 'Package: *\\nPin: release o=repo.radeon.com\\nPin-Priority: 600' > /etc/apt/preferences.d/rocm-pin-600" >> Dockerfile
|
||||
echo "RUN DEBIAN_FRONTEND=noninteractive apt-get --yes update" >> Dockerfile
|
||||
- ${{ if eq(parameters.registerCUDAPackages, true) }}:
|
||||
|
||||
@@ -27,14 +27,14 @@ variables:
|
||||
value: rocm-ci_larger_base_disk_pool
|
||||
- name: GFX942_TEST_POOL
|
||||
value: gfx942_test_pool
|
||||
- name: LATEST_RELEASE_VERSION
|
||||
value: 6.3.2
|
||||
- name: REPO_RADEON_VERSION
|
||||
value: 6.3.2
|
||||
- name: NEXT_RELEASE_VERSION
|
||||
value: 6.4.0
|
||||
- name: LATEST_RELEASE_TAG
|
||||
value: rocm-6.1.0
|
||||
- name: DOCKER_IMAGE_NAME
|
||||
value: rocm/dev-ubuntu-22.04
|
||||
- name: LATEST_DOCKER_VERSION
|
||||
value: 6.1
|
||||
- name: KEYRING_VERSION
|
||||
value: 6.3
|
||||
value: rocm-6.3.2
|
||||
- name: AMDMIGRAPHX_GFX942_TEST_PIPELINE_ID
|
||||
value: 197
|
||||
- name: AMDMIGRAPHX_PIPELINE_ID
|
||||
@@ -151,8 +151,6 @@ variables:
|
||||
value: 105
|
||||
- name: HIPTENSOR_TAGGED_PIPELINE_ID
|
||||
value: 56
|
||||
- name: LAST_RELEASE
|
||||
value: 6.1.0
|
||||
- name: LLVM_PROJECT_PIPELINE_ID
|
||||
value: 2
|
||||
- name: LLVM_PROJECT_TAGGED_PIPELINE_ID
|
||||
@@ -183,10 +181,6 @@ variables:
|
||||
value: 100
|
||||
- name: RDC_TAGGED_PIPELINE_ID
|
||||
value: 59
|
||||
- name: REIMAGE_ORG
|
||||
value: AGS-ROCm-CI
|
||||
- name: REIMAGE_REPO
|
||||
value: cirrascale-reimage-automation
|
||||
- name: ROCAL_PIPELINE_ID
|
||||
value: 151
|
||||
- name: ROCALUTION_GFX942_TEST_PIPELINE_ID
|
||||
|
||||
@@ -16,6 +16,9 @@ Throughout the following topics, this guide discusses the goals and :ref:`challe
|
||||
model <fine-tuning-llms-concept-challenge>` like Llama 2. In the
|
||||
sections that follow, you'll find practical guides on libraries and tools to accelerate your fine-tuning.
|
||||
|
||||
The AI Developer Hub contains `AMD ROCm tutorials <https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/>`_ for
|
||||
training, fine-tuning, and inference. It leverages popular machine learning frameworks on AMD GPUs.
|
||||
|
||||
- :doc:`Conceptual overview of fine-tuning LLMs <overview>`
|
||||
|
||||
- :doc:`Fine-tuning and inference <fine-tuning-and-inference>` using a
|
||||
|
||||
@@ -12,6 +12,9 @@ You can use ROCm to perform distributed training, which enables you to train mod
|
||||
|
||||
Overall, ROCm can be used to improve the performance and efficiency of your AI applications. With its training, fine-tuning, and inference support, ROCm provides a complete solution for optimizing AI workflows and achieving the optimum results possible on AMD GPUs.
|
||||
|
||||
The AI Developer Hub contains `AMD ROCm tutorials <https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/>`_ for
|
||||
training, fine-tuning, and inference. It leverages popular machine learning frameworks on AMD GPUs.
|
||||
|
||||
In this guide, you'll learn how to use ROCm for AI:
|
||||
|
||||
- :doc:`Training <training/index>`
|
||||
|
||||
@@ -277,7 +277,7 @@ Installing FBGEMM_GPU
|
||||
Installing FBGEMM_GPU consists of the following steps:
|
||||
|
||||
* Set up an isolated Miniconda environment
|
||||
* Install ROCm using Docker or the :doc:`package manager <rocm-install-on-linux:install/native-install/index>`
|
||||
* Install ROCm using Docker or the :doc:`package manager <rocm-install-on-linux:install/install-methods/package-manager-index>`
|
||||
* Install the nightly `PyTorch <https://pytorch.org/>`_ build
|
||||
* Complete the pre-build and build tasks
|
||||
|
||||
|
||||
@@ -11,6 +11,9 @@ Understanding the ROCm™ software platform’s architecture and capabilities is
|
||||
|
||||
Throughout the following topics, this section provides a comprehensive guide to setting up and deploying AI inference on AMD GPUs. This includes instructions on how to install ROCm, how to use Hugging Face Transformers to manage pre-trained models for natural language processing (NLP) tasks, how to validate vLLM on AMD Instinct™ MI300X accelerators and illustrate how to deploy trained models in production environments.
|
||||
|
||||
The AI Developer Hub contains `AMD ROCm tutorials <https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/>`_ for
|
||||
training, fine-tuning, and inference. It leverages popular machine learning frameworks on AMD GPUs.
|
||||
|
||||
- :doc:`Installing ROCm and machine learning frameworks <install>`
|
||||
|
||||
- :doc:`Running models from Hugging Face <hugging-face-models>`
|
||||
|
||||
@@ -26,7 +26,7 @@ If you’re using a Radeon GPU for graphics-accelerated applications, refer to t
|
||||
|
||||
ROCm supports multiple :doc:`installation methods <rocm-install-on-linux:install/install-overview>`:
|
||||
|
||||
* :doc:`Using your Linux distribution's package manager <rocm-install-on-linux:install/native-install/index>`
|
||||
* :doc:`Using your Linux distribution's package manager <rocm-install-on-linux:install/install-methods/package-manager-index>`
|
||||
|
||||
* :doc:`Using the AMDGPU installer <rocm-install-on-linux:install/amdgpu-install>`
|
||||
|
||||
|
||||
@@ -14,6 +14,9 @@ Training models on AMD GPUs with the ROCm™ software platform allows you to use
|
||||
|
||||
The ROCm software platform makes it easier to train models on AMD GPUs while maintaining compatibility with existing code and tools. The platform also provides features like multi-GPU support, allowing for scaling and parallelization of model training across multiple GPUs to enhance performance.
|
||||
|
||||
The AI Developer Hub contains `AMD ROCm tutorials <https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/>`_ for
|
||||
training, fine-tuning, and inference. It leverages popular machine learning frameworks on AMD GPUs.
|
||||
|
||||
In this guide, you'll learn about:
|
||||
|
||||
- :doc:`Training a model <train-a-model>`
|
||||
|
||||
@@ -12,7 +12,7 @@ myst:
|
||||
This chapter reviews system settings that are required to configure the system
|
||||
for ROCm virtualization on RDNA2-based AMD Radeon™ PRO GPUs. Installing ROCm on
|
||||
Bare Metal follows the routine ROCm
|
||||
{doc}`installation procedure<rocm-install-on-linux:install/native-install/index>`.
|
||||
{doc}`installation procedure<rocm-install-on-linux:install/install-methods/package-manager-index>`.
|
||||
|
||||
To enable ROCm virtualization on V620, one has to setup Single Root I/O
|
||||
Virtualization (SR-IOV) in the BIOS via setting found in the following
|
||||
@@ -166,4 +166,4 @@ First, assign GPU virtual function (VF) to VM using the following steps.
|
||||
Then start the VM.
|
||||
|
||||
Finally install ROCm on the virtual machine (VM). For detailed instructions,
|
||||
refer to the {doc}`Linux install guide<rocm-install-on-linux:install/native-install/index>`.
|
||||
refer to the {doc}`Linux install guide<rocm-install-on-linux:install/install-methods/package-manager-index>`.
|
||||
|
||||
@@ -38,6 +38,7 @@ ROCm documentation is organized into the following categories:
|
||||
:class-body: rocm-card-banner rocm-hue-12
|
||||
|
||||
* [Use ROCm for AI](./how-to/rocm-for-ai/index.rst)
|
||||
* [AI tutorials](https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/)
|
||||
* [Use ROCm for HPC](./how-to/rocm-for-hpc/index.rst)
|
||||
* [System optimization](./how-to/system-optimization/index.rst)
|
||||
* [AMD Instinct MI300X performance validation and tuning](./how-to/tuning-guides/mi300x/index.rst)
|
||||
|
||||
@@ -1,19 +1,23 @@
|
||||
.. meta::
|
||||
:description: Supported data types in ROCm
|
||||
:keywords: int8, float8, float8 (E4M3), float8 (E5M2), bfloat8, float16, half, bfloat16, tensorfloat32, float,
|
||||
float32, float64, double, AMD, ROCm, AMDGPU
|
||||
:description: Supported data types of AMD GPUs and libraries in ROCm.
|
||||
:keywords: precision, data types, HIP types, int8, float8, float8 (E4M3),
|
||||
float8 (E5M2), bfloat8, float16, half, bfloat16, tensorfloat32,
|
||||
float, float32, float64, double, AMD data types, HIP data types,
|
||||
ROCm precision, ROCm data types
|
||||
|
||||
*************************************************************
|
||||
Precision support
|
||||
Data types and precision support
|
||||
*************************************************************
|
||||
|
||||
Use the following sections to identify data types and HIP types ROCm™ supports.
|
||||
This topic lists the supported data types of AMD GPUs and ROCm libraries.
|
||||
Corresponding :doc:`HIP <hip:index>` data types are also noted.
|
||||
|
||||
Integral types
|
||||
==========================================
|
||||
|
||||
The signed and unsigned integral types that are supported by ROCm are listed in the following table,
|
||||
together with their corresponding HIP type and a short description.
|
||||
The signed and unsigned integral types supported by ROCm are listed in
|
||||
the following table, along with their corresponding HIP type and a short
|
||||
description.
|
||||
|
||||
|
||||
.. list-table::
|
||||
@@ -46,8 +50,8 @@ together with their corresponding HIP type and a short description.
|
||||
Floating-point types
|
||||
==========================================
|
||||
|
||||
The floating-point types that are supported by ROCm are listed in the following table, together with
|
||||
their corresponding HIP type and a short description.
|
||||
The floating-point types supported by ROCm are listed in the following
|
||||
table, along with their corresponding HIP type and a short description.
|
||||
|
||||
.. image:: ../data/about/compatibility/floating-point-data-types.png
|
||||
:alt: Supported floating-point types
|
||||
@@ -63,43 +67,62 @@ their corresponding HIP type and a short description.
|
||||
*
|
||||
- float8 (E4M3)
|
||||
- ``-``
|
||||
- An 8-bit floating-point number that mostly follows IEEE-754 conventions and **S1E4M3** bit layout, as described in `8-bit Numerical Formats for Deep Neural Networks <https://arxiv.org/abs/2206.02915>`_ , with expanded range and with no infinity or signed zero. NaN is represented as negative zero.
|
||||
- An 8-bit floating-point number that mostly follows IEEE-754 conventions
|
||||
and **S1E4M3** bit layout, as described in `8-bit Numerical Formats for Deep Neural Networks <https://arxiv.org/abs/2206.02915>`_ ,
|
||||
with expanded range and no infinity or signed zero. NaN is
|
||||
represented as negative zero.
|
||||
*
|
||||
- float8 (E5M2)
|
||||
- ``-``
|
||||
- An 8-bit floating-point number mostly following IEEE-754 conventions and **S1E5M2** bit layout, as described in `8-bit Numerical Formats for Deep Neural Networks <https://arxiv.org/abs/2206.02915>`_ , with expanded range and with no infinity or signed zero. NaN is represented as negative zero.
|
||||
- An 8-bit floating-point number mostly following IEEE-754 conventions and
|
||||
**S1E5M2** bit layout, as described in `8-bit Numerical Formats for Deep Neural Networks <https://arxiv.org/abs/2206.02915>`_ ,
|
||||
with expanded range and no infinity or signed zero. NaN is
|
||||
represented as negative zero.
|
||||
*
|
||||
- float16
|
||||
- ``half``
|
||||
- A 16-bit floating-point number that conforms to the IEEE 754-2008 half-precision storage format.
|
||||
- A 16-bit floating-point number that conforms to the IEEE 754-2008
|
||||
half-precision storage format.
|
||||
*
|
||||
- bfloat16
|
||||
- ``bfloat16``
|
||||
- A shortened 16-bit version of the IEEE 754 single-precision storage format.
|
||||
- A shortened 16-bit version of the IEEE 754 single-precision storage
|
||||
format.
|
||||
*
|
||||
- tensorfloat32
|
||||
- ``-``
|
||||
- A floating-point number that occupies 32 bits or less of storage, providing improved range compared to half (16-bit) format, at (potentially) greater throughput than single-precision (32-bit) formats.
|
||||
- A floating-point number that occupies 32 bits or less of storage,
|
||||
providing improved range compared to half (16-bit) format, at
|
||||
(potentially) greater throughput than single-precision (32-bit) formats.
|
||||
*
|
||||
- float32
|
||||
- ``float``
|
||||
- A 32-bit floating-point number that conforms to the IEEE 754 single-precision storage format.
|
||||
- A 32-bit floating-point number that conforms to the IEEE 754
|
||||
single-precision storage format.
|
||||
*
|
||||
- float64
|
||||
- ``double``
|
||||
- A 64-bit floating-point number that conforms to the IEEE 754 double-precision storage format.
|
||||
- A 64-bit floating-point number that conforms to the IEEE 754
|
||||
double-precision storage format.
|
||||
|
||||
.. note::
|
||||
|
||||
* The float8 and tensorfloat32 types are internal types used in calculations in Matrix Cores and can be stored in any type of the same size.
|
||||
* The encodings for FP8 (E5M2) and FP8 (E4M3) that are natively supported by MI300 differ from the FP8 (E5M2) and FP8 (E4M3) encodings used in H100 (`FP8 Formats for Deep Learning <https://arxiv.org/abs/2209.05433>`_).
|
||||
* The float8 and tensorfloat32 types are internal types used in calculations
|
||||
in Matrix Cores and can be stored in any type of the same size.
|
||||
|
||||
* The encodings for FP8 (E5M2) and FP8 (E4M3) that the
|
||||
MI300 series natively supports differ from the FP8 (E5M2) and FP8 (E4M3)
|
||||
encodings used in NVIDIA H100
|
||||
(`FP8 Formats for Deep Learning <https://arxiv.org/abs/2209.05433>`_).
|
||||
|
||||
* In some AMD documents and articles, float8 (E5M2) is referred to as bfloat8.
|
||||
|
||||
ROCm support icons
|
||||
==========================================
|
||||
|
||||
In the following sections, we use icons to represent the level of support. These icons, described in the
|
||||
following table, are also used on the library data type support pages.
|
||||
In the following sections, icons represent the level of support. These
|
||||
icons, described in the following table, are also used in the library data type
|
||||
support pages.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
@@ -121,14 +144,27 @@ following table, are also used on the library data type support pages.
|
||||
|
||||
.. note::
|
||||
|
||||
* Full support means that the type is supported natively or with hardware emulation.
|
||||
* Native support means that the operations for that type are implemented in hardware. Types that are not natively supported are emulated with the available hardware. The performance of non-natively supported types can differ from the full instruction throughput rate. For example, 16-bit integer operations can be performed on the 32-bit integer ALUs at full rate; however, 64-bit integer operations might need several instructions on the 32-bit integer ALUs.
|
||||
* Any type can be emulated by software, but this page does not cover such cases.
|
||||
* Full support means that the type is supported natively or with hardware
|
||||
emulation.
|
||||
|
||||
Hardware type support
|
||||
* Native support means that the operations for that type are implemented in
|
||||
hardware. Types that are not natively supported are emulated with the
|
||||
available hardware. The performance of non-natively supported types can
|
||||
differ from the full instruction throughput rate. For example, 16-bit
|
||||
integer operations can be performed on the 32-bit integer ALUs at full rate;
|
||||
however, 64-bit integer operations might need several instructions on the
|
||||
32-bit integer ALUs.
|
||||
|
||||
* Any type can be emulated by software, but this page does not cover such
|
||||
cases.
|
||||
|
||||
Hardware data type support
|
||||
==========================================
|
||||
|
||||
AMD GPU hardware support for data types is listed in the following tables.
|
||||
The following tables provide information about AMD Instinct accelerators support
|
||||
for various data types. The MI200 series GPUs, which include MI210, MI250, and
|
||||
MI250X, are based on the CDNA2 architecture. The MI300 series GPUs, consisting
|
||||
of MI300A, MI300X, and MI325X, are built on the CDNA3 architecture.
|
||||
|
||||
Compute units support
|
||||
-------------------------------------------------------------------------------
|
||||
@@ -375,21 +411,23 @@ The following table lists data type support for atomic operations.
|
||||
|
||||
.. note::
|
||||
|
||||
For cases that are not natively supported, you can emulate atomic operations using software.
|
||||
Software-emulated atomic operations have high negative performance impact when they frequently
|
||||
access the same memory address.
|
||||
You can emulate atomic operations using software for cases that are not
|
||||
natively supported. Software-emulated atomic operations have a high negative
|
||||
performance impact when they frequently access the same memory address.
|
||||
|
||||
Data Type support in ROCm Libraries
|
||||
Data type support in ROCm libraries
|
||||
==========================================
|
||||
|
||||
ROCm library support for int8, float8 (E4M3), float8 (E5M2), int16, float16, bfloat16, int32,
|
||||
tensorfloat32, float32, int64, and float64 is listed in the following tables.
|
||||
ROCm library support for int8, float8 (E4M3), float8 (E5M2), int16, float16,
|
||||
bfloat16, int32, tensorfloat32, float32, int64, and float64 is listed in the
|
||||
following tables.
|
||||
|
||||
Libraries input/output type support
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
The following tables list ROCm library support for specific input and output data types. For a detailed
|
||||
description, refer to the corresponding library data type support page.
|
||||
The following tables list ROCm library support for specific input and output
|
||||
data types. Refer to the corresponding library data type support page for a
|
||||
detailed description.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
@@ -516,8 +554,9 @@ description, refer to the corresponding library data type support page.
|
||||
Libraries internal calculations type support
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
The following tables list ROCm library support for specific internal data types. For a detailed
|
||||
description, refer to the corresponding library data type support page.
|
||||
The following tables list ROCm library support for specific internal data types.
|
||||
Refer to the corresponding library data type support page for a detailed
|
||||
description.
|
||||
|
||||
.. tab-set::
|
||||
|
||||
|
||||
@@ -89,7 +89,10 @@ subtrees:
|
||||
title: Profile and debug
|
||||
- file: how-to/rocm-for-ai/inference-optimization/workload.rst
|
||||
title: Workload tuning
|
||||
|
||||
|
||||
- url: https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/
|
||||
title: AI tutorials
|
||||
|
||||
- file: how-to/rocm-for-hpc/index.rst
|
||||
title: Use ROCm for HPC
|
||||
- file: how-to/system-optimization/index.rst
|
||||
@@ -126,6 +129,7 @@ subtrees:
|
||||
- url: https://github.com/amd/rocm-examples
|
||||
title: ROCm examples
|
||||
|
||||
|
||||
- caption: Conceptual
|
||||
entries:
|
||||
- file: conceptual/gpu-arch.md
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
rocm-docs-core==1.13.0
|
||||
rocm-docs-core==1.15.0
|
||||
sphinx-reredirects
|
||||
sphinx-sitemap
|
||||
|
||||
@@ -8,6 +8,13 @@ accessible-pygments==0.0.5
|
||||
# via pydata-sphinx-theme
|
||||
alabaster==1.0.0
|
||||
# via sphinx
|
||||
asttokens==3.0.0
|
||||
# via stack-data
|
||||
attrs==25.1.0
|
||||
# via
|
||||
# jsonschema
|
||||
# jupyter-cache
|
||||
# referencing
|
||||
babel==2.16.0
|
||||
# via
|
||||
# pydata-sphinx-theme
|
||||
@@ -25,9 +32,17 @@ cffi==1.17.1
|
||||
charset-normalizer==3.4.0
|
||||
# via requests
|
||||
click==8.1.7
|
||||
# via sphinx-external-toc
|
||||
# via
|
||||
# jupyter-cache
|
||||
# sphinx-external-toc
|
||||
comm==0.2.2
|
||||
# via ipykernel
|
||||
cryptography==43.0.3
|
||||
# via pyjwt
|
||||
debugpy==1.8.12
|
||||
# via ipykernel
|
||||
decorator==5.1.1
|
||||
# via ipython
|
||||
deprecated==1.2.15
|
||||
# via pygithub
|
||||
docutils==0.21.2
|
||||
@@ -36,34 +51,103 @@ docutils==0.21.2
|
||||
# myst-parser
|
||||
# pydata-sphinx-theme
|
||||
# sphinx
|
||||
exceptiongroup==1.2.2
|
||||
# via ipython
|
||||
executing==2.2.0
|
||||
# via stack-data
|
||||
fastjsonschema==2.20.0
|
||||
# via rocm-docs-core
|
||||
# via
|
||||
# nbformat
|
||||
# rocm-docs-core
|
||||
gitdb==4.0.11
|
||||
# via gitpython
|
||||
gitpython==3.1.43
|
||||
# via rocm-docs-core
|
||||
greenlet==3.1.1
|
||||
# via sqlalchemy
|
||||
idna==3.10
|
||||
# via requests
|
||||
imagesize==1.4.1
|
||||
# via sphinx
|
||||
importlib-metadata==8.6.1
|
||||
# via
|
||||
# jupyter-cache
|
||||
# myst-nb
|
||||
ipykernel==6.29.5
|
||||
# via myst-nb
|
||||
ipython==8.31.0
|
||||
# via
|
||||
# ipykernel
|
||||
# myst-nb
|
||||
jedi==0.19.2
|
||||
# via ipython
|
||||
jinja2==3.1.5
|
||||
# via
|
||||
# myst-parser
|
||||
# sphinx
|
||||
jsonschema==4.23.0
|
||||
# via nbformat
|
||||
jsonschema-specifications==2024.10.1
|
||||
# via jsonschema
|
||||
jupyter-cache==1.0.1
|
||||
# via myst-nb
|
||||
jupyter-client==8.6.3
|
||||
# via
|
||||
# ipykernel
|
||||
# nbclient
|
||||
jupyter-core==5.7.2
|
||||
# via
|
||||
# ipykernel
|
||||
# jupyter-client
|
||||
# nbclient
|
||||
# nbformat
|
||||
markdown-it-py==3.0.0
|
||||
# via
|
||||
# mdit-py-plugins
|
||||
# myst-parser
|
||||
markupsafe==3.0.2
|
||||
# via jinja2
|
||||
matplotlib-inline==0.1.7
|
||||
# via
|
||||
# ipykernel
|
||||
# ipython
|
||||
mdit-py-plugins==0.4.2
|
||||
# via myst-parser
|
||||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
myst-parser==4.0.0
|
||||
myst-nb==1.1.2
|
||||
# via rocm-docs-core
|
||||
myst-parser==4.0.0
|
||||
# via myst-nb
|
||||
nbclient==0.10.2
|
||||
# via
|
||||
# jupyter-cache
|
||||
# myst-nb
|
||||
nbformat==5.10.4
|
||||
# via
|
||||
# jupyter-cache
|
||||
# myst-nb
|
||||
# nbclient
|
||||
nest-asyncio==1.6.0
|
||||
# via ipykernel
|
||||
packaging==24.2
|
||||
# via sphinx
|
||||
# via
|
||||
# ipykernel
|
||||
# sphinx
|
||||
parso==0.8.4
|
||||
# via jedi
|
||||
pexpect==4.9.0
|
||||
# via ipython
|
||||
platformdirs==4.3.6
|
||||
# via jupyter-core
|
||||
prompt-toolkit==3.0.50
|
||||
# via ipython
|
||||
psutil==6.1.1
|
||||
# via ipykernel
|
||||
ptyprocess==0.7.0
|
||||
# via pexpect
|
||||
pure-eval==0.2.3
|
||||
# via stack-data
|
||||
pycparser==2.22
|
||||
# via cffi
|
||||
pydata-sphinx-theme==0.16.0
|
||||
@@ -75,23 +159,42 @@ pygithub==2.5.0
|
||||
pygments==2.18.0
|
||||
# via
|
||||
# accessible-pygments
|
||||
# ipython
|
||||
# pydata-sphinx-theme
|
||||
# sphinx
|
||||
pyjwt[crypto]==2.10.0
|
||||
# via pygithub
|
||||
pynacl==1.5.0
|
||||
# via pygithub
|
||||
python-dateutil==2.9.0.post0
|
||||
# via jupyter-client
|
||||
pyyaml==6.0.2
|
||||
# via
|
||||
# jupyter-cache
|
||||
# myst-nb
|
||||
# myst-parser
|
||||
# rocm-docs-core
|
||||
# sphinx-external-toc
|
||||
pyzmq==26.2.0
|
||||
# via
|
||||
# ipykernel
|
||||
# jupyter-client
|
||||
referencing==0.36.2
|
||||
# via
|
||||
# jsonschema
|
||||
# jsonschema-specifications
|
||||
requests==2.32.3
|
||||
# via
|
||||
# pygithub
|
||||
# sphinx
|
||||
rocm-docs-core==1.13.0
|
||||
rocm-docs-core==1.15.0
|
||||
# via -r requirements.in
|
||||
rpds-py==0.22.3
|
||||
# via
|
||||
# jsonschema
|
||||
# referencing
|
||||
six==1.17.0
|
||||
# via python-dateutil
|
||||
smmap==5.0.1
|
||||
# via gitdb
|
||||
snowballstemmer==2.2.0
|
||||
@@ -101,6 +204,7 @@ soupsieve==2.6
|
||||
sphinx==8.1.3
|
||||
# via
|
||||
# breathe
|
||||
# myst-nb
|
||||
# myst-parser
|
||||
# pydata-sphinx-theme
|
||||
# rocm-docs-core
|
||||
@@ -137,15 +241,43 @@ sphinxcontrib-qthelp==2.0.0
|
||||
# via sphinx
|
||||
sphinxcontrib-serializinghtml==2.0.0
|
||||
# via sphinx
|
||||
sqlalchemy==2.0.37
|
||||
# via jupyter-cache
|
||||
stack-data==0.6.3
|
||||
# via ipython
|
||||
tabulate==0.9.0
|
||||
# via jupyter-cache
|
||||
tomli==2.1.0
|
||||
# via sphinx
|
||||
tornado==6.4.2
|
||||
# via
|
||||
# ipykernel
|
||||
# jupyter-client
|
||||
traitlets==5.14.3
|
||||
# via
|
||||
# comm
|
||||
# ipykernel
|
||||
# ipython
|
||||
# jupyter-client
|
||||
# jupyter-core
|
||||
# matplotlib-inline
|
||||
# nbclient
|
||||
# nbformat
|
||||
typing-extensions==4.12.2
|
||||
# via
|
||||
# ipython
|
||||
# myst-nb
|
||||
# pydata-sphinx-theme
|
||||
# pygithub
|
||||
# referencing
|
||||
# sqlalchemy
|
||||
urllib3==2.2.3
|
||||
# via
|
||||
# pygithub
|
||||
# requests
|
||||
wcwidth==0.2.13
|
||||
# via prompt-toolkit
|
||||
wrapt==1.17.0
|
||||
# via deprecated
|
||||
zipp==3.21.0
|
||||
# via importlib-metadata
|
||||
|
||||
Reference in New Issue
Block a user