Merge pull request #3370 from ROCm/develop

Merge develop into roc-6.1.x
This commit is contained in:
Sam Wu
2024-06-28 15:36:56 -06:00
committed by GitHub
16 changed files with 227 additions and 109 deletions

View File

@@ -12,6 +12,7 @@ parameters:
- ninja-build
- git
- python3-pip
- libdrm-dev
- name: rocmDependencies
type: object
default:
@@ -24,10 +25,11 @@ parameters:
jobs:
- job: composable_kernel
timeoutInMinutes: 210
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ variables.MEDIUM_BUILD_POOL }}
pool: ${{ variables.ULTRA_BUILD_POOL }}
workspace:
clean: all
steps:
@@ -57,6 +59,6 @@ jobs:
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DCMAKE_BUILD_TYPE=Release
-DGPU_TARGETS=gfx1030;gfx1100
-DINSTANCES_ONLY=ON
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml

View File

@@ -65,3 +65,13 @@ jobs:
-DBUILD_CLIENTS_SAMPLES=OFF
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
artifactName: hipSPARSE
publish: false
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
parameters:
sourceDir: $(Build.SourcesDirectory)/build/clients
contentsString: matrices/**
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
artifactName: testMatrices

View File

@@ -0,0 +1,138 @@
# Inputs accepted by the rocAL component build template.
parameters:
  # Repository alias handed to the checkout step template.
  - name: checkoutRepo
    type: string
    default: "self"
  # Left empty, the job pulls dependencies from "staging"; when set, it pulls
  # them from "tag-builds" instead.
  - name: checkoutRef
    type: string
    default: ""
  # Packages forwarded to the dependencies-other step template as aptPackages.
  - name: aptPackages
    type: object
    default:
      - python3-pip
      - python3-protobuf
      - cmake
      - ninja-build
      - libprotobuf-dev
      - libprotoc-dev
      - protobuf-compiler
      - liblmdb-dev
      - pkg-config
      - ffmpeg
      - libavcodec-dev
      - libavformat-dev
      - libavutil-dev
      - libswscale-dev
      - libturbojpeg-dev
      # Pinned to an exact version; the job registers an extra apt source for
      # this package before dependencies are installed.
      - "libjpeg-turbo-official=3.0.2-20240124"
      - libopencv-dev
  # Modules forwarded to the dependencies-other step template as pipModules.
  - name: pipModules
    type: object
    default:
      - numpy
      - opencv-python
      - torch
      - pillow
  # Component names forwarded to the dependencies-rocm step template as
  # dependencyList.
  - name: rocmDependencies
    type: object
    default:
      - rocm-cmake
      - llvm-project
      - ROCR-Runtime
      - clr
      - rocDecode
      - half
      - rpp
      - MIVisionX
      - aomp
# rocAL build job. In order, it: registers the libjpeg-turbo apt source, runs
# the dependencies-other / preamble / checkout step templates, clones, builds
# and installs PyBind11 and RapidJSON, runs the dependencies-rocm template
# (source chosen by checkoutRef), configures and builds rocAL through the
# build-cmake template, and finally runs the artifact-upload template.
jobs:
  - job: rocAL
    variables:
      - group: common
      - template: /.azuredevops/variables-global.yml
    pool:
      vmImage: ${{ variables.BASE_BUILD_POOL }}
    workspace:
      clean: all
    steps:
      # Adds the packagecloud libjpeg-turbo repository so that the
      # libjpeg-turbo-official package in aptPackages can be resolved.
      - task: Bash@3
        displayName: 'Register libjpeg-turbo packages'
        inputs:
          targetType: inline
          script: |
            # Abort on the first failing command, including failures inside a
            # pipeline (e.g. a failed key download feeding gpg/tee).
            set -euo pipefail
            sudo mkdir --parents --mode=0755 /etc/apt/keyrings
            # Keep the key in /etc/apt/keyrings and reference it via signed-by
            # so it is trusted for this repository only; a key placed in
            # /etc/apt/trusted.gpg.d is trusted for every configured source.
            wget -q -O- https://packagecloud.io/dcommander/libjpeg-turbo/gpgkey | gpg --dearmor | sudo tee /etc/apt/keyrings/libjpeg-turbo.gpg > /dev/null
            echo "deb [signed-by=/etc/apt/keyrings/libjpeg-turbo.gpg] https://packagecloud.io/dcommander/libjpeg-turbo/any/ any main" | sudo tee /etc/apt/sources.list.d/libjpeg-turbo.list
            sudo apt-get update
            # Print the available versions to the log; with pipefail this also
            # fails the step when the package is not visible to apt.
            apt-cache show libjpeg-turbo-official | grep Version
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
        parameters:
          aptPackages: ${{ parameters.aptPackages }}
          pipModules: ${{ parameters.pipModules }}
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
        parameters:
          checkoutRepo: ${{ parameters.checkoutRepo }}
      - task: Bash@3
        displayName: 'Clone PyBind11'
        inputs:
          targetType: inline
          script: git clone --depth 1 -b v2.11.1 https://github.com/pybind/pybind11
          workingDirectory: '$(Build.SourcesDirectory)'
      # NOTE(review): unlike the PyBind11 clone above, no tag or commit is
      # pinned here, so this follows the repository's default branch head.
      - task: Bash@3
        displayName: 'Clone RapidJSON'
        inputs:
          targetType: inline
          script: git clone --depth 1 https://github.com/Tencent/rapidjson.git
          workingDirectory: '$(Build.SourcesDirectory)'
      # customInstallPath/installEnabled are switched off so the template only
      # configures and builds; installation is done by the explicit sudo step
      # that follows (presumably into CMake's default prefix - TODO confirm).
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
        parameters:
          componentName: PyBind11
          cmakeBuildDir: '$(Build.SourcesDirectory)/pybind11/build'
          customInstallPath: false
          installEnabled: false
          extraBuildFlags: >-
            -DDOWNLOAD_CATCH=ON
            -DDOWNLOAD_EIGEN=ON
            -GNinja
      - task: Bash@3
        displayName: 'Install PyBind11'
        inputs:
          targetType: inline
          script: sudo cmake --build . --target install
          workingDirectory: '$(Build.SourcesDirectory)/pybind11/build'
      # Same configure/build-then-sudo-install pattern as PyBind11.
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
        parameters:
          componentName: RapidJSON
          cmakeBuildDir: '$(Build.SourcesDirectory)/rapidjson/build'
          customInstallPath: false
          installEnabled: false
          extraBuildFlags: >-
            -GNinja
      - task: Bash@3
        displayName: 'Install RapidJSON'
        inputs:
          targetType: inline
          script: sudo cmake --build . --target install
          workingDirectory: '$(Build.SourcesDirectory)/rapidjson/build'
      # CI case: download latest default branch build
      - ${{ if eq(parameters.checkoutRef, '') }}:
        - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
          parameters:
            dependencyList: ${{ parameters.rocmDependencies }}
            dependencySource: staging
      # manual build case: triggered by ROCm/ROCm repo
      - ${{ if ne(parameters.checkoutRef, '') }}:
        - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
          parameters:
            dependencyList: ${{ parameters.rocmDependencies }}
            dependencySource: tag-builds
      # Configure and build rocAL itself against the downloaded ROCm tree and
      # the libjpeg-turbo install under /opt/libjpeg-turbo.
      # NOTE(review): $Python3_STDARCH is not Azure macro syntax ($(...)), so
      # it reaches the template unexpanded - confirm what is meant to resolve it.
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
        parameters:
          extraBuildFlags: >-
            -DROCM_PATH=$(Agent.BuildDirectory)/rocm
            -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;/opt/libjpeg-turbo
            -DCMAKE_INSTALL_PREFIX_PYTHON=$Python3_STDARCH
            -DCMAKE_BUILD_TYPE=Release
            -GNinja
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml

View File

@@ -0,0 +1,29 @@
# Manually triggered pipeline that builds rocAL from a release ref by reusing
# the rocAL component job template.
variables:
  - group: common
  - template: /.azuredevops/variables-global.yml

parameters:
  # Git ref to build; used both as the release_repo ref and as the checkoutRef
  # passed to the job template.
  - name: checkoutRef
    type: string
    default: refs/tags/$(LATEST_RELEASE_TAG)

resources:
  repositories:
    - repository: pipelines_repo
      type: github
      endpoint: ROCm
      name: ROCm/ROCm
    # rocAL sources, resolved at the requested ref.
    - repository: release_repo
      type: github
      endpoint: ROCm
      name: ROCm/rocAL
      ref: ${{ parameters.checkoutRef }}

# No CI or PR triggers are configured for this pipeline.
trigger: none
pr: none

jobs:
  - template: ${{ variables.CI_COMPONENT_PATH }}/rocAL.yml
    parameters:
      checkoutRepo: release_repo
      checkoutRef: ${{ parameters.checkoutRef }}

View File

@@ -27,6 +27,7 @@ parameters:
hipSPARSE: develop
llvm-project: amd-staging
MIOpen: develop
MIVisionX: develop
rdc: develop
rocBLAS: develop
ROCdbgapi : amd-master

View File

@@ -20,6 +20,9 @@ parameters:
- name: installDir
type: string
default: '$(Build.BinariesDirectory)'
- name: customInstallPath
type: boolean
default: true
- name: installEnabled
type: boolean
default: true
@@ -31,7 +34,10 @@ steps:
displayName: '${{parameters.componentName }} CMake Flags'
inputs:
workingDirectory: ${{ parameters.cmakeBuildDir }}
cmakeArgs: -DCMAKE_INSTALL_PREFIX=${{ parameters.installDir }} ${{ parameters.extraBuildFlags }} ..
${{ if eq(parameters.customInstallPath, true) }}:
cmakeArgs: -DCMAKE_INSTALL_PREFIX=${{ parameters.installDir }} ${{ parameters.extraBuildFlags }} ..
${{ else }}:
cmakeArgs: ${{ parameters.extraBuildFlags }} ..
# equivalent to running make $cmakeTargetDir from $cmakeBuildDir
# i.e., cd $cmakeBuildDir; make $cmakeTargetDir
- task: CMake@1

View File

@@ -38,6 +38,7 @@ parameters:
hipSPARSE: $(hipsparse-pipeline-id)
llvm-project: $(llvm-project-pipeline-id)
MIOpen: $(miopen-pipeline-id)
MIVisionX: $(mivisionx-pipeline-id)
rdc: $(rdc-pipeline-id)
rocBLAS: $(rocblas-pipeline-id)
ROCdbgapi : $(rocdbgapi-pipeline-id)
@@ -76,6 +77,7 @@ parameters:
hipSPARSE: $(hipsparse-tagged-pipeline-id)
llvm-project: $(llvm-project-tagged-pipeline-id)
MIOpen: $(miopen-tagged-pipeline-id)
MIVisionX: $(mivisionx-tagged-pipeline-id)
rdc: $(rdc-tagged-pipeline-id)
rocBLAS: $(rocblas-tagged-pipeline-id)
ROCdbgapi : $(rocdbgapi-tagged-pipeline-id)

View File

@@ -32,6 +32,7 @@ additional licenses. Please review individual repositories for more information.
| [MIOpen](https://github.com/ROCm/MIOpen/) | [MIT](https://github.com/ROCm/MIOpen/blob/develop/LICENSE.txt) |
| [MIVisionX](https://github.com/ROCm/MIVisionX/) | [MIT](https://github.com/ROCm/MIVisionX/blob/develop/LICENSE.txt) |
| [AMD Common Language Runtime (CLR)](https://github.com/ROCm/clr) | [MIT](https://github.com/ROCm/clr/blob/develop/LICENCE) |
| [ROCm-Core](https://github.com/ROCm/rocm-core) | [MIT](https://github.com/ROCm/rocm-core/blob/master/copyright) |
| [hipamd](https://github.com/ROCm/clr/tree/develop/hipamd) | [MIT](https://github.com/ROCm/clr/blob/develop/hipamd/LICENSE.txt) |
| [ROCm-OpenCL-Runtime](https://github.com/ROCm/clr/tree/develop/opencl) | [MIT](https://github.com/ROCm/clr/blob/develop/opencl/LICENSE.txt) |
| [Tensile](https://github.com/ROCm/Tensile/) | [MIT](https://github.com/ROCm/Tensile/blob/develop/LICENSE.md) |
@@ -75,7 +76,8 @@ additional licenses. Please review individual repositories for more information.
| [ROCm CMake](https://github.com/ROCm/rocm-cmake/) | [MIT](https://github.com/ROCm/rocm-cmake/blob/develop/LICENSE) |
| [ROCdbgapi](https://github.com/ROCm/ROCdbgapi/) | [MIT](https://github.com/ROCm/ROCdbgapi/blob/amd-staging/LICENSE.txt) |
| [ROCgdb](https://github.com/ROCm/ROCgdb/) | [GNU General Public License v2.0](https://github.com/ROCm/ROCgdb/blob/amd-master/COPYING) |
| [ROCm SMI Lib](https://github.com/ROCm/rocm_smi_lib/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocm_smi_lib/blob/develop/License.txt) |
| [ROCm SMI Lib](https://github.com/ROCm/rocm_smi_lib/) | [MIT](https://github.com/ROCm/rocm_smi_lib/blob/develop/License.txt) |
| [AMD SMI](https://github.com/ROCm/amdsmi) | [MIT](https://github.com/ROCm/amdsmi/blob/develop/LICENSE) |
| [rocminfo](https://github.com/ROCm/rocminfo/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocminfo/blob/amd-staging/License.txt) |
| [ROCProfiler](https://github.com/ROCm/rocprofiler/) | [MIT](https://github.com/ROCm/rocprofiler/blob/amd-master/LICENSE) |
| [ROCTracer](https://github.com/ROCm/roctracer/) | [MIT](https://github.com/ROCm/roctracer/blob/amd-master/LICENSE) |
@@ -85,8 +87,8 @@ additional licenses. Please review individual repositories for more information.
| hsa-amd-aqlprofile | [AMD Software EULA](https://www.amd.com/en/legal/eula/amd-software-eula.html) |
Open sourced ROCm components are released via public GitHub
repositories, packages on `https://github.com/ROCm` and other distribution channels.
Proprietary products are only available with special permission from AMD. Currently, only
repositories, packages on [https://repo.radeon.com](https://repo.radeon.com) and other distribution channels.
Proprietary products are only available on [https://repo.radeon.com](https://repo.radeon.com). Currently, only
one component of ROCm, `rocm-llvm-alt`, is governed by a proprietary license.
Proprietary components are organized in a proprietary subdirectory in the package
repositories to distinguish from open sourced packages.

View File

@@ -57,34 +57,10 @@ article_pages = [
"date":"2024-06-04"
},
{"file":"install/windows/install-quick", "os":["windows"]},
{"file":"install/linux/install-quick", "os":["linux"]},
{"file":"install/linux/install", "os":["linux"]},
{"file":"install/linux/install-options", "os":["linux"]},
{"file":"install/linux/prerequisites", "os":["linux"]},
{"file":"install/docker", "os":["linux"]},
{"file":"install/magma-install", "os":["linux"]},
{"file":"install/pytorch-install", "os":["linux"]},
{"file":"install/tensorflow-install", "os":["linux"]},
{"file":"install/windows/install", "os":["windows"]},
{"file":"install/windows/prerequisites", "os":["windows"]},
{"file":"install/windows/cli/index", "os":["windows"]},
{"file":"install/windows/gui/index", "os":["windows"]},
{"file":"about/compatibility/docker-image-support-matrix", "os":["linux"]},
{"file":"about/compatibility/user-kernel-space-compat-matrix", "os":["linux"]},
{"file":"reference/library-index", "os":["linux"]},
{"file":"how-to/deep-learning-rocm", "os":["linux"]},
{"file":"how-to/gpu-enabled-mpi", "os":["linux"]},
{"file":"how-to/system-debugging", "os":["linux"]},
{"file":"how-to/tuning-guides", "os":["linux", "windows"]},
{"file":"rocm-a-z", "os":["linux", "windows"]},
]
exclude_patterns = ['temp']
@@ -108,5 +84,5 @@ html_theme_options = {
}
redirects = {
"reference/openmp/openmp": "../../about/compatibility/openmp.html"
"reference/openmp/openmp": "../../about/compatibility/openmp.html"
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 108 KiB

After

Width:  |  Height:  |  Size: 95 KiB

View File

@@ -8,44 +8,14 @@ Installing deep learning frameworks for ROCm
ROCm provides a comprehensive ecosystem for deep learning development, including
:ref:`libraries <artificial-intelligence-apis>` for optimized deep learning operations and ROCm-aware versions of popular
deep learning frameworks and libraries such as PyTorch, TensorFlow, JAX, and MAGMA. ROCm works closely with these
deep learning frameworks and libraries such as PyTorch, TensorFlow, and JAX. ROCm works closely with these
frameworks to ensure that framework-specific optimizations take advantage of AMD accelerator and GPU architectures.
The following guides cover installation processes for ROCm-aware deep learning frameworks.
.. grid::
.. grid-item::
:columns: 3
:doc:`PyTorch for ROCm <rocm-install-on-linux:how-to/3rd-party/pytorch-install>`
.. grid-item::
:columns: 3
:doc:`TensorFlow for ROCm <rocm-install-on-linux:how-to/3rd-party/tensorflow-install>`
.. grid-item::
:columns: 3
.. grid-item::
:columns: 3
.. grid-item::
:columns: 3
:doc:`JAX for ROCm <rocm-install-on-linux:how-to/3rd-party/jax-install>`
.. grid-item::
:columns: 3
:doc:`MAGMA for ROCm <rocm-install-on-linux:how-to/3rd-party/magma-install>`
.. grid-item::
:columns: 3
.. grid-item::
:columns: 3
* :doc:`PyTorch for ROCm <rocm-install-on-linux:how-to/3rd-party/pytorch-install>`
* :doc:`TensorFlow for ROCm <rocm-install-on-linux:how-to/3rd-party/tensorflow-install>`
* :doc:`JAX for ROCm <rocm-install-on-linux:how-to/3rd-party/jax-install>`
The following chart steps through typical installation workflows for installing deep learning frameworks for ROCm.

View File

@@ -28,18 +28,9 @@ graphs, tensor parallel multi-GPU, GPTQ, AWQ, and token speculation.
Installing vLLM
---------------
1. To install vLLM, run the following commands.
.. code-block:: shell
# Install from source
git clone https://github.com/ROCm/vllm.git
cd vllm
PYTORCH_ROCM_ARCH=gfx942 python setup.py install #MI300 series
.. _fine-tuning-llms-vllm-rocm-docker-image:
2. Run the following commands to build a Docker image ``vllm-rocm``.
1. Run the following commands to build a Docker image ``vllm-rocm``.
.. code-block:: shell
@@ -52,7 +43,7 @@ Installing vLLM
.. tab-item:: vLLM on a single-accelerator system
:sync: single
3. To use vLLM as an API server to serve reference requests, first start a container using the :ref:`vllm-rocm
2. To use vLLM as an API server to serve reference requests, first start a container using the :ref:`vllm-rocm
Docker image <fine-tuning-llms-vllm-rocm-docker-image>`.
.. code-block:: shell
@@ -69,7 +60,7 @@ Installing vLLM
vllm-rocm \
bash
4. Inside the container, start the API server to run on a single accelerator on port 8000 using the following command.
3. Inside the container, start the API server to run on a single accelerator on port 8000 using the following command.
.. code-block:: shell
@@ -81,7 +72,7 @@ Installing vLLM
:alt: vLLM API server log message
:align: center
5. To test, send it a curl request containing a prompt.
4. To test, send it a curl request containing a prompt.
.. code-block:: shell
@@ -92,11 +83,11 @@ Installing vLLM
.. code-block:: text
{"text":["What is AMD Instinct?\nAmd Instinct is a brand new line of high-performance computing (HPC) processors from Advanced Micro Devices (AMD). These processors are designed to deliver unparalleled performance for HPC workloads, including scientific simulations, data analytics, and machine learning.\nThe Instinct lineup includes a range of processors, from the entry-level Inst"]}
.. tab-item:: vLLM on a multi-accelerator system
:sync: multi
3. To use vLLM as an API server to serve reference requests, first start a container using the :ref:`vllm-rocm
2. To use vLLM as an API server to serve reference requests, first start a container using the :ref:`vllm-rocm
Docker image <fine-tuning-llms-vllm-rocm-docker-image>`.
.. code-block:: shell
@@ -114,14 +105,14 @@ Installing vLLM
bash
4. To run API server on multiple GPUs, use the ``-tp`` or ``--tensor-parallel-size`` parameter. For example, to use two
3. To run API server on multiple GPUs, use the ``-tp`` or ``--tensor-parallel-size`` parameter. For example, to use two
GPUs, start the API server using the following command.
.. code-block:: shell
python -m vllm.entrypoints.api_server --model /app/model --dtype float16 -tp 2 --port 8000 &
5. To run multiple instances of API Servers, specify different ports for each server, and use ``ROCR_VISIBLE_DEVICES`` to
4. To run multiple instances of API Servers, specify different ports for each server, and use ``ROCR_VISIBLE_DEVICES`` to
isolate each instance to a different accelerator.
For example, to run two API servers, one on port 8000 using GPU 0 and 1, one on port 8001 using GPU 2 and 3, use a
@@ -132,7 +123,7 @@ Installing vLLM
ROCR_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.api_server --model /data/llama-2-7b-chat-hf --dtype float16 -tp 2 --port 8000 &
ROCR_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.api_server --model /data/llama-2-7b-chat-hf --dtype float16 -tp 2 --port 8001 &
6. To test, send it a curl request containing a prompt.
5. To test, send it a curl request containing a prompt.
.. code-block:: shell
@@ -163,27 +154,29 @@ speculation.
Install TGI
-----------
1. To install the TGI Docker image, run the following commands.
1. Launch the TGI Docker container in the host machine.
.. code-block:: shell
# Install from Dockerfile
git clone https://github.com/huggingface/text-generation-inference.git -b mi300-compat
cd text-generation-inference
docker build . -f Dockerfile.rocm
docker run --name tgi --rm -it --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
--device=/dev/kfd --device=/dev/dri --group-add video --ipc=host --shm-size 256g \
--net host -v $PWD:/data \
--entrypoint "/bin/bash" \
--env HUGGINGFACE_HUB_CACHE=/data \
ghcr.io/huggingface/text-generation-inference:latest-rocm
.. tab-set::
.. tab-item:: TGI on a single-accelerator system
:sync: single
2. Launch a model using TGI server on a single accelerator.
2. Inside the container, launch a model using TGI server on a single accelerator.
.. code-block:: shell
export ROCM_USE_FLASH_ATTN_V2_TRITON=True
text-generation-launcher --model-id NousResearch/Meta-Llama-3-70B --dtype float16 --port 8000 &
3. To test, send it a curl request containing a prompt.
.. code-block:: shell
@@ -191,26 +184,26 @@ Install TGI
curl http://localhost:8000/generate_stream -X POST -d '{"inputs":"What is AMD Instinct?","parameters":{"max_new_tokens":20}}' -H 'Content-Type: application/json'
You should receive a response like the following.
.. code-block:: shell
data:{"index":20,"token":{"id":304,"text":" in","logprob":-1.2822266,"special":false},"generated_text":" AMD Instinct is a new family of data center GPUs designed to accelerate the most demanding workloads in","details":null}
.. tab-item:: TGI on a multi-accelerator system
2. Launch a model using TGI server on multiple accelerators (4 in this case).
2. Inside the container, launch a model using TGI server on multiple accelerators (4 in this case).
.. code-block:: shell
export ROCM_USE_FLASH_ATTN_V2_TRITON=True
text-generation-launcher --model-id NousResearch/Meta-Llama-3-8B --dtype float16 --port 8000 --num-shard 4 &
3. To test, send it a curl request containing a prompt.
.. code-block:: shell
curl http://localhost:8000/generate_stream -X POST -d '{"inputs":"What is AMD Instinct?","parameters":{"max_new_tokens":20}}' -H 'Content-Type: application/json'
You should receive a response like the following.
.. code-block:: shell

View File

@@ -343,11 +343,6 @@ or :doc:`rocBLAS <rocblas:index>` is faster for a specific operation.
then required to strip out the kernel and create kernel
compilation and launch via Triton.
* For advanced ``matmul`` or ``conv`` configuration tuning, the ``inductor-gemm-tuner`` can
help. This implements the Triton ``conv``/``mm`` implementations used upstream
and allows specification of inputs and configuration tuning search space if new
tunings are found that can be added to the auto-tune list.
Other guidelines
================

View File

@@ -25,7 +25,6 @@ Our documentation is organized into the following categories:
:class-container: rocm-doc-grid
:::{grid-item-card}
:class-card: sd-text-black
:img-top: ./data/banner-installation.jpg
:img-alt: Install documentation
:padding: 2
@@ -43,11 +42,9 @@ Our documentation is organized into the following categories:
* {doc}`PyTorch for ROCm<rocm-install-on-linux:how-to/3rd-party/pytorch-install>`
* {doc}`TensorFlow for ROCm<rocm-install-on-linux:how-to/3rd-party/tensorflow-install>`
* {doc}`JAX for ROCm<rocm-install-on-linux:how-to/3rd-party/jax-install>`
* {doc}`MAGMA for ROCm<rocm-install-on-linux:how-to/3rd-party/magma-install>`
:::
:::{grid-item-card}
:class-card: sd-text-black
:img-top: ./data/banner-compatibility.jpg
:img-alt: Compatibility information
:padding: 2
@@ -65,7 +62,6 @@ Our documentation is organized into the following categories:
<!-- markdownlint-disable MD051 -->
:::{grid-item-card}
:class-card: sd-text-black
:img-top: ./data/banner-reference.jpg
:img-alt: Reference documentation
:padding: 2
@@ -86,14 +82,13 @@ Our documentation is organized into the following categories:
<!-- markdownlint-enable MD051 -->
:::{grid-item-card}
:class-card: sd-text-black
:img-top: ./data/banner-howto.jpg
:img-alt: How-to documentation
:padding: 2
* [Using ROCm for AI](./how-to/rocm-for-ai/index.rst)
* [Fine-tuning LLMs and inference optimization](./how-to/llm-fine-tuning-optimization/index.rst)
* [Using ROCm for HPC](./how-to/rocm-for-hpc/index.rst)
* [Fine-tuning LLMs and inference optimization](./how-to/llm-fine-tuning-optimization/index.rst)
* [System tuning for various architectures](./how-to/tuning-guides.md)
* [MI100](./how-to/tuning-guides/mi100.md)
* [MI200](./how-to/tuning-guides/mi200.md)
@@ -109,7 +104,6 @@ Our documentation is organized into the following categories:
:::
:::{grid-item-card}
:class-card: sd-text-black
:img-top: ./data/banner-conceptual.jpg
:img-alt: Conceptual documentation
:padding: 2

View File

@@ -58,6 +58,8 @@ subtrees:
- file: how-to/rocm-for-ai/train-a-model.rst
- file: how-to/rocm-for-ai/hugging-face-models.rst
- file: how-to/rocm-for-ai/deploy-your-model.rst
- file: how-to/rocm-for-hpc/index.rst
title: Using ROCm for HPC
- file: how-to/llm-fine-tuning-optimization/index.rst
title: Fine-tuning LLMs and inference optimization
subtrees:
@@ -79,8 +81,6 @@ subtrees:
- file: how-to/llm-fine-tuning-optimization/optimizing-triton-kernel.rst
title: Optimizing Triton kernels
- file: how-to/llm-fine-tuning-optimization/profiling-and-debugging.rst
- file: how-to/rocm-for-hpc/index.rst
title: Using ROCm for HPC
- file: how-to/tuning-guides.md
title: System optimization
subtrees:

View File

@@ -122,7 +122,7 @@ sphinx-external-toc==1.0.1
# via rocm-docs-core
sphinx-notfound-page==1.0.2
# via rocm-docs-core
sphinx-reredirects==0.1.3
sphinx-reredirects==0.1.4
# via -r requirements.in
sphinxcontrib-applehelp==1.0.8
# via sphinx