From 7c7ede0ac53327ebd82f4136a529b267df3d8a0c Mon Sep 17 00:00:00 2001
From: Peter Park
Date: Wed, 5 Jun 2024 08:11:00 -0700
Subject: [PATCH] Rename fine-tuning and optimization guide directory and fix
 index.md (#3242)

* Mv fine-tuning and optimization files
* Reorder index.md
* Rename images directory
* Fix internal links
---
 .../attention-module.png                      | Bin
 .../ck-comparisons.jpg                        | Bin
 .../ck-compilation.jpg                        | Bin
 .../ck-inference_flow.jpg                     | Bin
 .../ck-kernel_launch.jpg                      | Bin
 .../ck-operation_flow.jpg                     | Bin
 .../ck-root_instance.jpg                      | Bin
 .../ck-template_parameters.jpg                | Bin
 .../compute-unit.png                          | Bin
 .../occupancy-vgpr.png                        | Bin
 .../omniperf-analysis.png                     | Bin
 .../omnitrace-timeline.png                    | Bin
 .../perfetto-trace.svg                        |  0
 .../profiling-perfetto-ui.png                 | Bin
 .../tunableop.png                             | Bin
 .../vllm-single-gpu-log.png                   | Bin
 .../weight-update.png                         | Bin
 docs/how-to/deep-learning-rocm.rst            |  2 +-
 .../fine-tuning-and-inference.rst             |  0
 .../index.rst                                 |  0
 .../llm-inference-frameworks.rst              |  2 +-
 .../model-acceleration-libraries.rst          |  4 ++--
 .../model-quantization.rst                    |  0
 .../multi-gpu-fine-tuning-and-inference.rst   |  0
 .../optimizing-triton-kernel.rst              |  4 ++--
 .../optimizing-with-composable-kernel.md      | 14 +++++------
 .../overview.rst                              |  2 +-
 .../profiling-and-debugging.rst               |  6 ++---
 .../single-gpu-fine-tuning-and-inference.rst  |  0
 docs/how-to/rocm-for-ai/train-a-model.rst     |  2 +-
 docs/index.md                                 |  6 ++---
 docs/sphinx/_toc.yml.in                       | 22 +++++++++---------
 32 files changed, 32 insertions(+), 32 deletions(-)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/attention-module.png (100%)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/ck-comparisons.jpg (100%)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/ck-compilation.jpg (100%)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/ck-inference_flow.jpg (100%)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/ck-kernel_launch.jpg (100%)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/ck-operation_flow.jpg (100%)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/ck-root_instance.jpg (100%)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/ck-template_parameters.jpg (100%)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/compute-unit.png (100%)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/occupancy-vgpr.png (100%)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/omniperf-analysis.png (100%)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/omnitrace-timeline.png (100%)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/perfetto-trace.svg (100%)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/profiling-perfetto-ui.png (100%)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/tunableop.png (100%)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/vllm-single-gpu-log.png (100%)
 rename docs/data/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/weight-update.png (100%)
 rename docs/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/fine-tuning-and-inference.rst (100%)
 rename docs/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/index.rst (100%)
 rename docs/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/llm-inference-frameworks.rst (98%)
 rename docs/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/model-acceleration-libraries.rst (98%)
 rename docs/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/model-quantization.rst (100%)
 rename docs/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/multi-gpu-fine-tuning-and-inference.rst (100%)
 rename docs/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/optimizing-triton-kernel.rst (99%)
 rename docs/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/optimizing-with-composable-kernel.md (97%)
 rename docs/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/overview.rst (98%)
 rename docs/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/profiling-and-debugging.rst (97%)
 rename docs/how-to/{fine-tuning-llms => llm-fine-tuning-optimization}/single-gpu-fine-tuning-and-inference.rst (100%)

diff --git a/docs/data/how-to/fine-tuning-llms/attention-module.png b/docs/data/how-to/llm-fine-tuning-optimization/attention-module.png
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/attention-module.png
rename to docs/data/how-to/llm-fine-tuning-optimization/attention-module.png
diff --git a/docs/data/how-to/fine-tuning-llms/ck-comparisons.jpg b/docs/data/how-to/llm-fine-tuning-optimization/ck-comparisons.jpg
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/ck-comparisons.jpg
rename to docs/data/how-to/llm-fine-tuning-optimization/ck-comparisons.jpg
diff --git a/docs/data/how-to/fine-tuning-llms/ck-compilation.jpg b/docs/data/how-to/llm-fine-tuning-optimization/ck-compilation.jpg
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/ck-compilation.jpg
rename to docs/data/how-to/llm-fine-tuning-optimization/ck-compilation.jpg
diff --git a/docs/data/how-to/fine-tuning-llms/ck-inference_flow.jpg b/docs/data/how-to/llm-fine-tuning-optimization/ck-inference_flow.jpg
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/ck-inference_flow.jpg
rename to docs/data/how-to/llm-fine-tuning-optimization/ck-inference_flow.jpg
diff --git a/docs/data/how-to/fine-tuning-llms/ck-kernel_launch.jpg b/docs/data/how-to/llm-fine-tuning-optimization/ck-kernel_launch.jpg
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/ck-kernel_launch.jpg
rename to docs/data/how-to/llm-fine-tuning-optimization/ck-kernel_launch.jpg
diff --git a/docs/data/how-to/fine-tuning-llms/ck-operation_flow.jpg b/docs/data/how-to/llm-fine-tuning-optimization/ck-operation_flow.jpg
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/ck-operation_flow.jpg
rename to docs/data/how-to/llm-fine-tuning-optimization/ck-operation_flow.jpg
diff --git a/docs/data/how-to/fine-tuning-llms/ck-root_instance.jpg b/docs/data/how-to/llm-fine-tuning-optimization/ck-root_instance.jpg
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/ck-root_instance.jpg
rename to docs/data/how-to/llm-fine-tuning-optimization/ck-root_instance.jpg
diff --git a/docs/data/how-to/fine-tuning-llms/ck-template_parameters.jpg b/docs/data/how-to/llm-fine-tuning-optimization/ck-template_parameters.jpg
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/ck-template_parameters.jpg
rename to docs/data/how-to/llm-fine-tuning-optimization/ck-template_parameters.jpg
diff --git a/docs/data/how-to/fine-tuning-llms/compute-unit.png b/docs/data/how-to/llm-fine-tuning-optimization/compute-unit.png
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/compute-unit.png
rename to docs/data/how-to/llm-fine-tuning-optimization/compute-unit.png
diff --git a/docs/data/how-to/fine-tuning-llms/occupancy-vgpr.png b/docs/data/how-to/llm-fine-tuning-optimization/occupancy-vgpr.png
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/occupancy-vgpr.png
rename to docs/data/how-to/llm-fine-tuning-optimization/occupancy-vgpr.png
diff --git a/docs/data/how-to/fine-tuning-llms/omniperf-analysis.png b/docs/data/how-to/llm-fine-tuning-optimization/omniperf-analysis.png
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/omniperf-analysis.png
rename to docs/data/how-to/llm-fine-tuning-optimization/omniperf-analysis.png
diff --git a/docs/data/how-to/fine-tuning-llms/omnitrace-timeline.png b/docs/data/how-to/llm-fine-tuning-optimization/omnitrace-timeline.png
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/omnitrace-timeline.png
rename to docs/data/how-to/llm-fine-tuning-optimization/omnitrace-timeline.png
diff --git a/docs/data/how-to/fine-tuning-llms/perfetto-trace.svg b/docs/data/how-to/llm-fine-tuning-optimization/perfetto-trace.svg
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/perfetto-trace.svg
rename to docs/data/how-to/llm-fine-tuning-optimization/perfetto-trace.svg
diff --git a/docs/data/how-to/fine-tuning-llms/profiling-perfetto-ui.png b/docs/data/how-to/llm-fine-tuning-optimization/profiling-perfetto-ui.png
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/profiling-perfetto-ui.png
rename to docs/data/how-to/llm-fine-tuning-optimization/profiling-perfetto-ui.png
diff --git a/docs/data/how-to/fine-tuning-llms/tunableop.png b/docs/data/how-to/llm-fine-tuning-optimization/tunableop.png
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/tunableop.png
rename to docs/data/how-to/llm-fine-tuning-optimization/tunableop.png
diff --git a/docs/data/how-to/fine-tuning-llms/vllm-single-gpu-log.png b/docs/data/how-to/llm-fine-tuning-optimization/vllm-single-gpu-log.png
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/vllm-single-gpu-log.png
rename to docs/data/how-to/llm-fine-tuning-optimization/vllm-single-gpu-log.png
diff --git a/docs/data/how-to/fine-tuning-llms/weight-update.png b/docs/data/how-to/llm-fine-tuning-optimization/weight-update.png
similarity index 100%
rename from docs/data/how-to/fine-tuning-llms/weight-update.png
rename to docs/data/how-to/llm-fine-tuning-optimization/weight-update.png
diff --git a/docs/how-to/deep-learning-rocm.rst b/docs/how-to/deep-learning-rocm.rst
index d638e1cb3..cdd95210b 100644
--- a/docs/how-to/deep-learning-rocm.rst
+++ b/docs/how-to/deep-learning-rocm.rst
@@ -65,4 +65,4 @@ through the following guides.
 * :doc:`rocm-for-ai/index`
 
-* :doc:`fine-tuning-llms/index`
+* :doc:`llm-fine-tuning-optimization/index`
diff --git a/docs/how-to/fine-tuning-llms/fine-tuning-and-inference.rst b/docs/how-to/llm-fine-tuning-optimization/fine-tuning-and-inference.rst
similarity index 100%
rename from docs/how-to/fine-tuning-llms/fine-tuning-and-inference.rst
rename to docs/how-to/llm-fine-tuning-optimization/fine-tuning-and-inference.rst
diff --git a/docs/how-to/fine-tuning-llms/index.rst b/docs/how-to/llm-fine-tuning-optimization/index.rst
similarity index 100%
rename from docs/how-to/fine-tuning-llms/index.rst
rename to docs/how-to/llm-fine-tuning-optimization/index.rst
diff --git a/docs/how-to/fine-tuning-llms/llm-inference-frameworks.rst b/docs/how-to/llm-fine-tuning-optimization/llm-inference-frameworks.rst
similarity index 98%
rename from docs/how-to/fine-tuning-llms/llm-inference-frameworks.rst
rename to docs/how-to/llm-fine-tuning-optimization/llm-inference-frameworks.rst
index 646b99d4e..c7bb5eada 100644
--- a/docs/how-to/fine-tuning-llms/llm-inference-frameworks.rst
+++ b/docs/how-to/llm-fine-tuning-optimization/llm-inference-frameworks.rst
@@ -77,7 +77,7 @@ Installing vLLM
    The following log message displayed in your command line indicates that the
    server is listening for requests.
 
-   .. image:: ../../data/how-to/fine-tuning-llms/vllm-single-gpu-log.png
+   .. image:: ../../data/how-to/llm-fine-tuning-optimization/vllm-single-gpu-log.png
       :alt: vLLM API server log message
      :align: center
 
diff --git a/docs/how-to/fine-tuning-llms/model-acceleration-libraries.rst b/docs/how-to/llm-fine-tuning-optimization/model-acceleration-libraries.rst
similarity index 98%
rename from docs/how-to/fine-tuning-llms/model-acceleration-libraries.rst
rename to docs/how-to/llm-fine-tuning-optimization/model-acceleration-libraries.rst
index 5a57a6b83..2e473805d 100644
--- a/docs/how-to/fine-tuning-llms/model-acceleration-libraries.rst
+++ b/docs/how-to/llm-fine-tuning-optimization/model-acceleration-libraries.rst
@@ -18,7 +18,7 @@ Attention (GQA), and Multi-Query Attention (MQA). This reduction in memory movem
 time-to-first-token (TTFT) latency for large batch sizes and long prompt
 sequences, thereby enhancing overall performance.
 
-.. image:: ../../data/how-to/fine-tuning-llms/attention-module.png
+.. image:: ../../data/how-to/llm-fine-tuning-optimization/attention-module.png
    :alt: Attention module of a large language model utilizing tiling
    :align: center
 
@@ -243,7 +243,7 @@ page describes the options.
       Validator,ROCBLAS_VERSION,4.1.0-cefa4a9b-dirty
       GemmTunableOp_float_TN,tn_200_100_20,Gemm_Rocblas_32323,0.00669595
 
-.. image:: ../../data/how-to/fine-tuning-llms/tunableop.png
+.. image:: ../../data/how-to/llm-fine-tuning-optimization/tunableop.png
    :alt: GEMM and TunableOp
    :align: center
 
diff --git a/docs/how-to/fine-tuning-llms/model-quantization.rst b/docs/how-to/llm-fine-tuning-optimization/model-quantization.rst
similarity index 100%
rename from docs/how-to/fine-tuning-llms/model-quantization.rst
rename to docs/how-to/llm-fine-tuning-optimization/model-quantization.rst
diff --git a/docs/how-to/fine-tuning-llms/multi-gpu-fine-tuning-and-inference.rst b/docs/how-to/llm-fine-tuning-optimization/multi-gpu-fine-tuning-and-inference.rst
similarity index 100%
rename from docs/how-to/fine-tuning-llms/multi-gpu-fine-tuning-and-inference.rst
rename to docs/how-to/llm-fine-tuning-optimization/multi-gpu-fine-tuning-and-inference.rst
diff --git a/docs/how-to/fine-tuning-llms/optimizing-triton-kernel.rst b/docs/how-to/llm-fine-tuning-optimization/optimizing-triton-kernel.rst
similarity index 99%
rename from docs/how-to/fine-tuning-llms/optimizing-triton-kernel.rst
rename to docs/how-to/llm-fine-tuning-optimization/optimizing-triton-kernel.rst
index cac5493dc..28e36fa62 100644
--- a/docs/how-to/fine-tuning-llms/optimizing-triton-kernel.rst
+++ b/docs/how-to/llm-fine-tuning-optimization/optimizing-triton-kernel.rst
@@ -31,7 +31,7 @@ Each accelerator or GPU has multiple Compute Units (CUs) and various CUs do comp
 can a compute kernel allocate its task to? For the :doc:`AMD MI300X accelerator
 <../../reference/gpu-arch-specs>`, the grid should have at least 1024 thread
 blocks or workgroups.
 
-.. figure:: ../../data/how-to/fine-tuning-llms/compute-unit.png
+.. figure:: ../../data/how-to/llm-fine-tuning-optimization/compute-unit.png
 
    Schematic representation of a CU in the CDNA2 or CDNA3 architecture.
 
@@ -187,7 +187,7 @@ Kernel occupancy
 
 .. _fine-tuning-llms-occupancy-vgpr-table:
 
-.. figure:: ../../data/how-to/fine-tuning-llms/occupancy-vgpr.png
+.. figure:: ../../data/how-to/llm-fine-tuning-optimization/occupancy-vgpr.png
    :alt: Occupancy related to VGPR usage in an Instinct MI300X accelerator.
    :align: center
 
diff --git a/docs/how-to/fine-tuning-llms/optimizing-with-composable-kernel.md b/docs/how-to/llm-fine-tuning-optimization/optimizing-with-composable-kernel.md
similarity index 97%
rename from docs/how-to/fine-tuning-llms/optimizing-with-composable-kernel.md
rename to docs/how-to/llm-fine-tuning-optimization/optimizing-with-composable-kernel.md
index 6196f9d76..ef7066e6c 100644
--- a/docs/how-to/fine-tuning-llms/optimizing-with-composable-kernel.md
+++ b/docs/how-to/llm-fine-tuning-optimization/optimizing-with-composable-kernel.md
@@ -32,7 +32,7 @@ The template parameters of the instance are grouped into four parameter types:
 ================ ### Figure 2 ================
 -->
-```{figure} ../../data/how-to/fine-tuning-llms/ck-template_parameters.jpg
+```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-template_parameters.jpg
 
 The template parameters of the selected GEMM kernel are classified into four
 groups. These template parameter groups should be defined properly before
 running the instance.
 ```
 
@@ -126,7 +126,7 @@ The row and column, and stride information of input matrices are also passed to
 ================ ### Figure 3 ================
 -->
-```{figure} ../../data/how-to/fine-tuning-llms/ck-kernel_launch.jpg
+```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-kernel_launch.jpg
 
 Templated kernel launching consists of kernel instantiation, making arguments
 by passing in actual application parameters, creating an invoker, and running
 the instance through the invoker.
 ```
 
@@ -155,7 +155,7 @@ The first operation in the process is to perform the multiplication of input mat
 ================ ### Figure 4 ================
 -->
-```{figure} ../../data/how-to/fine-tuning-llms/ck-operation_flow.jpg
+```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-operation_flow.jpg
 
 Operation flow.
 ```
 
@@ -171,7 +171,7 @@ Here, we use [DeviceBatchedGemmMultiD_Xdl](https://github.com/ROCm/composable_ke
 ================ ### Figure 5 ================
 -->
-```{figure} ../../data/how-to/fine-tuning-llms/ck-root_instance.jpg
+```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-root_instance.jpg
 
 Use the `DeviceBatchedGemmMultiD_Xdl` instance as a root.
 ```
 
@@ -421,7 +421,7 @@ Run `python setup.py install` to build and install the extension. It should look
 ================ ### Figure 6 ================
 -->
-```{figure} ../../data/how-to/fine-tuning-llms/ck-compilation.jpg
+```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-compilation.jpg
 
 Compilation and installation of the INT8 kernels.
 ```
 
@@ -433,7 +433,7 @@ The implementation architecture of running SmoothQuant models on MI300X GPUs is
 ================ ### Figure 7 ================
 -->
-```{figure} ../../data/how-to/fine-tuning-llms/ck-inference_flow.jpg
+```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-inference_flow.jpg
 
 The implementation architecture of running SmoothQuant models on AMD MI300X
 accelerators.
 ```
 
@@ -459,7 +459,7 @@ Figure 8 shows the performance comparisons between the original FP16 and the Smo
 ================ ### Figure 8 ================
 -->
-```{figure} ../../data/how-to/fine-tuning-llms/ck-comparisons.jpg
+```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-comparisons.jpg
 
 Performance comparisons between the original FP16 and the SmoothQuant-quantized
 INT8 models on a single MI300X accelerator.
 ```
 
diff --git a/docs/how-to/fine-tuning-llms/overview.rst b/docs/how-to/llm-fine-tuning-optimization/overview.rst
similarity index 98%
rename from docs/how-to/fine-tuning-llms/overview.rst
rename to docs/how-to/llm-fine-tuning-optimization/overview.rst
index b4493f071..061470a99 100644
--- a/docs/how-to/fine-tuning-llms/overview.rst
+++ b/docs/how-to/llm-fine-tuning-optimization/overview.rst
@@ -41,7 +41,7 @@ The weight update is as follows: :math:`W_{updated} = W + ΔW`.
 If the weight matrix :math:`W` contains 7B parameters, then the weight update
 matrix :math:`ΔW` should also contain 7B parameters. Therefore, the :math:`ΔW`
 calculation is computationally and memory intensive.
 
-.. figure:: ../../data/how-to/fine-tuning-llms/weight-update.png
+.. figure:: ../../data/how-to/llm-fine-tuning-optimization/weight-update.png
    :alt: Weight update diagram
 
    (a) Weight update in regular fine-tuning.
    (b) Weight update in LoRA where the product of matrix A (:math:`M\times K`)
diff --git a/docs/how-to/fine-tuning-llms/profiling-and-debugging.rst b/docs/how-to/llm-fine-tuning-optimization/profiling-and-debugging.rst
similarity index 97%
rename from docs/how-to/fine-tuning-llms/profiling-and-debugging.rst
rename to docs/how-to/llm-fine-tuning-optimization/profiling-and-debugging.rst
index 4371fca4a..034b5021c 100644
--- a/docs/how-to/fine-tuning-llms/profiling-and-debugging.rst
+++ b/docs/how-to/llm-fine-tuning-optimization/profiling-and-debugging.rst
@@ -38,7 +38,7 @@ You can then visualize and view these metrics using an open-source profile visua
    shows transactions denoting the CPU activities that launch GPU kernels, while
    the lower section shows the actual GPU activities where it processes the
    ``resnet18`` inferences layer by layer.
 
-   .. figure:: ../../data/how-to/fine-tuning-llms/perfetto-trace.svg
+   .. figure:: ../../data/how-to/llm-fine-tuning-optimization/perfetto-trace.svg
 
       Perfetto trace visualization example.
 
@@ -100,7 +100,7 @@ analyze bottlenecks and stressors for their computational workloads on AMD Insti
    Omniperf collects hardware counters in multiple passes, and will therefore
    re-run the application during each pass to collect different sets of metrics.
 
-.. figure:: ../../data/how-to/fine-tuning-llms/omniperf-analysis.png
+.. figure:: ../../data/how-to/llm-fine-tuning-optimization/omniperf-analysis.png
 
    Omniperf memory chart analysis panel.
 
@@ -130,7 +130,7 @@ hardware counters are also included.
    have the greatest impact on the end-to-end execution of the application and
    to discover what else is happening on the system during a performance
    bottleneck.
 
-.. figure:: ../../data/how-to/fine-tuning-llms/omnitrace-timeline.png
+.. figure:: ../../data/how-to/llm-fine-tuning-optimization/omnitrace-timeline.png
 
    Omnitrace timeline trace example.
 
diff --git a/docs/how-to/fine-tuning-llms/single-gpu-fine-tuning-and-inference.rst b/docs/how-to/llm-fine-tuning-optimization/single-gpu-fine-tuning-and-inference.rst
similarity index 100%
rename from docs/how-to/fine-tuning-llms/single-gpu-fine-tuning-and-inference.rst
rename to docs/how-to/llm-fine-tuning-optimization/single-gpu-fine-tuning-and-inference.rst
diff --git a/docs/how-to/rocm-for-ai/train-a-model.rst b/docs/how-to/rocm-for-ai/train-a-model.rst
index d7db257ca..ac10ebf12 100644
--- a/docs/how-to/rocm-for-ai/train-a-model.rst
+++ b/docs/how-to/rocm-for-ai/train-a-model.rst
@@ -110,7 +110,7 @@ Fine-tuning your model
 ROCm supports multiple techniques for :ref:`optimizing fine-tuning `, for
 example, LoRA, QLoRA, PEFT, and FSDP.
 
-Learn more about challenges and solutions for model fine-tuning in :doc:`../fine-tuning-llms/index`.
+Learn more about challenges and solutions for model fine-tuning in :doc:`../llm-fine-tuning-optimization/index`.
 
 The following developer blogs showcase examples of how to fine-tune a model on
 an AMD accelerator or GPU.
diff --git a/docs/index.md b/docs/index.md
index 98c2a12d1..90c4d6600 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -34,16 +34,16 @@ Our documentation is organized into the following categories:
   * {doc}`Quick start guide`
   * {doc}`Linux install guide`
   * {doc}`Package manager integration`
+  * {doc}`Install Docker containers`
+  * {doc}`ROCm & Spack`
 * Windows
   * {doc}`Windows install guide`
   * {doc}`Application deployment guidelines`
 * [Deep learning frameworks](./how-to/deep-learning-rocm.rst)
-  * {doc}`Install Docker containers`
   * {doc}`PyTorch for ROCm`
   * {doc}`TensorFlow for ROCm`
   * {doc}`JAX for ROCm`
   * {doc}`MAGMA for ROCm`
-  * {doc}`ROCm & Spack`
 :::
 
 :::{grid-item-card}
@@ -92,7 +92,7 @@ Our documentation is organized into the following categories:
   :padding: 2
 
 * [Using ROCm for AI](./how-to/rocm-for-ai/index.rst)
-* [Fine-tuning LLMs and inference optimization](./how-to/fine-tuning-llms/index.rst)
+* [Fine-tuning LLMs and inference optimization](./how-to/llm-fine-tuning-optimization/index.rst)
 * [System tuning for various architectures](./how-to/tuning-guides.md)
   * [MI100](./how-to/tuning-guides/mi100.md)
   * [MI200](./how-to/tuning-guides/mi200.md)
diff --git a/docs/sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in
index 085035e24..eea43ac4a 100644
--- a/docs/sphinx/_toc.yml.in
+++ b/docs/sphinx/_toc.yml.in
@@ -58,27 +58,27 @@ subtrees:
       - file: how-to/rocm-for-ai/train-a-model.rst
       - file: how-to/rocm-for-ai/hugging-face-models.rst
       - file: how-to/rocm-for-ai/deploy-your-model.rst
-  - file: how-to/fine-tuning-llms/index.rst
+  - file: how-to/llm-fine-tuning-optimization/index.rst
     title: Fine-tuning LLMs and inference optimization
     subtrees:
       - entries:
-          - file: how-to/fine-tuning-llms/overview.rst
+          - file: how-to/llm-fine-tuning-optimization/overview.rst
            title: Conceptual overview
-          - file: how-to/fine-tuning-llms/fine-tuning-and-inference.rst
+          - file: how-to/llm-fine-tuning-optimization/fine-tuning-and-inference.rst
            subtrees:
              - entries:
-                - file: how-to/fine-tuning-llms/single-gpu-fine-tuning-and-inference.rst
+                - file: how-to/llm-fine-tuning-optimization/single-gpu-fine-tuning-and-inference.rst
                  title: Using a single accelerator
-                - file: how-to/fine-tuning-llms/multi-gpu-fine-tuning-and-inference.rst
+                - file: how-to/llm-fine-tuning-optimization/multi-gpu-fine-tuning-and-inference.rst
                  title: Using multiple accelerators
-          - file: how-to/fine-tuning-llms/model-quantization.rst
-          - file: how-to/fine-tuning-llms/model-acceleration-libraries.rst
-          - file: how-to/fine-tuning-llms/llm-inference-frameworks.rst
-          - file: how-to/fine-tuning-llms/optimizing-with-composable-kernel.md
+          - file: how-to/llm-fine-tuning-optimization/model-quantization.rst
+          - file: how-to/llm-fine-tuning-optimization/model-acceleration-libraries.rst
+          - file: how-to/llm-fine-tuning-optimization/llm-inference-frameworks.rst
+          - file: how-to/llm-fine-tuning-optimization/optimizing-with-composable-kernel.md
            title: Optimizing with Composable Kernel
-          - file: how-to/fine-tuning-llms/optimizing-triton-kernel.rst
+          - file: how-to/llm-fine-tuning-optimization/optimizing-triton-kernel.rst
            title: Optimizing Triton kernels
-          - file: how-to/fine-tuning-llms/profiling-and-debugging.rst
+          - file: how-to/llm-fine-tuning-optimization/profiling-and-debugging.rst
   - file: how-to/tuning-guides.md
     title: System optimization
     subtrees: