Use intersphinx links for deep learning (#5859 )

* Use intersphinx links for deep learning * Update deep-learning-rocm.rst remove Taichi * Update deep-learning-rocm.rst Change Install link to "link" * Apply suggestion from @randyh62 OK
Add missing APU entries to GPU hardware specifications (#646 ) (#5862 ) (#5863 )
2026-01-21 04:28:01 -05:00 · 2026-01-20 09:17:37 -08:00 · 2026-01-16 13:02:06 -05:00 · 2026-01-15 11:34:02 -05:00 · 2026-01-14 11:26:11 -05:00 · 2026-01-08 12:29:00 -05:00
15 changed files with 308 additions and 218 deletions
--- a/.azuredevops/components/HIP.yml
+++ b/.azuredevops/components/HIP.yml
@@ -34,7 +34,6 @@ parameters:
  default:
    - cmake
    - libnuma-dev
    - libsimde-dev
    - mesa-common-dev
    - ninja-build
    - ocl-icd-libopencl1
--- a/.azuredevops/components/origami.yml
+++ b/.azuredevops/components/origami.yml
@@ -39,7 +39,6 @@ parameters:
    - python3
    - python3-dev
    - python3-pip
    - python3-venv
    - libgtest-dev
    - libboost-filesystem-dev
    - libboost-program-options-dev
@@ -47,8 +46,6 @@ parameters:
  type: object
  default:
    - nanobind>=2.0.0
    - pytest
    - pytest-cov
 - name: rocmDependencies
  type: object
  default:
@@ -75,10 +72,8 @@ parameters:
      - { os: ubuntu2204, packageManager: apt }
      - { os: almalinux8, packageManager: dnf }
    testJobs:
      - { os: ubuntu2204, packageManager: apt, target: gfx942 }
      - { os: ubuntu2204, packageManager: apt, target: gfx90a }
      # - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
      # - { os: ubuntu2204, packageManager: apt, target: gfx1151 }
      # - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
 - name: downstreamComponentMatrix
  type: object
  default:
@@ -121,11 +116,6 @@ jobs:
      parameters:
        dependencyList:
          - gtest
    - ${{ if ne(job.os, 'almalinux8') }}:
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
        parameters:
          dependencyList:
            - catch2
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
@@ -147,7 +137,6 @@ jobs:
          -DORIGAMI_BUILD_SHARED_LIBS=ON
          -DORIGAMI_ENABLE_PYTHON=ON
          -DORIGAMI_BUILD_TESTING=ON
          -DORIGAMI_ENABLE_FETCH=ON
          -GNinja
    - ${{ if ne(job.os, 'almalinux8') }}:
      - task: PublishPipelineArtifact@1
@@ -180,6 +169,7 @@ jobs:
      dependsOn: origami_build_${{ job.os }}
      condition:
        and(succeeded(),
          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
          eq(${{ parameters.aggregatePipeline }}, False)
        )
@@ -190,30 +180,30 @@ jobs:
      workspace:
        clean: all
      steps:
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
        parameters:
          checkoutRepo: ${{ parameters.checkoutRepo }}
          sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
        parameters:
          aptPackages: ${{ parameters.aptPackages }}
          pipModules: ${{ parameters.pipModules }}
          packageManager: ${{ job.packageManager }}
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
        parameters:
          checkoutRepo: ${{ parameters.checkoutRepo }}
          sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
        parameters:
          dependencyList:
            - gtest
      - ${{ if ne(job.os, 'almalinux8') }}:
        - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
          parameters:
            dependencyList:
              - catch2
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
        parameters:
          preTargetFilter: ${{ parameters.componentName }}
          os: ${{ job.os }}
      - task: DownloadPipelineArtifact@2
        displayName: 'Download Build Directory Artifact'
        inputs:
          artifact: '${{ parameters.componentName }}_${{ job.os }}_build_dir'
          path: '$(Agent.BuildDirectory)/s/build'
      - task: DownloadPipelineArtifact@2
        displayName: 'Download Python Source Artifact'
        inputs:
          artifact: '${{ parameters.componentName }}_${{ job.os }}_python_src'
          path: '$(Agent.BuildDirectory)/s/python'
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
        parameters:
          checkoutRef: ${{ parameters.checkoutRef }}
@@ -222,72 +212,25 @@ jobs:
          gpuTarget: ${{ job.target }}
          ${{ if parameters.triggerDownstreamJobs }}:
            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
      - task: CMake@1
        displayName: 'Origami Test CMake Configuration'
        inputs:
          cmakeArgs: >-
            -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
            -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
            -DORIGAMI_BUILD_SHARED_LIBS=ON
            -DORIGAMI_ENABLE_PYTHON=ON
            -DORIGAMI_BUILD_TESTING=ON
            -GNinja
            $(Agent.BuildDirectory)/s
      - task: Bash@3
        displayName: 'Build Origami Tests and Python Bindings'
        inputs:
          targetType: inline
          workingDirectory: build
          script: |
            cmake --build . --target origami-tests origami_python -- -j$(nproc)
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
      # Run tests using CTest (discovers and runs both C++ and Python tests)
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
        parameters:
          componentName: ${{ parameters.componentName }}
          os: ${{ job.os }}
-          testDir: 'build'
+          testDir: '$(Agent.BuildDirectory)/rocm/bin'
-          testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml'
+          testExecutable: './origami-tests'
-      # Test pip install workflow
+          testParameters: '--yaml origami-tests.yaml --gtest_output=xml:./test_output.xml --gtest_color=yes'
-      # - task: Bash@3
+      - script: |
-      #   displayName: 'Test Pip Install'
+          set -e
-      #   inputs:
+          export PYTHONPATH=$(Agent.BuildDirectory)/s/build/python:$PYTHONPATH
      #     targetType: inline
      #     script: |
      #       set -e
-      #       echo "==================================================================="
+          echo "--- Running origami_test.py ---"
-      #       echo "Testing pip install workflow (pip install -e .)"
+          python3 $(Agent.BuildDirectory)/s/python/origami_test.py
      #       echo "==================================================================="
-      #       # Set environment variables for pip install CMake build
+          echo "--- Running origami_grid_test.py ---"
-      #       export ROCM_PATH=$(Agent.BuildDirectory)/rocm
+          python3 $(Agent.BuildDirectory)/s/python/origami_grid_test.py
-      #       export CMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm:$(Agent.BuildDirectory)/vendor
+        displayName: 'Run Python Binding Tests'
-      #       export CMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
+        condition: succeeded()
      #       echo "ROCM_PATH: $ROCM_PATH"
      #       echo "CMAKE_PREFIX_PATH: $CMAKE_PREFIX_PATH"
      #       echo "CMAKE_CXX_COMPILER: $CMAKE_CXX_COMPILER"
      #       echo ""
      #       # Install from source directory
      #       cd "$(Agent.BuildDirectory)/s/python"
      #       pip install -e .
      #       # Verify import works
      #       echo ""
      #       echo "Verifying origami can be imported..."
      #       python3 -c "import origami; print('✓ Successfully imported origami')"
      #       # Run pytest on installed package
      #       echo ""
      #       echo "Running pytest tests..."
      #       python3 -m pytest tests/ -v -m "not slow" --tb=short
      #       echo ""
      #       echo "==================================================================="
      #       echo "Pip install test completed successfully"
      #       echo "==================================================================="
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
        parameters:
          aptPackages: ${{ parameters.aptPackages }}
--- a/.azuredevops/components/rocm-examples.yml
+++ b/.azuredevops/components/rocm-examples.yml
@@ -30,7 +30,6 @@ parameters:
    - python3-pip
    - protobuf-compiler
    - libprotoc-dev
    - libopencv-dev
 - name: pipModules
  type: object
  default:
@@ -65,7 +64,6 @@ parameters:
    - MIVisionX
    - rocm_smi_lib
    - rccl
    - rocAL
    - rocALUTION
    - rocBLAS
    - rocDecode
@@ -105,7 +103,6 @@ parameters:
    - MIVisionX
    - rocm_smi_lib
    - rccl
    - rocAL
    - rocALUTION
    - rocBLAS
    - rocDecode
--- a/.wordlist.txt
+++ b/.wordlist.txt
@@ -36,6 +36,7 @@ Andrej
 Arb
 Autocast
 autograd
 Backported
 BARs
 BatchNorm
 BLAS
@@ -203,9 +204,11 @@ GenAI
 GenZ
 GitHub
 Gitpod
 hardcoded
 HBM
 HCA
 HGX
 HLO
 HIPCC
 hipDataType
 HIPExtension
@@ -333,6 +336,7 @@ MoEs
 Mooncake
 Mpops
 Multicore
 multihost
 Multithreaded
 mx
 MXFP
@@ -1027,6 +1031,7 @@ uncacheable
 uncorrectable
 underoptimized
 unhandled
 unfused
 uninstallation
 unmapped
 unsqueeze
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -270,26 +270,26 @@ The [ROCm examples repository](https://github.com/ROCm/rocm-examples) has been e
 :margin: auto 0 auto auto
 :::{grid}
 :margin: auto 0 auto auto
-* [hipBLASLt](https://rocm.docs.amd.com/projects/hipBLASLt/en/latest/)
+* [hipBLASLt](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/hipBLASLt)
-* [hipSPARSE](https://rocm.docs.amd.com/projects/hipSPARSE/en/latest/)
+* [hipSPARSE](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/hipSPARSE)
-* [hipSPARSELt](https://rocm.docs.amd.com/projects/hipSPARSELt/en/latest/)
+* [hipSPARSELt](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/hipSPARSELt)
-* [hipTensor](https://rocm.docs.amd.com/projects/hipTensor/en/latest/)
+* [hipTensor](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/hipTensor)
 :::
 :::{grid}
 :margin: auto 0 auto auto
-* [rocALUTION](https://rocm.docs.amd.com/projects/rocALUTION/en/latest/)
+* [rocALUTION](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocALUTION)
-* [ROCprofiler-SDK](https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/latest/)
+* [ROCprofiler-SDK](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocProfiler-SDK)
-* [rocWMMA](https://rocm.docs.amd.com/projects/rocWMMA/en/latest/)
+* [rocWMMA](https://github.com/ROCm/rocm-examples/tree/amd-staging/Libraries/rocWMMA)
 :::
 ::::
 Usage examples are now available for the following performance analysis tools:
-* [ROCm Compute Profiler](https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/index.html)
+* [ROCm Compute Profiler](https://github.com/ROCm/rocm-examples/tree/amd-staging/Tools/rocprof-compute)
-* [ROCm Systems Profiler](https://rocm.docs.amd.com/projects/rocprofiler-systems/en/latest/index.html)
+* [ROCm Systems Profiler](https://github.com/ROCm/rocm-examples/tree/amd-staging/Tools/rocprof-systems)
-* [rocprofv3](https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/latest/how-to/using-rocprofv3.html)
+* [rocprofv3](https://github.com/ROCm/rocm-examples/tree/amd-staging/Tools/rocprofv3)
-The complete source code for the [HIP Graph Tutorial](https://rocm.docs.amd.com/projects/HIP/en/latest/tutorial/graph_api.html) is also available as part of the ROCm examples.
+The complete source code for the [HIP Graph Tutorial](https://github.com/ROCm/rocm-examples/tree/amd-staging/HIP-Doc/Tutorials/graph_api) is also available as part of the ROCm examples.
 ### ROCm documentation updates
--- a/docs/compatibility/ml-compatibility/jax-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/jax-compatibility.rst
@@ -269,6 +269,33 @@ For a complete and up-to-date list of JAX public modules (for example, ``jax.num
  JAX API modules are maintained by the JAX project and are subject to change.
  Refer to the official JAX documentation for the most up-to-date information.
 Key features and enhancements for ROCm 7.1
 ===============================================================================
 - Enabled compilation of multihost HLO runner Python bindings.
  - Backported multihost HLO runner bindings and some related changes to
    :code:`FunctionalHloRunner`.
  - Added :code:`requirements_lock_3_12` to enable building for Python 3.12.
 - Removed hardcoded NHWC convolution layout for ``fp16`` precision to address the performance drops for ``fp16`` precision on gfx12xx GPUs.
 - ROCprofiler-SDK integration:
  - Integrated ROCprofiler-SDK (v3) into XLA to improve profiling of GPU events and
    support both time-based and step-based profiling.
  - Added unit tests for :code:`rocm_collector` and :code:`rocm_tracer`.
 - Added Triton unsupported conversion from ``f8E4M3FNUZ`` to ``fp16`` with
  rounding mode.
 - Introduced :code:`CudnnFusedConvDecomposer` to revert fused convolutions
  when :code:`ConvAlgorithmPicker` fails to find a fused algorithm, and removed
  unfused fallback paths from :code:`RocmFusedConvRunner`.
 Key features and enhancements for ROCm 7.0
 ===============================================================================
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -268,6 +268,3 @@ html_context = {
    "granularity_type" : [('Coarse-grained', 'coarse-grained'), ('Fine-grained', 'fine-grained')],
    "scope_type" : [('Device', 'device'), ('System', 'system')]
 }
 # Disable figure and table numbering
 numfig = False
--- a/docs/how-to/deep-learning-rocm.rst
+++ b/docs/how-to/deep-learning-rocm.rst
@@ -24,7 +24,7 @@ The table below summarizes information about ROCm-enabled deep learning framewor
      - GitHub
    * - :doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`
-      - :doc:`Pytorch install <rocm-install-on-linux:install/3rd-party/pytorch-install>`
+      - :doc:`link <rocm-install-on-linux:install/3rd-party/pytorch-install>`
      - 
        - Docker image
        - Wheels package
@@ -35,7 +35,7 @@ The table below summarizes information about ROCm-enabled deep learning framewor
          <a href="https://github.com/ROCm/pytorch"><i class="fab fa-github fa-lg"></i></a>
    * - :doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`
-      - :doc:`TensorFlow install <rocm-install-on-linux:install/3rd-party/tensorflow-install>`
+      - :doc:`link <rocm-install-on-linux:install/3rd-party/tensorflow-install>`
      - 
        - Docker image
        - Wheels package
@@ -45,7 +45,7 @@ The table below summarizes information about ROCm-enabled deep learning framewor
          <a href="https://github.com/ROCm/tensorflow-upstream"><i class="fab fa-github fa-lg"></i></a> 
    * - :doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`
-      - :doc:`JAX install <rocm-install-on-linux:install/3rd-party/jax-install>`
+      - :doc:`link <rocm-install-on-linux:install/3rd-party/jax-install>`
      - 
        - Docker image
      - .. raw:: html
@@ -53,7 +53,7 @@ The table below summarizes information about ROCm-enabled deep learning framewor
          <a href="https://github.com/ROCm/jax"><i class="fab fa-github fa-lg"></i></a>
    * - :doc:`verl <../compatibility/ml-compatibility/verl-compatibility>`
-      - :doc:`verl install <rocm-install-on-linux:install/3rd-party/verl-install>`
+      - :doc:`link <rocm-install-on-linux:install/3rd-party/verl-install>`
      - 
        - Docker image
      - .. raw:: html
@@ -61,7 +61,7 @@ The table below summarizes information about ROCm-enabled deep learning framewor
          <a href="https://github.com/ROCm/verl"><i class="fab fa-github fa-lg"></i></a>
    * - :doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>`
-      - :doc:`Stanford Megatron-LM install <rocm-install-on-linux:install/3rd-party/stanford-megatron-lm-install>`
+      - :doc:`link <rocm-install-on-linux:install/3rd-party/stanford-megatron-lm-install>`
      - 
        - Docker image
      - .. raw:: html
@@ -69,7 +69,7 @@ The table below summarizes information about ROCm-enabled deep learning framewor
          <a href="https://github.com/ROCm/Stanford-Megatron-LM"><i class="fab fa-github fa-lg"></i></a>
    * - :doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>`
-      - :doc:`DGL install <rocm-install-on-linux:install/3rd-party/dgl-install>`
+      - :doc:`link <rocm-install-on-linux:install/3rd-party/dgl-install>`
      - 
        - Docker image
      - .. raw:: html
@@ -77,24 +77,15 @@ The table below summarizes information about ROCm-enabled deep learning framewor
          <a href="https://github.com/ROCm/dgl"><i class="fab fa-github fa-lg"></i></a> 
    * - :doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>`
-      - :doc:`Megablocks install <rocm-install-on-linux:install/3rd-party/megablocks-install>`
+      - :doc:`link <rocm-install-on-linux:install/3rd-party/megablocks-install>`
      - 
        - Docker image
      - .. raw:: html
          <a href="https://github.com/ROCm/megablocks"><i class="fab fa-github fa-lg"></i></a>
    * - :doc:`Taichi <../compatibility/ml-compatibility/taichi-compatibility>`
      - `Taichi install <https://rocm.docs.amd.com/projects/taichi/en/latest/install/taichi-install.html>`__
      - 
        - Docker image
        - Wheels package
      - .. raw:: html
          <a href="https://github.com/ROCm/taichi"><i class="fab fa-github fa-lg"></i></a>
    * - :doc:`Ray <../compatibility/ml-compatibility/ray-compatibility>`
-      - :doc:`Ray install <rocm-install-on-linux:install/3rd-party/ray-install>`
+      - :doc:`link <rocm-install-on-linux:install/3rd-party/ray-install>`
      - 
        - Docker image
        - Wheels package
@@ -104,7 +95,7 @@ The table below summarizes information about ROCm-enabled deep learning framewor
          <a href="https://github.com/ROCm/ray"><i class="fab fa-github fa-lg"></i></a>
    * - :doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>`
-      - :doc:`llama.cpp install <rocm-install-on-linux:install/3rd-party/llama-cpp-install>`
+      - :doc:`link <rocm-install-on-linux:install/3rd-party/llama-cpp-install>`
      - 
        - Docker image
        - ROCm Base Docker image
@@ -113,7 +104,7 @@ The table below summarizes information about ROCm-enabled deep learning framewor
          <a href="https://github.com/ROCm/llama.cpp"><i class="fab fa-github fa-lg"></i></a>
    * - :doc:`FlashInfer <../compatibility/ml-compatibility/flashinfer-compatibility>`
-      - :doc:`FlashInfer install <rocm-install-on-linux:install/3rd-party/flashinfer-install>`
+      - :doc:`link <rocm-install-on-linux:install/3rd-party/flashinfer-install>`
      - 
        - Docker image
        - ROCm Base Docker image
--- a/docs/how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference.rst
+++ b/docs/how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference.rst
@@ -44,7 +44,7 @@ Setting up the base implementation environment
   .. code-block:: shell
-      rocm-smi --showproductname
+      amd-smi static --board
 #. Check that your GPUs are available to PyTorch.
@@ -65,8 +65,8 @@ Setting up the base implementation environment
 .. tip::
-   During training and inference, you can check the memory usage by running the ``rocm-smi`` command in your terminal.
+   During training and inference, you can check the memory usage by running the ``amd-smi`` command in your terminal.
-   This tool helps you see shows which GPUs are involved.
+   This tool helps you see which GPUs are involved.
 .. _fine-tuning-llms-multi-gpu-hugging-face-accelerate:
@@ -130,7 +130,7 @@ After loading the model in this way, the model is fully ready to use the resourc
 torchtune for fine-tuning and inference
 =============================================
-`torchtune <https://meta-pytorch.org/torchtune/main/>`_ is a PyTorch-native library for easy single and multi-GPU 
+`torchtune <https://pytorch.org/torchtune/main/>`_ is a PyTorch-native library for easy single and multi-GPU
 model fine-tuning and inference with LLMs.
 #. Install torchtune using pip.
--- a/docs/how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference.rst
+++ b/docs/how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference.rst
@@ -44,20 +44,19 @@ Setting up the base implementation environment
   .. code-block:: shell
-      rocm-smi --showproductname
+      amd-smi static --board
   Your output should look like this:
   .. code-block:: shell
-      ============================ ROCm System Management Interface ============================
+      GPU: 0
-      ====================================== Product Info ======================================
+         BOARD:
-      GPU[0]          : Card Series:          AMD Instinct MI300X OAM
+            MODEL_NUMBER: 102-G39203-0B
-      GPU[0]          : Card model:           0x74a1
+            PRODUCT_SERIAL: PCB079220-1150
-      GPU[0]          : Card vendor:          Advanced Micro Devices, Inc. [AMD/ATI]
+            FRU_ID: 113-AMDG392030B04-100-300000097H
-      GPU[0]          : Card SKU:             MI3SRIOV
+            PRODUCT_NAME: AMD Instinct MI325 OAM
-      ==========================================================================================
+            MANUFACTURER_NAME: AMD
      ================================== End of ROCm SMI Log ===================================
 #. Check that your GPUs are available to PyTorch.
--- a/docs/how-to/rocm-for-ai/system-setup/prerequisite-system-validation.rst
+++ b/docs/how-to/rocm-for-ai/system-setup/prerequisite-system-validation.rst
@@ -31,16 +31,16 @@ in the Instinct documentation for more information.
 Hardware verification with ROCm
 -------------------------------
-Use the command ``rocm-smi --setperfdeterminism 1900`` to set the max clock speed up to 1900 MHz
+Use the command ``amd-smi set --perf-determinism 1900`` to set the max clock speed up to 1900 MHz
 instead of the default 2100 MHz. This can reduce the chance of a PCC event lowering the attainable
 GPU clocks. This setting will not be required for new IFWI releases with the production PRC feature.
-You can restore this setting to its default value with the ``rocm-smi -r`` command.
+You can restore this setting to its default value with the ``amd-smi reset --clocks`` command.
 Run the command:
 .. code-block:: shell
-   rocm-smi --setperfdeterminism 1900
+   amd-smi set --perf-determinism 1900
 See `Hardware verification with ROCm <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html#hardware-verification-with-rocm>`_
 in the Instinct documentation for more information.
--- a/docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v24.12-dev.rst
+++ b/docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v24.12-dev.rst
@@ -108,16 +108,16 @@ for more information.
 Hardware verification with ROCm
 -------------------------------
-Use the command ``rocm-smi --setperfdeterminism 1900`` to set the max clock speed up to 1900 MHz
+Use the command ``amd-smi set --perf-determinism 1900`` to set the max clock speed up to 1900 MHz
 instead of the default 2100 MHz. This can reduce the chance of a PCC event lowering the attainable
 GPU clocks. This setting will not be required for new IFWI releases with the production PRC feature.
-You can restore this setting to its default value with the ``rocm-smi -r`` command.
+You can restore this setting to its default value with the ``amd-smi reset --clocks`` command.
 Run the command:
 .. code-block:: shell
-   rocm-smi --setperfdeterminism 1900
+   amd-smi set --perf-determinism 1900
 See `Hardware verification with ROCm <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html#hardware-verification-with-rocm>`_ for more information.
--- a/docs/reference/gpu-arch-specs.rst
+++ b/docs/reference/gpu-arch-specs.rst
@@ -5,7 +5,7 @@
 GPU hardware specifications
 ===========================================
-The following tables provide an overview of the hardware specifications for AMD Instinct™ GPUs, and AMD Radeon™ PRO and Radeon™ GPUs.
+The following tables provide an overview of the hardware specifications for AMD Instinct™ GPUs, AMD Radeon™ PRO and Radeon™ GPUs, and AMD Ryzen™ APUs.
 For more information about ROCm hardware compatibility, see the ROCm `Compatibility matrix <https://rocm.docs.amd.com/en/latest/compatibility/compatibility-matrix.html>`_.
@@ -18,7 +18,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
        :name: instinct-arch-spec-table
        *
-          - Model
+          - Name
          - Architecture
          - LLVM target name
          - VRAM (GiB)
@@ -297,7 +297,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
        :name: radeon-pro-arch-spec-table
        *
-          - Model
+          - Name
          - Architecture
          - LLVM target name
@@ -539,7 +539,7 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
        :name: radeon-arch-spec-table
        *
-          - Model
+          - Name
          - Architecture
          - LLVM target name
          - VRAM (GiB)
@@ -953,6 +953,127 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
          - 9
          - 0
  .. tab-item:: AMD Ryzen APUs
    .. list-table::
        :header-rows: 1
        :name: ryzen-arch-spec-table
        *
          - Name
          - Graphics model
          - Architecture
          - LLVM target name
          - VRAM (GiB)
          - Compute Units
          - Wavefront Size
          - LDS (KiB)
          - Infinity Cache (MiB)
          - L2 Cache (MiB)
          - Graphics L1 Cache (KiB)
          - L0 Vector Cache (KiB)
          - L0 Scalar Cache (KiB)
          - L0 Instruction Cache (KiB)
          - VGPR File (KiB)
          - SGPR File (KiB)
          - GFXIP Major version
          - GFXIP Minor version
        *
          - AMD Ryzen 7 7840U
          - Radeon 780M
          - RDNA3
          - gfx1103
          - Dynamic + carveout
          - 12
          - 32 or 64
          - 128
          - N/A
          - 2
          - 256
          - 32
          - 16
          - 32
          - 512
          - 32
          - 11
          - 0
        *
          - AMD Ryzen 9 270
          - Radeon 780M
          - RDNA3
          - gfx1103
          - Dynamic + carveout
          - 12
          - 32 or 64
          - 128
          - N/A
          - 2
          - 256
          - 32
          - 16
          - 32
          - 512
          - 32
          - 11
          - 0
        *
          - AMD Ryzen AI 9 HX 375
          - Radeon 890M
          - RDNA3.5
          - gfx1150
          - Dynamic + carveout
          - 16
          - 32 or 64
          - 128
          - N/A
          - 2
          - 256
          - 32
          - 16
          - 32
          - 512
          - 32
          - 11
          - 5
        *
          - AMD Ryzen AI Max+ PRO 395
          - Radeon 8060S
          - RDNA3.5
          - gfx1151
          - Dynamic + carveout
          - 40
          - 32 or 64
          - 128
          - 32
          - 2
          - 256
          - 32
          - 16
          - 32
          - 768
          - 32
          - 11
          - 5
        *
          - AMD Ryzen AI 7 350
          - Radeon 860M
          - RDNA3.5
          - gfx1152
          - Dynamic + carveout
          - 8
          - 32 or 64
          - 128
          - N/A
          - 1
          - 256
          - 32
          - 16
          - 32
          - 512
          - 32
          - 11
          - 5
 Glossary
 ========
--- a/docs/sphinx/_toc.yml.in
+++ b/docs/sphinx/_toc.yml.in
@@ -29,27 +29,25 @@ subtrees:
    title: Deep learning frameworks
    subtrees:
    - entries:
-      - file: compatibility/ml-compatibility/pytorch-compatibility
+      - file: compatibility/ml-compatibility/pytorch-compatibility.rst
        title: PyTorch compatibility
-      - file: compatibility/ml-compatibility/tensorflow-compatibility
+      - file: compatibility/ml-compatibility/tensorflow-compatibility.rst
        title: TensorFlow compatibility
-      - file: compatibility/ml-compatibility/jax-compatibility
+      - file: compatibility/ml-compatibility/jax-compatibility.rst
        title: JAX compatibility
-      - file: compatibility/ml-compatibility/verl-compatibility
+      - file: compatibility/ml-compatibility/verl-compatibility.rst
        title: verl compatibility
-      - file: compatibility/ml-compatibility/stanford-megatron-lm-compatibility
+      - file: compatibility/ml-compatibility/stanford-megatron-lm-compatibility.rst
        title: Stanford Megatron-LM compatibility
-      - file: compatibility/ml-compatibility/dgl-compatibility
+      - file: compatibility/ml-compatibility/dgl-compatibility.rst
        title: DGL compatibility
-      - file: compatibility/ml-compatibility/megablocks-compatibility
+      - file: compatibility/ml-compatibility/megablocks-compatibility.rst
        title: Megablocks compatibility
-      - file: compatibility/ml-compatibility/taichi-compatibility
+      - file: compatibility/ml-compatibility/ray-compatibility.rst
        title: Taichi compatibility
      - file: compatibility/ml-compatibility/ray-compatibility
        title: Ray compatibility
-      - file: compatibility/ml-compatibility/llama-cpp-compatibility
+      - file: compatibility/ml-compatibility/llama-cpp-compatibility.rst
        title: llama.cpp compatibility
-      - file: compatibility/ml-compatibility/flashinfer-compatibility
+      - file: compatibility/ml-compatibility/flashinfer-compatibility.rst
        title: FlashInfer compatibility
  - file: how-to/build-rocm.rst
    title: Build ROCm from source
@@ -77,8 +75,14 @@ subtrees:
        - entries:
          - file: how-to/rocm-for-ai/training/benchmark-docker/primus-megatron.rst
            title: Train a model with Primus and Megatron-LM
            entries:
            - file: how-to/rocm-for-ai/training/benchmark-docker/megatron-lm.rst
              title: Train a model with Megatron-LM
          - file: how-to/rocm-for-ai/training/benchmark-docker/primus-pytorch.rst
            title: Train a model with Primus and PyTorch
            entries:
            - file: how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.rst
              title: Train a model with PyTorch
          - file: how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.rst
            title: Train a model with JAX MaxText
          - file: how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry
@@ -117,6 +121,8 @@ subtrees:
            title: SGLang inference performance testing
          - file: how-to/rocm-for-ai/inference/benchmark-docker/sglang-distributed.rst
            title: SGLang distributed inference with Mooncake
          - file: how-to/rocm-for-ai/inference/xdit-diffusion-inference.rst
            title: xDiT diffusion inference
          - file: how-to/rocm-for-ai/inference/deploy-your-model.rst
            title: Deploy your model
@@ -134,6 +140,8 @@ subtrees:
            title: Profile and debug
          - file: how-to/rocm-for-ai/inference-optimization/workload.rst
            title: Workload optimization
          - file: how-to/rocm-for-ai/inference-optimization/vllm-optimization.rst
            title: vLLM V1 performance optimization
      - url: https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/
        title: AI tutorials
@@ -180,7 +188,7 @@ subtrees:
          - file: conceptual/gpu-arch/mi300-mi200-performance-counters.rst
            title: MI300 and MI200 performance counters
          - file: conceptual/gpu-arch/mi350-performance-counters.rst
-            title: MI350 series performance counters
+            title: MI350 Series performance counters
      - file: conceptual/gpu-arch/mi250.md
        title: MI250 microarchitecture
        subtrees:
@@ -214,6 +222,8 @@ subtrees:
    title: ROCm tools, compilers, and runtimes
  - file: reference/gpu-arch-specs.rst
  - file: reference/gpu-atomics-operation.rst
  - file: reference/env-variables.rst
    title: Environment variables
  - file: reference/precision-support.rst
    title: Data types and precision support
  - file: reference/graph-safe-support.rst
--- a/docs/what-is-rocm.rst
+++ b/docs/what-is-rocm.rst
@@ -123,7 +123,8 @@ Performance
 .. note::
-  `ROCprof Compute Viewer <https://rocm.docs.amd.com/projects/rocprof-compute-viewer/en/amd-mainline/>`_ is a tool for visualizing and analyzing GPU thread trace data collected using :doc:`rocprofv3 <rocprofiler-sdk:index>`. Note that `ROCprof Compute Viewer <https://rocm.docs.amd.com/projects/rocprof-compute-viewer/en/amd-mainline/>`_ is in an early access state. Running production workloads is not recommended.
+  `ROCprof Compute Viewer <https://rocm.docs.amd.com/projects/rocprof-compute-viewer/en/amd-mainline/>`_ is a tool for visualizing and analyzing GPU thread trace data collected using :doc:`rocprofv3 <rocprofiler-sdk:index>`.
  Note that `ROCprof Compute Viewer <https://rocm.docs.amd.com/projects/rocprof-compute-viewer/en/amd-mainline/>`_ is in an early access state. Running production workloads is not recommended.
 Development
 ^^^^^^^^^^^
Author	SHA1	Message	Date
randyh62	2b83a962a0	Use intersphinx links for deep learning (#5859 ) * Use intersphinx links for deep learning * Update deep-learning-rocm.rst remove Taichi * Update deep-learning-rocm.rst Change Install link to "link" * Apply suggestion from @randyh62 OK	2026-01-20 09:17:37 -08:00
Jeffrey Novotny	54bf4c0319	Add missing APU entries to GPU hardware specifications (#646 ) (#5862 ) (#5863 ) * Add missing APU entries to GPU hardware specifications * Move Ryzen APUs to new tab * Add new column to Ryzen table and rename column elsewhere --------- (cherry picked from commit 7ab402a3b3e580ca27321635596ca14eca8c1277) (cherry picked from commit `33fbde69db`) Co-authored-by: alexxu-amd <159800977+alexxu-amd@users.noreply.github.com>	2026-01-16 13:02:06 -05:00
peterjunpark	4347a11bc4	Doc update for vLLM refactor #5855 (#5856 ) (cherry picked from commit `a745e45dcb`)	2026-01-15 11:34:02 -05:00
ROCm Docs Automation	2b7fde505f	Update rocm-docs-core to 1.31.2	2026-01-14 11:26:11 -05:00
anisha-amd	a98d6a5777	Docs: Ray release 25.12 and compatibility version format standardization (#5845 ) (#5846 )	2026-01-08 12:29:00 -05:00
Swati Rawat	38b271df55	Merge pull request #5843 from SwRaw/sw_cherrypick Cherrypicking amd-smi updates from ROCm internal	2026-01-08 20:33:14 +05:30
Swati Rawat	4184d1ee1f	Update docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v24.12-dev.rst Co-authored-by: peterjunpark <git@peterjunpark.com>	2026-01-08 16:46:22 +05:30
Swati Rawat	0786c328c1	Update docs/how-to/rocm-for-ai/system-setup/prerequisite-system-validation.rst Co-authored-by: peterjunpark <git@peterjunpark.com>	2026-01-08 16:46:22 +05:30
Swati Rawat	88ea6072f5	Update docs/how-to/rocm-for-ai/system-setup/prerequisite-system-validation.rst Co-authored-by: peterjunpark <git@peterjunpark.com>	2026-01-08 16:46:22 +05:30
Swati Rawat	b32dcc8570	Update docs/how-to/rocm-for-ai/system-setup/prerequisite-system-validation.rst Co-authored-by: peterjunpark <git@peterjunpark.com>	2026-01-08 16:46:22 +05:30
Swati Rawat	0faa92e922	Update docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v24.12-dev.rst Co-authored-by: peterjunpark <git@peterjunpark.com>	2026-01-08 16:46:21 +05:30
Swati Rawat	26ae989602	Update docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v24.12-dev.rst Co-authored-by: peterjunpark <git@peterjunpark.com>	2026-01-08 16:46:21 +05:30
srawat	4402dc4147	Update single-gpu-fine-tuning-and-inference.rst	2026-01-08 16:46:21 +05:30
srawat	5eda438e0a	Update multi-gpu-fine-tuning-and-inference.rst	2026-01-08 16:46:20 +05:30
srawat	049784e1a7	Update prerequisite-system-validation.rst	2026-01-08 16:42:18 +05:30
srawat	f12169c5b7	replace rocm-smi reference with amd-smi	2026-01-08 16:42:18 +05:30
peterjunpark	b35d1a0627	fix(primus-pytorch.rst): FP8 config instead of BF16 (#5839 ) (cherry picked from commit `2dc22ca890`)	2026-01-07 13:51:50 -05:00
Pratik Basyal	912618cb08	ROCM-core version fixed (#5827 ) (#5828 )	2026-01-02 16:10:16 -05:00
peterjunpark	7d2feaa8b1	Fix inconsistency in xDiT doc (#5823 ) Fix inconsistency in xDiT doc (cherry picked from commit `172b0f7c08`)	2025-12-29 10:29:59 -05:00
peterjunpark	7d0d114994	Merge pull request #5821 from peterjunpark/docs/7.1.1 [docs/7.1.1] Add xDiT and Primus doc updates	2025-12-29 08:49:44 -05:00
peterjunpark	2a65394e32	Update docs for xDiT diffusion inference 25.13 Docker release (#5820 ) * archive previous version * add xdit 25.13 * update history index * add perf results section (cherry picked from commit `c67fac78bd`)	2025-12-29 08:45:29 -05:00
peterjunpark	268c1332c9	Update training docs for Primus/25.11 (#5819 ) * update conf and toc.yml.in * archive previous versions archive data files update anchors * primus pytorch: remove training batch size args * update primus megatron run cmds multi-node * update primus pytorch update * update update * update docker tag (cherry picked from commit `e0b8ec4dfb`)	2025-12-29 08:45:17 -05:00
Pratik Basyal	374e0944dc	OS table removed from compatibility table [develop] (#5810 ) (#5811 ) * OS table removed from compatibility table * Feedback added * Azure Linux 3.0 and compatibility version update * Version fix * Review feedback added * Minor change	2025-12-23 16:38:03 -05:00
peterjunpark	512e311041	Update xdit diffusion inference history (#5808 ) (#5809 ) * Update xdit diffusion inference history * fix (cherry picked from commit `3a43bacdda`)	2025-12-22 11:14:57 -05:00
peterjunpark	ad4f486635	fix link to ROCm PyT docker image (#5803 ) (#5804 ) (cherry picked from commit `48d8fe139b`)	2025-12-19 15:51:20 -05:00
peterjunpark	485886712b	clean up formatting in FA2 page (#5795 ) (#5796 ) (cherry picked from commit `7455fe57b8`)	2025-12-19 09:38:20 -05:00
peterjunpark	1cd6a14a22	Update Flash Attention guidance in "Model acceleration libraries" (#5793 ) * flash attention update Signed-off-by: seungrok.jung <seungrok.jung@amd.com> flash attention update Signed-off-by: seungrok.jung <seungrok.jung@amd.com> flash attention update Signed-off-by: seungrok.jung <seungrok.jung@amd.com> sentence-case heading * Update docs/how-to/rocm-for-ai/inference-optimization/model-acceleration-libraries.rst Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> --------- Co-authored-by: seungrok.jung <seungrok.jung@amd.com> Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> (cherry picked from commit `52c0a47e84`)	2025-12-19 09:00:40 -05:00
peterjunpark	a17f04a3b5	Update documentation for JAX training MaxText 25.11 release (#5789 ) (#5790 ) (cherry picked from commit `cbab9a465d`)	2025-12-18 11:26:42 -05:00
peterjunpark	94de66ef3f	[docs/7.1.1] Publish vLLM and xDiT doc updates (#5787 ) * vLLM inference benchmark 1210 (#5776) * Archive previous ver fix anchors * Update vllm.rst and data yaml for 20251210 (cherry picked from commit `1b4f25733d`) * xDiT diffusion inference v25.12 documentation update (#5786) * Add xdit-diffusion ROCm docs page. * Update template formatting and fix sphinx warnings * Add System Validation section. * Add sw component versions/commits. * Update to use latest v25.10 image instead of v25.9 * Update commands and add FLUX instructions. * Update Flux instructions. Change image tag. Describe as diffusion inference instead of specifically video. * git rm xdit-video-diffusion.rst * Docs for v25.12 * Add hyperlinks to components * Command fixes * -Diffusers suffix * Simplify yaml file and cleanup main rst page. * Spelling, added 'js' * fix merge conflict fix --------- Co-authored-by: Kristoffer <kristoffer.torp@amd.com> (cherry picked from commit `459283da3c`) --------- Co-authored-by: Kristoffer <kristoffer.torp@amd.com>	2025-12-17 10:28:30 -05:00
Pratik Basyal	e5cebe7b4e	Taichi removed from ROCm docs [Develop] (#5779 ) (#5781 ) * Taichi removed from ROCm docs * Warnings fixed	2025-12-16 13:24:12 -05:00
Pratik Basyal	7047cfa19c	Onnx and rocshmem version updated (#5760 ) (#5764 )	2025-12-11 17:11:05 -05:00
Matt Williams	de71bf5fa7	Merge pull request #5759 from ROCm/cherry-pick-701 Fixing link redirects (#5758)	2025-12-10 11:39:53 -05:00
Matt Williams	0d17c96f7f	Fixing link redirects (#5758 ) * Update multi-gpu-fine-tuning-and-inference.rst * Update pytorch-training-v25.6.rst * Update pytorch-compatibility.rst	2025-12-10 11:31:26 -05:00
anisha-amd	2f8c99f7f0	Docs: update verl compatibility - fix (#5755 )	2025-12-09 19:52:12 -05:00
anisha-amd	982927e866	Docs: verl framework - compatibility - 25.11 release (#5752 ) (#5753 )	2025-12-09 12:02:20 -05:00
peterjunpark	8f45b791fe	Fix Primus PyTorch doc: training.batch_size -> training.local_batch_size (#5748 ) (#5749 ) (cherry picked from commit `bf74351e5a`)	2025-12-08 13:59:00 -05:00
yugang-amd	f7c7587b10	xdit-diffusion v25.11 docs (#5743 )	2025-12-05 17:08:21 -05:00
Pratik Basyal	96b3c0d4f3	PyTorch 2.7 support added (#5740 ) (#5741 )	2025-12-04 17:00:34 -05:00
peterjunpark	d6d4d2ef92	fix docker hub links for primus:v25.10 (#5738 ) (cherry picked from commit `453751a86f`)	2025-12-04 09:21:53 -05:00
peterjunpark	8647ebcf76	Update training Docker docs for Primus 25.10 (#5737 ) (cherry picked from commit `fb644412d5`)	2025-12-04 09:21:53 -05:00
Pratik Basyal	48ca38b0dc	Conflict resolved (#5735 )	2025-12-03 09:02:57 -05:00
Istvan Kiss	acbd671e99	JAX key features and enhancements (#5708 ) Co-authored-by: Pratik Basyal <prbasyal@amd.com>	2025-12-01 19:52:07 +01:00
Pratik Basyal	133a97ec18	711 post GA known issue update [docs/711] (#5723 ) * 7.1.1 known issues post GA (#5721) * rocblas known issues added * Minor change * Update RELEASE.md Co-authored-by: Jeffrey Novotny <jnovotny@amd.com> * Resolved * Update RELEASE.md Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> --------- Co-authored-by: Jeffrey Novotny <jnovotny@amd.com> Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> * GitHub Issue added --------- Co-authored-by: Jeffrey Novotny <jnovotny@amd.com> Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>	2025-11-30 00:16:26 -05:00
Pratik Basyal	2d40066f29	Merged cell removed for coloring issue (#5713 ) (#5714 )	2025-11-27 20:03:11 -05:00
ROCm Docs Automation	5d7fdace0e	Update rocm-docs-core to 1.30.0	2025-11-26 17:09:50 -05:00
Istvan Kiss	7dbcdc5deb	Update release notes links ROCm 7.1.1 (#5705 )	2025-11-26 20:02:33 +01:00
Pratik Basyal	a966db29ca	Known issue from 7.1.0 removed (#5702 ) (#5703 )	2025-11-26 12:30:28 -05:00
Pratik Basyal	9ea8a48b3a	Link and PyTorch version updated (#5700 ) (#5701 )	2025-11-26 12:01:12 -05:00
Alex Xu	9956d72614	fix dependency	2025-11-26 11:42:22 -05:00
Alex Xu	305d24f486	Merge branch 'roc-7.1.x' into docs/7.1.1	2025-11-26 11:37:06 -05:00
Alex Xu	26f6b6b3e1	Merge branch 'roc-7.1.x' into docs/7.1.1	2025-11-26 11:29:02 -05:00
Alex Xu	d4cdbd79a3	Merge branch 'develop' into docs/7.1.1	2025-11-26 08:47:19 -05:00
alexxu-amd	26d1ab7d27	Update documentation requirements	2025-11-25 16:30:46 -05:00
alexxu-amd	272c9f6be3	Update documentation requirements	2025-11-25 15:37:04 -05:00