docs(jax-maxtext training): remove single-node for llama 3.1 405b (#5939 )

(cherry picked from commit a3a4440909)
Add docs for Maxtext 26.1 Docker release (#5936 )
2026-02-11 06:55:06 -05:00 · 2026-02-06 13:50:03 -05:00 · 2026-02-06 13:30:47 -05:00 · 2026-02-04 13:48:04 -05:00 · 2026-01-30 12:54:26 -05:00 · 2026-01-29 11:42:03 -05:00
7 changed files with 32 additions and 53 deletions
--- a/.azuredevops/components/MIVisionX.yml
+++ b/.azuredevops/components/MIVisionX.yml
@@ -62,9 +62,14 @@ parameters:
 - name: rocmDependencies
  type: object
  default:
+    - AMDMIGraphX
    - clr
    - half
+    - hipBLAS-common
+    - hipBLASLt
    - llvm-project
+    - MIOpen
+    - rocBLAS
    - rocDecode
    - rocm-cmake
    - rocminfo
@@ -77,7 +82,12 @@ parameters:
    - aomp
    - clr
    - half
+    - hipBLAS-common
+    - hipBLASLt
    - llvm-project
+    - MIOpen
+    - rocBLAS
+    - rocprofiler-register
    - ROCR-Runtime
    - roctracer
    - rpp
--- a/.azuredevops/components/hipTensor.yml
+++ b/.azuredevops/components/hipTensor.yml
@@ -71,7 +71,6 @@ parameters:
 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
  - job: ${{ parameters.componentName }}_build_${{ job.target }}
-    timeoutInMinutes: 120
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
--- a/.azuredevops/components/origami.yml
+++ b/.azuredevops/components/origami.yml
@@ -47,10 +47,8 @@ parameters:
  type: object
  default:
    - nanobind>=2.0.0
-    - numpy
    - pytest
    - pytest-cov
-    - torch
 - name: rocmDependencies
  type: object
  default:
@@ -103,7 +101,8 @@ jobs:
    - template: /.azuredevops/variables-global.yml
    - name: ROCM_PATH
      value: $(Agent.BuildDirectory)/rocm
-    pool: ${{ variables.MEDIUM_BUILD_POOL }}
+    pool:
+      vmImage: ${{ variables.BASE_BUILD_POOL }}
    ${{ if eq(job.os, 'almalinux8') }}:
      container:
        image: rocmexternalcicd.azurecr.io/manylinux228:latest
@@ -240,7 +239,7 @@ jobs:
          targetType: inline
          workingDirectory: build
          script: |
-            cmake --build . --target origami-tests _pyorigami -- -j$(nproc)
+            cmake --build . --target origami-tests origami_python -- -j$(nproc)
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
      # Run tests using CTest (discovers and runs both C++ and Python tests)
      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
--- a/default.xml
+++ b/default.xml
@@ -1,12 +1,13 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <manifest>
    <remote name="rocm-org" fetch="https://github.com/ROCm/" />
-    <default revision="refs/tags/rocm-7.2.0"
+    <default revision="refs/tags/rocm-7.1.1"
     remote="rocm-org"
     sync-c="true"
     sync-j="4" />
 <!--list of projects for ROCm-->
    <project name="ROCK-Kernel-Driver" />
+    <project name="amdsmi" />
    <project name="rocm_bandwidth_test" />
    <project name="rocm-examples" />
 <!--HIP Projects-->
@@ -24,16 +25,30 @@
    <project groups="mathlibs" name="MIVisionX" />
    <project groups="mathlibs" name="ROCmValidationSuite" />
    <project groups="mathlibs" name="composable_kernel" />
+    <project groups="mathlibs" name="hipSOLVER" />
+    <project groups="mathlibs" name="hipTensor" />
    <project groups="mathlibs" name="hipfort" />
    <project groups="mathlibs" name="rccl" />
    <project groups="mathlibs" name="rocAL" />
    <project groups="mathlibs" name="rocALUTION" />
    <project groups="mathlibs" name="rocDecode" />
    <project groups="mathlibs" name="rocJPEG" />
+    <!-- The following components have been migrated to rocm-libraries:
+        hipBLAS-common hipBLAS hipBLASLt hipCUB
+        hipFFT hipRAND hipSPARSE hipSPARSELt
+        MIOpen rocBLAS rocFFT rocPRIM rocRAND
+        rocSPARSE rocThrust Tensile -->
    <project groups="mathlibs" name="rocm-libraries" />
+    <!-- The following components have been migrated to rocm-systems:
+        aqlprofile clr hip hip-tests hipother
+        rdc rocm-core rocm_smi_lib rocminfo rocprofiler-compute 
+        rocprofiler-register rocprofiler-sdk rocprofiler-systems 
+        rocprofiler rocr-runtime roctracer -->
    <project groups="mathlibs" name="rocm-systems" />
    <project groups="mathlibs" name="rocPyDecode" />
+    <project groups="mathlibs" name="rocSOLVER" />
    <project groups="mathlibs" name="rocSHMEM" />
+    <project groups="mathlibs" name="rocWMMA" />
    <project groups="mathlibs" name="rocm-cmake" />
    <project groups="mathlibs" name="rpp" />
    <project groups="mathlibs" name="TransferBench" />
@@ -41,4 +56,4 @@
    <project name="aomp" path="openmp-extras/aomp" />
    <project name="aomp-extras" path="openmp-extras/aomp-extras" />
    <project name="flang" path="openmp-extras/flang" />
-</manifest>
+</manifest>
--- a/docs/how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference.rst
+++ b/docs/how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference.rst
@@ -130,7 +130,7 @@ After loading the model in this way, the model is fully ready to use the resourc
 torchtune for fine-tuning and inference
 =============================================

-`torchtune <https://meta-pytorch.org/torchtune/main/>`_ is a PyTorch-native library for easy single and multi-GPU 
+`torchtune <https://pytorch.org/torchtune/main/>`_ is a PyTorch-native library for easy single and multi-GPU
 model fine-tuning and inference with LLMs.

 #. Install torchtune using pip.
--- a/docs/sphinx/requirements.txt
+++ b/docs/sphinx/requirements.txt
@@ -37,7 +37,7 @@ click==8.3.1
    #   sphinx-external-toc
 comm==0.2.3
    # via ipykernel
-cryptography==46.0.5
+cryptography==46.0.3
    # via pyjwt
 debugpy==1.8.19
    # via ipykernel
--- a/tools/rocm-build/rocm-7.2.0.xml
+++ b/tools/rocm-build/rocm-7.2.0.xml
@@ -1,44 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<manifest>
-    <remote name="rocm-org" fetch="https://github.com/ROCm/" />
-    <default revision="refs/tags/rocm-7.2.0"
-     remote="rocm-org"
-     sync-c="true"
-     sync-j="4" />
-<!--list of projects for ROCm-->
-    <project name="ROCK-Kernel-Driver" />
-    <project name="rocm_bandwidth_test" />
-    <project name="rocm-examples" />
-<!--HIP Projects-->
-    <project name="HIPIFY" />
-<!-- The following projects are all associated with the AMDGPU LLVM compiler -->
-    <project name="half" />
-    <project name="llvm-project" />
-    <project name="spirv-llvm-translator" />
-<!-- gdb projects -->
-    <project name="ROCdbgapi" />
-    <project name="ROCgdb" />
-    <project name="rocr_debug_agent" />
-<!-- ROCm Libraries -->
-    <project groups="mathlibs" name="AMDMIGraphX" />
-    <project groups="mathlibs" name="MIVisionX" />
-    <project groups="mathlibs" name="ROCmValidationSuite" />
-    <project groups="mathlibs" name="composable_kernel" />
-    <project groups="mathlibs" name="hipfort" />
-    <project groups="mathlibs" name="rccl" />
-    <project groups="mathlibs" name="rocAL" />
-    <project groups="mathlibs" name="rocALUTION" />
-    <project groups="mathlibs" name="rocDecode" />
-    <project groups="mathlibs" name="rocJPEG" />
-    <project groups="mathlibs" name="rocm-libraries" />
-    <project groups="mathlibs" name="rocm-systems" />
-    <project groups="mathlibs" name="rocPyDecode" />
-    <project groups="mathlibs" name="rocSHMEM" />
-    <project groups="mathlibs" name="rocm-cmake" />
-    <project groups="mathlibs" name="rpp" />
-    <project groups="mathlibs" name="TransferBench" />
-<!-- Projects for OpenMP-Extras -->
-    <project name="aomp" path="openmp-extras/aomp" />
-    <project name="aomp-extras" path="openmp-extras/aomp-extras" />
-    <project name="flang" path="openmp-extras/flang" />
-</manifest>
Author	SHA1	Message	Date
peterjunpark	208443edec	docs(jax-maxtext training): remove single-node for llama 3.1 405b (#5939 ) (cherry picked from commit `a3a4440909`)	2026-02-06 13:50:03 -05:00
peterjunpark	b62e0546fd	Add docs for Maxtext 26.1 Docker release (#5936 ) * archive previous version * update doc * add multi node for llama3 405b fix (cherry picked from commit `1d5baf2c73`)	2026-02-06 13:30:47 -05:00
anisha-amd	de99ee0fe2	Docs: FlashInfer compatibility - frameworks release 26.01 (#5929 ) (#5930 )	2026-02-04 13:48:04 -05:00
peterjunpark	811188dc13	Update Primus docs for 26.1 release (#5911 ) (#5918 ) * archive previous versions update conf fix fix docker hub url fix * update history pages * update docker info * update configs * update primus commit (cherry picked from commit `d8b6ee47e3`)	2026-01-30 12:54:26 -05:00
peterjunpark	ec36bc9971	Publish vLLM / SGLang + MoRI distributed inference cookbooks (#5912 ) (#5913 ) * add recipes * clean up update clean up fix * update sglang docker instructions docker image tag add user to docker group fix * update pldm/bkc * update pldm/bkc * add bkc note * update bkc notes * update article info * update wordlist * fix linting issues * fix linting issues * fix linting * fix ref (cherry picked from commit `d1165b7359`)	2026-01-29 11:42:03 -05:00
Pratik Basyal	cd208e7d74	PLDM Note change 720 (#5894 ) * Note change * Minor change	2026-01-23 10:32:00 -05:00
Pratik Basyal	af8ea73581	720 reference link update and note fixes [Develop] (#5883 ) (#5884 ) * Links updated to 7.2.0 * COmpatibility note fixed	2026-01-22 12:21:46 -05:00
Pratik Basyal	f1c86d7d29	720 Post GA Known Issues update (#5879 ) * 7.2.0 Known issues and PLDM table updated (#5877) * Known issues and PLDM table updated * JAX workload known issues added * Minor changes * Minor update	2026-01-21 17:29:18 -05:00
Alex Xu	370816001e	Merge branch 'roc-7.2.x' into docs/7.2.0	2026-01-21 15:29:08 -05:00
Swati Rawat	d5994da509	Merge pull request #5872 from SwRaw/swaraw_cherrypick Cherrypicking replacement of rocm-smi with amd-smi from ROCm internal	2026-01-21 19:10:51 +05:30
srawat	c02f86c0e7	Update prerequisite-system-validation.rst	2026-01-21 17:43:10 +05:30
srawat	d3523c24d3	replace rocm-smi reference with amd-smi	2026-01-21 17:40:26 +05:30
Swati Rawat	1980239b81	Update docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v24.12-dev.rst Co-authored-by: peterjunpark <git@peterjunpark.com>	2026-01-21 17:31:41 +05:30
Swati Rawat	c75fd6f532	Update docs/how-to/rocm-for-ai/system-setup/prerequisite-system-validation.rst Co-authored-by: peterjunpark <git@peterjunpark.com>	2026-01-21 17:31:05 +05:30
Swati Rawat	72cb598190	Update docs/how-to/rocm-for-ai/system-setup/prerequisite-system-validation.rst Co-authored-by: peterjunpark <git@peterjunpark.com>	2026-01-21 17:30:33 +05:30
Swati Rawat	9b55b77aaa	Update docs/how-to/rocm-for-ai/system-setup/prerequisite-system-validation.rst Co-authored-by: peterjunpark <git@peterjunpark.com>	2026-01-21 17:29:45 +05:30
Swati Rawat	8267303e1d	Update docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v24.12-dev.rst Co-authored-by: peterjunpark <git@peterjunpark.com>	2026-01-21 17:29:04 +05:30
Swati Rawat	86d2c4e891	Update docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v24.12-dev.rst Co-authored-by: peterjunpark <git@peterjunpark.com>	2026-01-21 17:28:23 +05:30
srawat	2977e35330	Update single-gpu-fine-tuning-and-inference.rst	2026-01-21 17:27:13 +05:30
srawat	e95955f572	Update multi-gpu-fine-tuning-and-inference.rst	2026-01-21 17:27:13 +05:30