Compare commits

..

20 Commits

Author SHA1 Message Date
peterjunpark
208443edec docs(jax-maxtext training): remove single-node for llama 3.1 405b (#5939)
(cherry picked from commit a3a4440909)
2026-02-06 13:50:03 -05:00
peterjunpark
b62e0546fd Add docs for Maxtext 26.1 Docker release (#5936)
* archive previous version

* update doc

* add multi node for llama3 405b

fix

(cherry picked from commit 1d5baf2c73)
2026-02-06 13:30:47 -05:00
anisha-amd
de99ee0fe2 Docs: FlashInfer compatibility - frameworks release 26.01 (#5929) (#5930) 2026-02-04 13:48:04 -05:00
peterjunpark
811188dc13 Update Primus docs for 26.1 release (#5911) (#5918)
* archive previous versions

update conf

fix

fix docker hub url

fix

* update history pages

* update docker info

* update configs

* update primus commit

(cherry picked from commit d8b6ee47e3)
2026-01-30 12:54:26 -05:00
peterjunpark
ec36bc9971 Publish vLLM / SGLang + MoRI distributed inference cookbooks (#5912) (#5913)
* add recipes

* clean up

update

clean up

fix

* update sglang docker instructions

docker image tag
add user to docker group

fix

* update pldm/bkc

* update pldm/bkc

* add bkc note

* update bkc notes

* update article info

* update wordlist

* fix linting issues

* fix linting issues

* fix linting

* fix ref

(cherry picked from commit d1165b7359)
2026-01-29 11:42:03 -05:00
Pratik Basyal
cd208e7d74 PLDM Note change 720 (#5894)
* Note change

* Minor change
2026-01-23 10:32:00 -05:00
Pratik Basyal
af8ea73581 720 reference link update and note fixes [Develop] (#5883) (#5884)
* Links updated to 7.2.0

* Compatibility note fixed
2026-01-22 12:21:46 -05:00
Pratik Basyal
f1c86d7d29 720 Post GA Known Issues update (#5879)
* 7.2.0 Known issues and PLDM table updated (#5877)

* Known issues and PLDM table updated

* JAX workload known issues added

* Minor changes

* Minor update
2026-01-21 17:29:18 -05:00
Alex Xu
370816001e Merge branch 'roc-7.2.x' into docs/7.2.0 2026-01-21 15:29:08 -05:00
Swati Rawat
d5994da509 Merge pull request #5872 from SwRaw/swaraw_cherrypick
Cherrypicking replacement of rocm-smi with amd-smi from ROCm internal
2026-01-21 19:10:51 +05:30
srawat
c02f86c0e7 Update prerequisite-system-validation.rst 2026-01-21 17:43:10 +05:30
srawat
d3523c24d3 replace rocm-smi reference with amd-smi 2026-01-21 17:40:26 +05:30
Swati Rawat
1980239b81 Update docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v24.12-dev.rst
Co-authored-by: peterjunpark <git@peterjunpark.com>
2026-01-21 17:31:41 +05:30
Swati Rawat
c75fd6f532 Update docs/how-to/rocm-for-ai/system-setup/prerequisite-system-validation.rst
Co-authored-by: peterjunpark <git@peterjunpark.com>
2026-01-21 17:31:05 +05:30
Swati Rawat
72cb598190 Update docs/how-to/rocm-for-ai/system-setup/prerequisite-system-validation.rst
Co-authored-by: peterjunpark <git@peterjunpark.com>
2026-01-21 17:30:33 +05:30
Swati Rawat
9b55b77aaa Update docs/how-to/rocm-for-ai/system-setup/prerequisite-system-validation.rst
Co-authored-by: peterjunpark <git@peterjunpark.com>
2026-01-21 17:29:45 +05:30
Swati Rawat
8267303e1d Update docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v24.12-dev.rst
Co-authored-by: peterjunpark <git@peterjunpark.com>
2026-01-21 17:29:04 +05:30
Swati Rawat
86d2c4e891 Update docs/how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v24.12-dev.rst
Co-authored-by: peterjunpark <git@peterjunpark.com>
2026-01-21 17:28:23 +05:30
srawat
2977e35330 Update single-gpu-fine-tuning-and-inference.rst 2026-01-21 17:27:13 +05:30
srawat
e95955f572 Update multi-gpu-fine-tuning-and-inference.rst 2026-01-21 17:27:13 +05:30
7 changed files with 32 additions and 53 deletions

View File

@@ -62,9 +62,14 @@ parameters:
- name: rocmDependencies
type: object
default:
- AMDMIGraphX
- clr
- half
- hipBLAS-common
- hipBLASLt
- llvm-project
- MIOpen
- rocBLAS
- rocDecode
- rocm-cmake
- rocminfo
@@ -77,7 +82,12 @@ parameters:
- aomp
- clr
- half
- hipBLAS-common
- hipBLASLt
- llvm-project
- MIOpen
- rocBLAS
- rocprofiler-register
- ROCR-Runtime
- roctracer
- rpp

View File

@@ -71,7 +71,6 @@ parameters:
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.target }}
timeoutInMinutes: 120
variables:
- group: common
- template: /.azuredevops/variables-global.yml

View File

@@ -47,10 +47,8 @@ parameters:
type: object
default:
- nanobind>=2.0.0
- numpy
- pytest
- pytest-cov
- torch
- name: rocmDependencies
type: object
default:
@@ -103,7 +101,8 @@ jobs:
- template: /.azuredevops/variables-global.yml
- name: ROCM_PATH
value: $(Agent.BuildDirectory)/rocm
pool: ${{ variables.MEDIUM_BUILD_POOL }}
pool:
vmImage: ${{ variables.BASE_BUILD_POOL }}
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
@@ -240,7 +239,7 @@ jobs:
targetType: inline
workingDirectory: build
script: |
cmake --build . --target origami-tests _pyorigami -- -j$(nproc)
cmake --build . --target origami-tests origami_python -- -j$(nproc)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
# Run tests using CTest (discovers and runs both C++ and Python tests)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml

View File

@@ -1,12 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<manifest>
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
<default revision="refs/tags/rocm-7.2.0"
<default revision="refs/tags/rocm-7.1.1"
remote="rocm-org"
sync-c="true"
sync-j="4" />
<!--list of projects for ROCm-->
<project name="ROCK-Kernel-Driver" />
<project name="amdsmi" />
<project name="rocm_bandwidth_test" />
<project name="rocm-examples" />
<!--HIP Projects-->
@@ -24,16 +25,30 @@
<project groups="mathlibs" name="MIVisionX" />
<project groups="mathlibs" name="ROCmValidationSuite" />
<project groups="mathlibs" name="composable_kernel" />
<project groups="mathlibs" name="hipSOLVER" />
<project groups="mathlibs" name="hipTensor" />
<project groups="mathlibs" name="hipfort" />
<project groups="mathlibs" name="rccl" />
<project groups="mathlibs" name="rocAL" />
<project groups="mathlibs" name="rocALUTION" />
<project groups="mathlibs" name="rocDecode" />
<project groups="mathlibs" name="rocJPEG" />
<!-- The following components have been migrated to rocm-libraries:
hipBLAS-common hipBLAS hipBLASLt hipCUB
hipFFT hipRAND hipSPARSE hipSPARSELt
MIOpen rocBLAS rocFFT rocPRIM rocRAND
rocSPARSE rocThrust Tensile -->
<project groups="mathlibs" name="rocm-libraries" />
<!-- The following components have been migrated to rocm-systems:
aqlprofile clr hip hip-tests hipother
rdc rocm-core rocm_smi_lib rocminfo rocprofiler-compute
rocprofiler-register rocprofiler-sdk rocprofiler-systems
rocprofiler rocr-runtime roctracer -->
<project groups="mathlibs" name="rocm-systems" />
<project groups="mathlibs" name="rocPyDecode" />
<project groups="mathlibs" name="rocSOLVER" />
<project groups="mathlibs" name="rocSHMEM" />
<project groups="mathlibs" name="rocWMMA" />
<project groups="mathlibs" name="rocm-cmake" />
<project groups="mathlibs" name="rpp" />
<project groups="mathlibs" name="TransferBench" />
@@ -41,4 +56,4 @@
<project name="aomp" path="openmp-extras/aomp" />
<project name="aomp-extras" path="openmp-extras/aomp-extras" />
<project name="flang" path="openmp-extras/flang" />
</manifest>
</manifest>

View File

@@ -130,7 +130,7 @@ After loading the model in this way, the model is fully ready to use the resourc
torchtune for fine-tuning and inference
=============================================
`torchtune <https://meta-pytorch.org/torchtune/main/>`_ is a PyTorch-native library for easy single and multi-GPU
`torchtune <https://pytorch.org/torchtune/main/>`_ is a PyTorch-native library for easy single and multi-GPU
model fine-tuning and inference with LLMs.
#. Install torchtune using pip.

View File

@@ -37,7 +37,7 @@ click==8.3.1
# sphinx-external-toc
comm==0.2.3
# via ipykernel
cryptography==46.0.5
cryptography==46.0.3
# via pyjwt
debugpy==1.8.19
# via ipykernel

View File

@@ -1,44 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<manifest>
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
<default revision="refs/tags/rocm-7.2.0"
remote="rocm-org"
sync-c="true"
sync-j="4" />
<!--list of projects for ROCm-->
<project name="ROCK-Kernel-Driver" />
<project name="rocm_bandwidth_test" />
<project name="rocm-examples" />
<!--HIP Projects-->
<project name="HIPIFY" />
<!-- The following projects are all associated with the AMDGPU LLVM compiler -->
<project name="half" />
<project name="llvm-project" />
<project name="spirv-llvm-translator" />
<!-- gdb projects -->
<project name="ROCdbgapi" />
<project name="ROCgdb" />
<project name="rocr_debug_agent" />
<!-- ROCm Libraries -->
<project groups="mathlibs" name="AMDMIGraphX" />
<project groups="mathlibs" name="MIVisionX" />
<project groups="mathlibs" name="ROCmValidationSuite" />
<project groups="mathlibs" name="composable_kernel" />
<project groups="mathlibs" name="hipfort" />
<project groups="mathlibs" name="rccl" />
<project groups="mathlibs" name="rocAL" />
<project groups="mathlibs" name="rocALUTION" />
<project groups="mathlibs" name="rocDecode" />
<project groups="mathlibs" name="rocJPEG" />
<project groups="mathlibs" name="rocm-libraries" />
<project groups="mathlibs" name="rocm-systems" />
<project groups="mathlibs" name="rocPyDecode" />
<project groups="mathlibs" name="rocSHMEM" />
<project groups="mathlibs" name="rocm-cmake" />
<project groups="mathlibs" name="rpp" />
<project groups="mathlibs" name="TransferBench" />
<!-- Projects for OpenMP-Extras -->
<project name="aomp" path="openmp-extras/aomp" />
<project name="aomp-extras" path="openmp-extras/aomp-extras" />
<project name="flang" path="openmp-extras/flang" />
</manifest>