Compare commits

...

26 Commits

Author SHA1 Message Date
David Dixon
921ffc53f5 Add libdivide 2025-09-04 02:04:19 +00:00
Joseph Macaranas
3aab9e1bc5 Modify sparseCheckoutDirectories in checkout.yml (#5251)
Added 'shared' to sparseCheckoutDirectories parameter.
2025-09-03 16:58:17 -04:00
David Dixon
2b0ce5e5c2 Fix typo (#5250) 2025-09-03 13:59:41 -06:00
David Dixon
f1be2d291a Add fmtlib version that works with spdlog (#5249) 2025-09-03 13:26:18 -06:00
amd-hsivasun
07cb61f969 Update testjob dependsOn 2025-09-03 14:02:47 -04:00
amd-hsivasun
c486c39b50 Update rocprofiler-compute.yml
Reverted Component name and updated job names
2025-09-03 14:02:47 -04:00
amd-hsivasun
e68d9e9ce2 Update rocprofiler-compute.yml 2025-09-03 14:02:47 -04:00
amd-hsivasun
bff5c4a955 Fixed sparseCheckoutDir 2025-09-03 14:02:47 -04:00
amd-hsivasun
b0abc43c46 Added sparseCheckout to testjob template 2025-09-03 14:02:47 -04:00
amd-hsivasun
ceabccad83 Fixed componentName 2025-09-03 14:02:47 -04:00
amd-hsivasun
2628812fc4 [Ex CI] Enable rocprofiler-compute monorepo 2025-09-03 14:02:47 -04:00
amd-hsivasun
df3ea80290 Enable Roctracer Monorepo 2025-09-03 14:02:20 -04:00
David Dixon
b6647dfb22 Add spdlog source builds (#5247) 2025-09-03 11:35:53 -06:00
David Dixon
c34fddb26a Add boost deps (#5235) 2025-09-02 13:28:19 -06:00
Daniel Su
977e9c2295 [Ex CI] change hip-clr pipeline ID (#5230) 2025-08-27 13:06:08 -04:00
Daniel Su
eac9772fff [Ex CI] add temporary downstream path from rocBLAS to hipBLAS (#5184) 2025-08-27 13:05:51 -04:00
Daniel Su
151a4bd7bc [Ex CI] add retries to potentially flaky steps (#5175) 2025-08-27 13:05:26 -04:00
Daniel Su
9d28684161 [Ex CI] enable clr/hip/hipother monorepo builds (#5217) 2025-08-27 10:43:07 -04:00
Braden Stefanuk
9ea9b33d14 [superbuild] Configure pipeline (#5221) 2025-08-26 15:12:19 -06:00
Matt Williams
1d42f7cc62 Deep learning frameworks edits for scale (#5189)
* Deep learning frameworks edits for scale

Based on https://ontrack-internal.amd.com/browse/ROCDOC-1809

* update table

table

* leo comments

* formatting

* format

* update table based on feedback

* header

* Update machine learning page

* headers

* Apply suggestions from code review

Co-authored-by: anisha-amd <anisha.sankar@amd.com>

* Update .wordlist.txt

* formatting

* Update docs/how-to/deep-learning-rocm.rst

Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>

---------

Co-authored-by: Matt Williams <Matt.Williams+amdeng@amd.com>
Co-authored-by: anisha-amd <anisha.sankar@amd.com>
Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>
2025-08-22 11:46:07 -04:00
Peter Park
98029db4ee docs: Add Primus (Megatron) training Docker documentation (#5218) 2025-08-21 23:50:55 -04:00
Matt Williams
65ebbaa117 Merge pull request #5113 from ROCm/aqlprofile
AQLProfile component additions
2025-08-21 12:53:16 -04:00
Joseph Macaranas
3dfc0cdbf1 [External CI] Update CMake on MIOpen build pipeline (#5210) 2025-08-20 15:37:15 +00:00
Matt Williams
9786a75390 Update license 2025-07-31 10:33:36 -04:00
Matt Williams
95543cae2a Final edits 2025-07-30 14:43:52 -04:00
Matt Williams
1cf3eef9da AQLProfile component additions 2025-07-28 14:39:39 -04:00
41 changed files with 2741 additions and 415 deletions

View File

@@ -1,10 +1,29 @@
parameters:
- name: componentName
type: string
default: hip_clr_combined
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
@@ -35,93 +54,24 @@ parameters:
type: object
default:
- llvm-project
# hip and clr are tightly-coupled
# run this same template for both repos
# any changes for clr should just trigger HIP pipeline
# similarly for hipother repo, for Nvidia backend
- ROCR-Runtime
- name: jobMatrix
type: object
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt }
- { os: almalinux8, packageManager: dnf }
- { os: ubuntu2204, packageManager: apt, platform: amd }
- { os: ubuntu2204, packageManager: apt, platform: nvidia }
- { os: almalinux8, packageManager: dnf, platform: amd }
- { os: almalinux8, packageManager: dnf, platform: nvidia }
# HIP with AMD backend
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: hip_clr_combined_${{ job.os }}_amd
pool:
vmImage: 'ubuntu-22.04'
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
endpoint: ContainerService3
variables:
- group: common
- template: /.azuredevops/variables-global.yml
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }}
# checkout triggering repo (either HIP or clr)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
# if this is triggered by HIP repo, matching repo is clr
# if this is triggered by clr repo, matching repo is HIP
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: matching_repo
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: hipother_repo
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependenciesAMD }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
os: ${{ job.os }}
# compile clr
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
componentName: clr
cmakeBuildDir: '$(Build.SourcesDirectory)/clr/build'
cmakeSourceDir: '$(Build.SourcesDirectory)/clr'
os: ${{ job.os }}
useAmdclang: false
extraBuildFlags: >-
-DHIP_COMMON_DIR=$(Build.SourcesDirectory)/HIP
-DHIP_PLATFORM=amd
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
-DHIPCC_BIN_DIR=$(Agent.BuildDirectory)/rocm/bin
-DCLR_BUILD_HIP=ON
-DCLR_BUILD_OCL=ON
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
artifactName: amd
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
artifactName: amd
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
# parameters:
# aptPackages: ${{ parameters.aptPackages }}
# pipModules: ${{ parameters.pipModules }}
# environment: amd
# HIP with Nvidia backend
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: hip_clr_combined_${{ job.os }}_nvidia
- job: ${{ parameters.componentName }}_${{ job.os }}_${{ job.platform }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}
pool:
vmImage: 'ubuntu-22.04'
${{ if eq(job.os, 'almalinux8') }}:
@@ -140,49 +90,45 @@ jobs:
pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
# checkout triggering repo (either HIP or clr)
# full checkout of rocm-systems superrepo, we need clr, hip, and hipother
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
# if this is triggered by HIP repo, matching repo is clr
# if this is triggered by clr repo, matching repo is HIP
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: matching_repo
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: hipother_repo
# sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependenciesNvidia }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
os: ${{ job.os }}
- script: 'ls -1R $(Agent.BuildDirectory)/rocm'
displayName: 'Artifact listing'
# compile clr
${{ if eq(job.platform, 'amd') }}:
dependencyList: ${{ parameters.rocmDependenciesAMD }}
${{ elseif eq(job.platform, 'nvidia') }}:
dependencyList: ${{ parameters.rocmDependenciesNvidia }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
componentName: clr
cmakeBuildDir: '$(Build.SourcesDirectory)/clr/build'
cmakeSourceDir: '$(Build.SourcesDirectory)/clr'
cmakeBuildDir: $(Agent.BuildDirectory)/s/projects/clr/build
cmakeSourceDir: $(Agent.BuildDirectory)/s/projects/clr
os: ${{ job.os }}
useAmdclang: false
extraBuildFlags: >-
-DHIP_COMMON_DIR=$(Build.SourcesDirectory)/HIP
-DHIP_PLATFORM=nvidia
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
-DHIPCC_BIN_DIR=$(Agent.BuildDirectory)/rocm/bin
-DHIP_COMMON_DIR=$(Agent.BuildDirectory)/s/projects/hip
-DHIPNV_DIR=$(Agent.BuildDirectory)/s/projects/hipother/hipnv
-DHIP_PLATFORM=${{ job.platform }}
-DCLR_BUILD_HIP=ON
-DCLR_BUILD_OCL=OFF
-DHIPNV_DIR=$(Build.SourcesDirectory)/hipother/hipnv
-DCLR_BUILD_OCL=ON
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
artifactName: ${{ job.platform }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
artifactName: nvidia
artifactName: ${{ job.platform }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
# parameters:
# aptPackages: ${{ parameters.aptPackages }}
# pipModules: ${{ parameters.pipModules }}
# environment: nvidia

View File

@@ -131,6 +131,7 @@ jobs:
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
@@ -149,6 +150,7 @@ jobs:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- task: Bash@3
displayName: Build and install other dependencies
retryCountOnTaskFailure: 3
inputs:
targetType: inline
workingDirectory: $(Agent.BuildDirectory)/s
@@ -210,6 +212,7 @@ jobs:
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
@@ -228,6 +231,7 @@ jobs:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- task: Bash@3
displayName: Build and install other dependencies
retryCountOnTaskFailure: 3
inputs:
targetType: inline
workingDirectory: $(Agent.BuildDirectory)/s

View File

@@ -171,6 +171,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- task: DownloadPipelineArtifact@2
displayName: 'Download Pipeline Wheel Files'
retryCountOnTaskFailure: 3
inputs:
itemPattern: '**/*${{ job.os }}*.whl'
targetPath: $(Agent.BuildDirectory)

View File

@@ -35,6 +35,8 @@ parameters:
- ccache
- gfortran
- git
- libboost-filesystem-dev
- libboost-program-options-dev
- libdrm-dev
- liblapack-dev
- libmsgpack-dev

View File

@@ -115,6 +115,13 @@ parameters:
# buildDependsOn:
# - rocBLAS_build
# - rocPRIM_build
# temporary rocblas->hipblas downstream path while the SOLVERs are disabled
- hipBLAS:
name: hipBLAS
sparseCheckoutDir: projects/hipblas
skipUnifiedBuild: 'false'
buildDependsOn:
- rocBLAS_build
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:

View File

@@ -190,6 +190,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- task: DownloadPipelineArtifact@2
displayName: 'Download Pipeline Wheel Files'
retryCountOnTaskFailure: 3
inputs:
itemPattern: '**/*.whl'
targetPath: $(Agent.BuildDirectory)

View File

@@ -36,8 +36,10 @@ parameters:
- gfortran
- git
- libdrm-dev
- liblapack-dev
- libmsgpack-dev
- libnuma-dev
- libopenblas-dev
- ninja-build
- python3-pip
- python3-venv
@@ -46,6 +48,8 @@ parameters:
default:
- joblib
- "packaging>=22.0"
- pytest
- pytest-cmake
- --upgrade
- name: rocmDependencies
type: object
@@ -98,12 +102,12 @@ jobs:
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
@@ -134,12 +138,26 @@ jobs:
rocm-libraries | ${{ job.os }} | ${{ job.target }} | $(DAY_STRING)
rocm-libraries | ${{ job.os }} | ${{ job.target }}
rocm-libraries | ${{ job.os }}
- task: Bash@3
displayName: Add paths for CMake and Python site-packages binaries
inputs:
targetType: inline
script: |
USER_BASE=$(python3 -m site --user-base)
echo "##vso[task.prependpath]$USER_BASE/bin"
echo "##vso[task.setvariable variable=PytestCmakePath]$USER_BASE/share/Pytest/cmake"
displayName: Set cmake configure paths
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
os: ${{ job.os }}
extraBuildFlags: >-
-DROCM_LIBRARIES_SUPERBUILD=ON
-GNinja
-D CMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor;$(PytestCmakePath)
-D CMAKE_INCLUDE_PATH=$(Agent.BuildDirectory)/rocm/llvm/include
-D CMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-D CMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
-D CMAKE_CXX_COMPILER_LAUNCHER=ccache
-D CMAKE_C_COMPILER_LAUNCHER=ccache
-G Ninja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
componentName: ${{ parameters.componentName }}

View File

@@ -1,10 +1,29 @@
parameters:
- name: componentName
type: string
default: rocprofiler-compute
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
@@ -78,6 +97,10 @@ parameters:
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: rocprofiler_compute_build_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}_${{ job.target }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
@@ -94,15 +117,19 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
extraBuildFlags: >-
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -111,78 +138,83 @@ jobs:
# pipModules: ${{ parameters.pipModules }}
# gpuTarget: ${{ job.target }}
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: rocprofiler_compute_test_${{ job.target }}
timeoutInMinutes: 120
dependsOn: rocprofiler_compute_build_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
- name: PYTHON_VERSION
value: 3.10
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
- task: Bash@3
displayName: Add en_US.UTF-8 locale
inputs:
targetType: inline
script: |
sudo locale-gen en_US.UTF-8
sudo update-locale
locale -a
- task: Bash@3
displayName: Add ROCm binaries to PATH
inputs:
targetType: inline
script: |
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
extraBuildFlags: >-
-DCMAKE_HIP_ARCHITECTURES=${{ job.target }}
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
-DCMAKE_BUILD_TYPE=Release
-DENABLE_TESTS=ON
-DINSTALL_TESTS=ON
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: rocprofiler-compute
testDir: $(Build.BinariesDirectory)/libexec/rocprofiler-compute
testExecutable: ROCM_PATH=$(Agent.BuildDirectory)/rocm ctest
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test
gpuTarget: ${{ job.target }}
- ${{ if eq(parameters.unifiedBuild, False) }}:
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: rocprofiler_compute_test_${{ job.target }}
timeoutInMinutes: 120
dependsOn: rocprofiler_compute_build_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
- name: PYTHON_VERSION
value: 3.10
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
preTargetFilter: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- task: Bash@3
displayName: Add en_US.UTF-8 locale
inputs:
targetType: inline
script: |
sudo locale-gen en_US.UTF-8
sudo update-locale
locale -a
- task: Bash@3
displayName: Add ROCm binaries to PATH
inputs:
targetType: inline
script: |
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
extraBuildFlags: >-
-DCMAKE_HIP_ARCHITECTURES=${{ job.target }}
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
-DCMAKE_BUILD_TYPE=Release
-DENABLE_TESTS=ON
-DINSTALL_TESTS=ON
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: ${{ parameters.componentName }}
testDir: $(Build.BinariesDirectory)/libexec/rocprofiler-compute
testExecutable: ROCM_PATH=$(Agent.BuildDirectory)/rocm ctest
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test
gpuTarget: ${{ job.target }}

View File

@@ -8,6 +8,22 @@ parameters:
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline
@@ -65,6 +81,10 @@ parameters:
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_${{ job.os }}_${{ job.target }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
@@ -87,6 +107,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
@@ -94,6 +115,8 @@ jobs:
gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
os: ${{ job.os }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
# the linker flags will not affect ubuntu2204 builds as the paths do not exist
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
@@ -109,10 +132,13 @@ jobs:
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
componentName: ${{ parameters.componentName }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
componentName: ${{ parameters.componentName }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
@@ -123,53 +149,57 @@ jobs:
# gpuTarget: ${{ job.target }}
# registerROCmPackages: true
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }}
registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: roctracer
testExecutable: $(Agent.BuildDirectory)/rocm/share/roctracer/run_tests.sh
testParameters: ''
testDir: $(Agent.BuildDirectory)
testPublishResults: false
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test
gpuTarget: ${{ job.target }}
registerROCmPackages: true
- ${{ if eq(parameters.unifiedBuild, False) }}:
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
packageManager: ${{ job.packageManager }}
registerROCmPackages: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
preTargetFilter: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
os: ${{ job.os }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: ${{ parameters.componentName }}
testExecutable: $(Agent.BuildDirectory)/rocm/share/roctracer/run_tests.sh
testParameters: ''
testDir: $(Agent.BuildDirectory)
testPublishResults: false
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test
gpuTarget: ${{ job.target }}
registerROCmPackages: true

View File

@@ -0,0 +1,67 @@
parameters:
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
- name: fmtlibVersion
type: string
default: ''
- name: aptPackages
type: object
default:
- cmake
- git
- ninja-build
- libfmt-dev
- name: jobMatrix
type: object
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt}
- { os: almalinux8, packageManager: dnf}
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: fmtlib_${{ job.os }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool:
vmImage: 'ubuntu-22.04'
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
endpoint: ContainerService3
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- task: Bash@3
displayName: Clone fmtlib ${{ parameters.fmtlibVersion }}
inputs:
targetType: inline
script: git clone https://github.com/fmtlib/fmt.git -b ${{ parameters.fmtlibVersion }}
workingDirectory: $(Agent.BuildDirectory)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
os: ${{ job.os }}
cmakeBuildDir: $(Agent.BuildDirectory)/fmt/build
cmakeSourceDir: $(Agent.BuildDirectory)/fmt
useAmdclang: false
extraBuildFlags: >-
-DCMAKE_BUILD_TYPE=Release
-DFMT_SYSTEM_HEADERS=ON
-DFMT_INSTALL=ON
-DFMT_TEST=OFF
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
os: ${{ job.os }}

View File

@@ -0,0 +1,64 @@
parameters:
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
- name: libdivideVersion
type: string
default: ''
- name: aptPackages
type: object
default:
- cmake
- git
- ninja-build
- name: jobMatrix
type: object
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt}
- { os: almalinux8, packageManager: dnf}
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: libdivide_${{ job.os }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool:
vmImage: 'ubuntu-22.04'
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
endpoint: ContainerService3
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- task: Bash@3
displayName: Clone libdivide ${{ parameters.libdivideVersion }}
inputs:
targetType: inline
script: git clone https://github.com/ridiculousfish/libdivide.git -b ${{ parameters.libdivideVersion }}
workingDirectory: $(Agent.BuildDirectory)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
os: ${{ job.os }}
cmakeBuildDir: $(Agent.BuildDirectory)/libdivide/build
cmakeSourceDir: $(Agent.BuildDirectory)/libdivide
useAmdclang: false
extraBuildFlags: >-
-DCMAKE_BUILD_TYPE=Release
-DLIBDIVIDE_BUILD_TESTS=OFF
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
os: ${{ job.os }}

View File

@@ -0,0 +1,71 @@
parameters:
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
- name: spdlogVersion
type: string
default: ''
- name: aptPackages
type: object
default:
- cmake
- git
- ninja-build
- name: jobMatrix
type: object
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt}
- { os: almalinux8, packageManager: dnf}
jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: spdlog_${{ job.os }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool:
vmImage: 'ubuntu-22.04'
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest
endpoint: ContainerService3
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
packageManager: ${{ job.packageManager }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- fmtlib
- task: Bash@3
displayName: Clone spdlog ${{ parameters.spdlogVersion }}
inputs:
targetType: inline
script: git clone https://github.com/gabime/spdlog.git -b ${{ parameters.spdlogVersion }}
workingDirectory: $(Agent.BuildDirectory)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
os: ${{ job.os }}
cmakeBuildDir: $(Agent.BuildDirectory)/spdlog/build
cmakeSourceDir: $(Agent.BuildDirectory)/spdlog
useAmdclang: false
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/vendor
-DCMAKE_BUILD_TYPE=Release
-DSPDLOG_USE_STD_FORMAT=OFF
-DSPDLOG_FMT_EXTERNAL_HO=ON
-DSPDLOG_INSTALL=ON
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
os: ${{ job.os }}

View File

@@ -397,6 +397,7 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- task: DownloadPipelineArtifact@2
displayName: 'Download Pipeline Wheel Files'
retryCountOnTaskFailure: 3
inputs:
itemPattern: '**/*.whl'
targetPath: $(Agent.BuildDirectory)

View File

@@ -93,7 +93,7 @@ schedules:
jobs:
- ${{ each job in parameters.jobList }}:
- job: nightly_${{ job.os }}_${{ job.target }}
timeoutInMinutes: 90
timeoutInMinutes: 120
variables:
- group: common
- template: /.azuredevops/variables-global.yml
@@ -226,6 +226,7 @@ jobs:
cat Dockerfile
- task: Docker@2
displayName: Build and upload Docker image
retryCountOnTaskFailure: 3
inputs:
containerRegistry: ContainerService3
repository: 'nightly-${{ job.os }}-${{ job.target }}'

View File

@@ -0,0 +1,23 @@
variables:
- group: common
- template: /.azuredevops/variables-global.yml
parameters:
- name: fmtlibVersion
type: string
default: "11.1.3"
resources:
repositories:
- repository: pipelines_repo
type: github
endpoint: ROCm
name: ROCm/ROCm
trigger: none
pr: none
jobs:
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/fmtlib.yml
parameters:
fmtlibVersion: ${{ parameters.fmtlibVersion }}

View File

@@ -0,0 +1,23 @@
variables:
- group: common
- template: /.azuredevops/variables-global.yml
parameters:
- name: libdivideVersion
type: string
default: master
resources:
repositories:
- repository: pipelines_repo
type: github
endpoint: ROCm
name: ROCm/ROCm
trigger: none
pr: none
jobs:
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/libdivide.yml
parameters:
libdivideVersion: ${{ parameters.libdivideVersion }}

View File

@@ -0,0 +1,23 @@
variables:
- group: common
- template: /.azuredevops/variables-global.yml
parameters:
- name: spdlogVersion
type: string
default: "v1.15.1"
resources:
repositories:
- repository: pipelines_repo
type: github
endpoint: ROCm
name: ROCm/ROCm
trigger: none
pr: none
jobs:
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/spdlog.yml
parameters:
spdlogVersion: ${{ parameters.spdlogVersion }}

View File

@@ -24,8 +24,12 @@ parameters:
steps:
- task: DownloadPipelineArtifact@2
displayName: Download ${{ parameters.componentName }}
retryCountOnTaskFailure: 3
inputs:
itemPattern: '**/*${{ parameters.componentName }}*${{ parameters.fileFilter }}*'
${{ if eq(parameters.componentName, 'clr') }}:
itemPattern: '**/*${{ parameters.componentName }}*${{ parameters.fileFilter }}*amd*' # filter out nvidia clr artifacts
${{ else }}:
itemPattern: '**/*${{ parameters.componentName }}*${{ parameters.fileFilter }}*'
targetPath: '$(Pipeline.Workspace)/d'
allowPartiallySucceededBuilds: true
${{ if parameters.aggregatePipeline }}:

View File

@@ -20,7 +20,7 @@ steps:
retryCountOnTaskFailure: 3
fetchFilter: blob:none
${{ if ne(parameters.sparseCheckoutDir, '') }}:
sparseCheckoutDirectories: ${{ parameters.sparseCheckoutDir }}
sparseCheckoutDirectories: ${{ parameters.sparseCheckoutDir }} shared
path: sparse
- ${{ if ne(parameters.sparseCheckoutDir, '') }}:
- task: Bash@3

View File

@@ -10,6 +10,7 @@ steps:
- ${{ if eq(parameters.registerROCmPackages, true) }}:
- task: Bash@3
displayName: 'Register AMDGPU & ROCm repos (apt)'
retryCountOnTaskFailure: 3
inputs:
targetType: inline
script: |
@@ -20,7 +21,8 @@ steps:
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
sudo apt update
- task: Bash@3
displayName: 'sudo apt-get update'
displayName: 'APT update and install packages'
retryCountOnTaskFailure: 3
inputs:
targetType: inline
script: |
@@ -28,15 +30,6 @@ steps:
echo "deb http://archive.ubuntu.com/ubuntu/ jammy-updates main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
echo "deb http://archive.ubuntu.com/ubuntu/ jammy-backports main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
echo "deb http://archive.ubuntu.com/ubuntu/ jammy-security main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
sudo DEBIAN_FRONTEND=noninteractive apt-get --yes update
- task: Bash@3
displayName: 'sudo apt-get fix'
inputs:
targetType: inline
script: sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-broken install
- ${{ if gt(length(parameters.aptPackages), 0) }}:
- task: Bash@3
displayName: 'sudo apt-get install ...'
inputs:
targetType: inline
script: sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-missing install ${{ join(' ', parameters.aptPackages) }}
sudo DEBIAN_FRONTEND=noninteractive apt-get --yes update && \
sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-broken install && \
sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-missing install ${{ join(' ', parameters.aptPackages) }}

View File

@@ -5,51 +5,28 @@ parameters:
steps:
- task: Bash@3
displayName: Get aqlprofile package name
inputs:
targetType: inline
${{ if eq(parameters.os, 'ubuntu2204') }}:
script: |
export packageName=$(curl -s https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/ | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb")
echo "##vso[task.setvariable variable=packageName;isreadonly=true]$packageName"
${{ if eq(parameters.os, 'almalinux8') }}:
script: |
export packageName=$(curl -s https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/ | grep -oP "hsa-amd-aqlprofile-[^\"]+\.rpm" | head -n1)
echo "##vso[task.setvariable variable=packageName;isreadonly=true]$packageName"
- task: Bash@3
displayName: 'Download aqlprofile'
inputs:
targetType: inline
workingDirectory: '$(Pipeline.Workspace)'
${{ if eq(parameters.os, 'ubuntu2204') }}:
script: wget -nv https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/$(packageName)
${{ if eq(parameters.os, 'almalinux8') }}:
script: wget -nv https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/$(packageName)
- task: Bash@3
displayName: 'Extract aqlprofile'
inputs:
targetType: inline
workingDirectory: '$(Pipeline.Workspace)'
${{ if eq(parameters.os, 'ubuntu2204') }}:
script: |
mkdir hsa-amd-aqlprofile
dpkg-deb -R $(packageName) hsa-amd-aqlprofile
${{ if eq(parameters.os, 'almalinux8') }}:
script: |
mkdir hsa-amd-aqlprofile
sudo dnf -y install rpm-build cpio
rpm2cpio $(packageName) | (cd hsa-amd-aqlprofile && cpio -idmv)
- task: Bash@3
displayName: 'Copy aqlprofile files'
displayName: Download and install aqlprofile
retryCountOnTaskFailure: 3
inputs:
targetType: inline
workingDirectory: $(Agent.BuildDirectory)
script: |
mkdir -p $(Agent.BuildDirectory)/rocm
cp -R hsa-amd-aqlprofile/opt/rocm-*/* $(Agent.BuildDirectory)/rocm
workingDirectory: '$(Pipeline.Workspace)'
- task: Bash@3
displayName: 'Clean up aqlprofile'
inputs:
targetType: inline
script: rm -rf hsa-amd-aqlprofile $(packageName)
workingDirectory: '$(Pipeline.Workspace)'
set -e
if [ "${{ parameters.os }}" = "ubuntu2204" ]; then
packageName=$(curl -s https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/ | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb") && \
wget -nv https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/$packageName && \
mkdir -p hsa-amd-aqlprofile && \
dpkg-deb -R $packageName hsa-amd-aqlprofile
elif [ "${{ parameters.os }}" = "almalinux8" ]; then
sudo dnf -y install rpm-build cpio && \
packageName=$(curl -s https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/ | grep -oP "hsa-amd-aqlprofile-[^\"]+\.rpm" | head -n1) && \
wget -nv https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/$packageName && \
mkdir -p hsa-amd-aqlprofile && \
rpm2cpio $packageName | (cd hsa-amd-aqlprofile && cpio -idmv)
else
echo "Unsupported OS: ${{ parameters.os }}"
exit 1
fi && \
mkdir -p $(Agent.BuildDirectory)/rocm && \
cp -R hsa-amd-aqlprofile/opt/rocm-*/* $(Agent.BuildDirectory)/rocm && \
rm -rf hsa-amd-aqlprofile $packageName

View File

@@ -89,6 +89,7 @@ steps:
- ${{ if eq(parameters.registerROCmPackages, true) }}:
- task: Bash@3
displayName: 'Register AMDGPU & ROCm repos (dnf)'
retryCountOnTaskFailure: 3
inputs:
targetType: inline
script: |
@@ -109,12 +110,13 @@ steps:
sudo dnf makecache
- task: Bash@3
displayName: 'Install base dnf packages'
retryCountOnTaskFailure: 3
inputs:
targetType: inline
script: |
sudo dnf config-manager --set-enabled powertools
# rpm fusion free repo for some dependencies
sudo dnf -y install https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm
sudo dnf config-manager --set-enabled powertools && \
sudo dnf -y install https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm && \
sudo dnf -y install ${{ join(' ', parameters.basePackages) }}
- task: Bash@3
displayName: 'Check gcc environment'
@@ -128,6 +130,7 @@ steps:
g++ -print-file-name=libstdc++.so
- task: Bash@3
displayName: 'Set python 3.11 as default'
retryCountOnTaskFailure: 3
inputs:
targetType: inline
script: |
@@ -142,18 +145,20 @@ steps:
- ${{ if eq(pkg, 'ninja-build') }}:
- task: Bash@3
displayName: 'Install ninja 1.11.1'
retryCountOnTaskFailure: 3
inputs:
targetType: inline
script: |
curl -LO https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-linux.zip
sudo dnf -y install unzip
unzip ninja-linux.zip
sudo mv ninja /usr/local/bin/ninja
sudo chmod +x /usr/local/bin/ninja
sudo dnf -y install unzip && \
curl -LO https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-linux.zip && \
unzip ninja-linux.zip && \
sudo mv ninja /usr/local/bin/ninja && \
sudo chmod +x /usr/local/bin/ninja && \
echo "##vso[task.prependpath]/usr/local/bin"
- ${{ if ne(parameters.aptToDnfMap[pkg], '') }}:
- task: Bash@3
displayName: 'dnf install ${{ parameters.aptToDnfMap[pkg] }}'
retryCountOnTaskFailure: 3
inputs:
targetType: inline
script: |

View File

@@ -27,6 +27,7 @@ steps:
- ${{ if gt(length(parameters.pipModules), 0) }}:
- task: Bash@3
displayName: 'pip install ...'
retryCountOnTaskFailure: 3
inputs:
targetType: inline
script: python3 -m pip install -v --force-reinstall ${{ join(' ', parameters.pipModules) }}

View File

@@ -47,8 +47,8 @@ parameters:
developBranch: aomp-dev
hasGpuTarget: false
clr:
pipelineId: 145
developBranch: amd-staging
pipelineId: 335
developBranch: develop
hasGpuTarget: false
composable_kernel:
pipelineId: 86
@@ -59,8 +59,8 @@ parameters:
developBranch: rocm
hasGpuTarget: false
HIP:
pipelineId: 93
developBranch: amd-staging
pipelineId: 335
developBranch: develop
hasGpuTarget: false
hip-tests:
pipelineId: 233

View File

@@ -8,15 +8,18 @@ parameters:
type: object
default:
boost: 250
fmtlib: 341
grpc: 72
gtest: 73
half560: 68
lapack: 69
spdlog: 340
steps:
- ${{ each dependency in parameters.dependencyList }}:
- task: DownloadPipelineArtifact@2
displayName: Download ${{ dependency }}
retryCountOnTaskFailure: 3
inputs:
project: ROCm-CI
buildType: specific

View File

@@ -33,6 +33,7 @@ parameters:
steps:
- task: DownloadPipelineArtifact@2
displayName: Download ${{ parameters.preTargetFilter}}*${{ parameters.os }}_${{ parameters.gpuTarget}}*${{ parameters.postTargetFilter}}
retryCountOnTaskFailure: 3
inputs:
${{ if eq(parameters.buildType, 'specific') }}:
buildType: specific

View File

@@ -7,6 +7,7 @@ steps:
- task: Bash@3
name: downloadCKBuild
displayName: Download specific CK build
retryCountOnTaskFailure: 3
env:
CXX: $(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
CC: $(Agent.BuildDirectory)/rocm/llvm/bin/amdclang

View File

@@ -116,6 +116,7 @@ Deprecations
DevCap
DirectX
Dockerfile
Dockerized
Doxygen
dropless
ELMo
@@ -123,6 +124,7 @@ ENDPGM
EPYC
ESXi
EoS
fas
FBGEMM
FFT
FFTs
@@ -195,6 +197,7 @@ HWE
HWS
Haswell
Higgs
href
Hyperparameters
Huggingface
ICD
@@ -361,6 +364,7 @@ PowerEdge
PowerShell
Pretrained
Pretraining
Primus
Profiler's
PyPi
Pytest
@@ -525,6 +529,7 @@ Xilinx
Xnack
Xteam
YAML
YAMLs
YML
YModel
ZeRO
@@ -585,6 +590,7 @@ completers
composable
concretization
config
configs
conformant
constructible
convolutional
@@ -795,7 +801,9 @@ preprocessing
preprocessor
prequantized
prerequisites
pretrain
pretraining
primus
profiler
profilers
protobuf

View File

@@ -29,6 +29,7 @@ additional licenses. Please review individual repositories for more information.
| [AMD SMI](https://github.com/ROCm/amdsmi) | [MIT](https://github.com/ROCm/amdsmi/blob/amd-staging/LICENSE) |
| [aomp](https://github.com/ROCm/aomp/) | [Apache 2.0](https://github.com/ROCm/aomp/blob/aomp-dev/LICENSE) |
| [aomp-extras](https://github.com/ROCm/aomp-extras/) | [MIT](https://github.com/ROCm/aomp-extras/blob/aomp-dev/LICENSE) |
| [AQLprofile] | [MIT](https://github.com/ROCm/aqlprofile/blob/amd-staging/LICENSE) |
| [Code Object Manager (Comgr)](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/comgr) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/comgr/LICENSE.txt) |
| [Composable Kernel](https://github.com/ROCm/composable_kernel) | [MIT](https://github.com/ROCm/composable_kernel/blob/develop/LICENSE) |
| [half](https://github.com/ROCm/half/) | [MIT](https://github.com/ROCm/half/blob/rocm/LICENSE.txt) |
@@ -46,7 +47,6 @@ additional licenses. Please review individual repositories for more information.
| [hipSPARSE](https://github.com/ROCm/hipSPARSE/) | [MIT](https://github.com/ROCm/hipSPARSE/blob/develop/LICENSE.md) |
| [hipSPARSELt](https://github.com/ROCm/hipSPARSELt/) | [MIT](https://github.com/ROCm/hipSPARSELt/blob/develop/LICENSE.md) |
| [hipTensor](https://github.com/ROCm/hipTensor) | [MIT](https://github.com/ROCm/hipTensor/blob/develop/LICENSE) |
| hsa-amd-aqlprofile | [AMD Software EULA](https://www.amd.com/en/legal/eula/amd-software-eula.html) |
| [llvm-project](https://github.com/ROCm/llvm-project/) | [Apache](https://github.com/ROCm/llvm-project/blob/amd-staging/LICENSE.TXT) |
| [llvm-project/flang](https://github.com/ROCm/llvm-project/tree/amd-staging/flang) | [Apache 2.0](https://github.com/ROCm/llvm-project/blob/amd-staging/flang/LICENSE.TXT) |
| [MIGraphX](https://github.com/ROCm/AMDMIGraphX/) | [MIT](https://github.com/ROCm/AMDMIGraphX/blob/develop/LICENSE) |
@@ -132,12 +132,10 @@ companies.
### Package licensing
:::{attention}
AQL Profiler and AOCC CPU optimization are both provided in binary form, each
subject to the license agreement enclosed in the directory for the binary available
in `/opt/rocm/share/doc/hsa-amd-aqlprofile/EULA`. By using, installing,
copying or distributing AQL Profiler and/or AOCC CPU Optimizations, you agree to
ROCprof Trace Decoder and AOCC CPU optimizations are provided in binary form, subject to the license agreement enclosed on [GitHub](https://github.com/ROCm/rocprof-trace-decoder/blob/amd-mainline/LICENSE) for ROCprof Trace Decoder, and [Developer Central](https://www.amd.com/en/developer/aocc.html) for AOCC. By using, installing,
copying or distributing ROCprof Trace Decoder or AOCC CPU Optimizations, you agree to
the terms and conditions of this license agreement. If you do not agree to the
terms of this agreement, do not install, copy or use the AQL Profiler and/or the
terms of this agreement, do not install, copy or use ROCprof Trace Decoder or the
AOCC CPU Optimizations.
:::

View File

@@ -1,26 +1,15 @@
dockers:
- pull_tag: rocm/megatron-lm:v25.6_py312
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.6_py312/images/sha256-482ff906532285bceabdf2bda629bd32cb6174d2d07f4243a736378001b28df0
- pull_tag: rocm/megatron-lm:v25.7_py310
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.7_py310/images/sha256-6189df849feeeee3ae31bb1e97aef5006d69d2b90c134e97708c19632e20ab5a
components:
ROCm: 6.4.1
PyTorch: 2.8.0a0+git7d205b2
Python: 3.12
Transformer Engine: 2.1.0.dev0+8c4a512
hipBLASLt: 393e413
Triton: 3.3.0
RCCL: 2.23.4.7a84c5d
doc_name: Ubuntu 24.04 + Python 3.12
- pull_tag: rocm/megatron-lm:v25.6_py310
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.6_py310/images/sha256-9627bd9378684fe26cb1a10c7dd817868f553b33402e49b058355b0f095568d6
components:
ROCm: 6.4.1
PyTorch: 2.8.0a0+git7d205b2
ROCm: 6.4.2
Primus: v0.1.0-rc1
PyTorch: 2.8.0a0+gitd06a406
Python: "3.10"
Transformer Engine: 2.1.0.dev0+8c4a512
hipBLASLt: 393e413
Transformer Engine: 2.1.0.dev0+ba586519
hipBLASLt: 37ba1d36
Triton: 3.3.0
RCCL: 2.23.4.7a84c5d
doc_name: Ubuntu 22.04 + Python 3.10
RCCL: 2.22.3
model_groups:
- group: Meta Llama
tag: llama

View File

@@ -0,0 +1,60 @@
dockers:
- pull_tag: rocm/megatron-lm:v25.6_py312
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.6_py312/images/sha256-482ff906532285bceabdf2bda629bd32cb6174d2d07f4243a736378001b28df0
components:
ROCm: 6.4.1
PyTorch: 2.8.0a0+git7d205b2
Python: 3.12
Transformer Engine: 2.1.0.dev0+8c4a512
hipBLASLt: 393e413
Triton: 3.3.0
RCCL: 2.23.4.7a84c5d
doc_name: Ubuntu 24.04 + Python 3.12
- pull_tag: rocm/megatron-lm:v25.6_py310
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.6_py310/images/sha256-9627bd9378684fe26cb1a10c7dd817868f553b33402e49b058355b0f095568d6
components:
ROCm: 6.4.1
PyTorch: 2.8.0a0+git7d205b2
Python: "3.10"
Transformer Engine: 2.1.0.dev0+8c4a512
hipBLASLt: 393e413
Triton: 3.3.0
RCCL: 2.23.4.7a84c5d
doc_name: Ubuntu 22.04 + Python 3.10
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: pyt_megatron_lm_train_llama-3.3-70b
- model: Llama 3.1 8B
mad_tag: pyt_megatron_lm_train_llama-3.1-8b
- model: Llama 3.1 70B
mad_tag: pyt_megatron_lm_train_llama-3.1-70b
- model: Llama 3.1 70B (proxy)
mad_tag: pyt_megatron_lm_train_llama-3.1-70b-proxy
- model: Llama 2 7B
mad_tag: pyt_megatron_lm_train_llama-2-7b
- model: Llama 2 70B
mad_tag: pyt_megatron_lm_train_llama-2-70b
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: pyt_megatron_lm_train_deepseek-v3-proxy
- model: DeepSeek-V2-Lite
mad_tag: pyt_megatron_lm_train_deepseek-v2-lite-16b
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: pyt_megatron_lm_train_mixtral-8x7b
- model: Mixtral 8x22B (proxy)
mad_tag: pyt_megatron_lm_train_mixtral-8x22b-proxy
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: pyt_megatron_lm_train_qwen2.5-7b
- model: Qwen 2.5 72B
mad_tag: pyt_megatron_lm_train_qwen2.5-72b

View File

@@ -0,0 +1,58 @@
dockers:
- pull_tag: rocm/megatron-lm:v25.7_py310
docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.7_py310/images/sha256-6189df849feeeee3ae31bb1e97aef5006d69d2b90c134e97708c19632e20ab5a
components:
ROCm: 6.4.2
Primus: v0.1.0-rc1
PyTorch: 2.8.0a0+gitd06a406
Python: "3.10"
Transformer Engine: 2.1.0.dev0+ba586519
hipBLASLt: 37ba1d36
Triton: 3.3.0
RCCL: 2.22.3
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.3 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.3-70b
config_name: llama3.3_70B-pretrain.yaml
- model: Llama 3.1 70B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-70b
config_name: llama3.1_70B-pretrain.yaml
- model: Llama 3.1 8B
mad_tag: primus_pyt_megatron_lm_train_llama-3.1-8b
config_name: llama3.1_8B-pretrain.yaml
- model: Llama 2 7B
mad_tag: primus_pyt_megatron_lm_train_llama-2-7b
config_name: llama2_7B-pretrain.yaml
- model: Llama 2 70B
mad_tag: primus_pyt_megatron_lm_train_llama-2-70b
config_name: llama2_70B-pretrain.yaml
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek-V3 (proxy)
mad_tag: primus_pyt_megatron_lm_train_deepseek-v3-proxy
config_name: deepseek_v3-pretrain.yaml
- model: DeepSeek-V2-Lite
mad_tag: primus_pyt_megatron_lm_train_deepseek-v2-lite-16b
config_name: deepseek_v2_lite-pretrain.yaml
- group: Mistral AI
tag: mistral
models:
- model: Mixtral 8x7B
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x7b
config_name: mixtral_8x7B_v0.1-pretrain.yaml
- model: Mixtral 8x22B (proxy)
mad_tag: primus_pyt_megatron_lm_train_mixtral-8x22b-proxy
config_name: mixtral_8x22B_v0.1-pretrain.yaml
- group: Qwen
tag: qwen
models:
- model: Qwen 2.5 7B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-7b
config_name: primus_qwen2.5_7B-pretrain.yaml
- model: Qwen 2.5 72B
mad_tag: primus_pyt_megatron_lm_train_qwen2.5-72b
config_name: qwen2.5_72B-pretrain.yaml

View File

@@ -2,58 +2,132 @@
:description: How to install deep learning frameworks for ROCm
:keywords: deep learning, frameworks, ROCm, install, PyTorch, TensorFlow, JAX, MAGMA, DeepSpeed, ML, AI
********************************************
Installing deep learning frameworks for ROCm
********************************************
**********************************
Deep learning frameworks for ROCm
**********************************
ROCm provides a comprehensive ecosystem for deep learning development, including
:ref:`libraries <artificial-intelligence-apis>` for optimized deep learning operations and ROCm-aware versions of popular
deep learning frameworks and libraries such as PyTorch, TensorFlow, and JAX. ROCm works closely with these
frameworks to ensure that framework-specific optimizations take advantage of AMD accelerator and GPU architectures.
Deep learning frameworks provide environments for machine learning, training, fine-tuning, inference, and performance optimization.
The following guides provide information on compatibility and supported
features for these ROCm-enabled deep learning frameworks.
ROCm offers a complete ecosystem for developing and running deep learning applications efficiently. It also provides ROCm-compatible versions of popular frameworks and libraries, such as PyTorch, TensorFlow, JAX, and others.
* :doc:`PyTorch compatibility <../compatibility/ml-compatibility/pytorch-compatibility>`
* :doc:`TensorFlow compatibility <../compatibility/ml-compatibility/tensorflow-compatibility>`
* :doc:`JAX compatibility <../compatibility/ml-compatibility/jax-compatibility>`
* :doc:`verl compatibility <../compatibility/ml-compatibility/verl-compatibility>`
* :doc:`Stanford Megatron-LM compatibility <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>`
* :doc:`DGL compatibility <../compatibility/ml-compatibility/dgl-compatibility>`
* :doc:`Megablocks compatibility <../compatibility/ml-compatibility/megablocks-compatibility>`
* :doc:`Taichi compatibility <../compatibility/ml-compatibility/taichi-compatibility>`
The AMD ROCm organization actively contributes to open-source development and collaborates closely with framework organizations. This collaboration ensures that framework-specific optimizations effectively leverage AMD GPUs and accelerators.
This chart steps through typical installation workflows for installing deep learning frameworks for ROCm.
The table below summarizes information about ROCm-enabled deep learning frameworks. It includes details on ROCm compatibility and third-party tool support, installation steps and options, and links to GitHub resources. For a complete list of supported framework versions on ROCm, see the :doc:`Compatibility matrix <../compatibility/compatibility-matrix>` topic.
.. image:: ../data/how-to/framework_install_2024_07_04.png
:alt: Flowchart for installing ROCm-aware machine learning frameworks
:align: center
.. list-table::
:header-rows: 1
:widths: 5 3 6 3
See the installation instructions to get started.
* - Framework
- Installation
- Installation options
- GitHub
* :doc:`PyTorch for ROCm <rocm-install-on-linux:install/3rd-party/pytorch-install>`
* :doc:`TensorFlow for ROCm <rocm-install-on-linux:install/3rd-party/tensorflow-install>`
* :doc:`JAX for ROCm <rocm-install-on-linux:install/3rd-party/jax-install>`
* :doc:`verl for ROCm <rocm-install-on-linux:install/3rd-party/verl-install>`
* :doc:`Stanford Megatron-LM for ROCm <rocm-install-on-linux:install/3rd-party/stanford-megatron-lm-install>`
* :doc:`DGL for ROCm <rocm-install-on-linux:install/3rd-party/dgl-install>`
* :doc:`Megablocks for ROCm <rocm-install-on-linux:install/3rd-party/megablocks-install>`
* :doc:`Taichi for ROCm <rocm-install-on-linux:install/3rd-party/taichi-install>`
* - `PyTorch <https://rocm.docs.amd.com/en/latest/compatibility/ml-compatibility/pytorch-compatibility.html>`_
- .. raw:: html
<a href="https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/pytorch-install.html"><i class="fas fa-link fa-lg"></i></a>
-
- `Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/pytorch-install.html#using-a-docker-image-with-pytorch-pre-installed>`_
- `Wheels package <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/pytorch-install.html#using-a-wheels-package>`_
- `ROCm Base Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/pytorch-install.html#using-the-pytorch-rocm-base-docker-image>`_
- `Upstream Docker file <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/pytorch-install.html#using-the-pytorch-upstream-dockerfile>`_
- .. raw:: html
<a href="https://github.com/ROCm/pytorch"><i class="fab fa-github fa-lg"></i></a>
* - `TensorFlow <https://rocm.docs.amd.com/en/latest/compatibility/ml-compatibility/tensorflow-compatibility.html>`_
- .. raw:: html
<a href="https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/tensorflow-install.html"><i class="fas fa-link fa-lg"></i></a>
-
- `Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/tensorflow-install.html#using-a-docker-image-with-tensorflow-pre-installed>`_
- `Wheels package <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/tensorflow-install.html#using-a-wheels-package>`_
.. note::
- .. raw:: html
<a href="https://github.com/ROCm/tensorflow-upstream"><i class="fab fa-github fa-lg"></i></a>
* - `JAX <https://rocm.docs.amd.com/en/latest/compatibility/ml-compatibility/jax-compatibility.html>`_
- .. raw:: html
<a href="https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/jax-install.html"><i class="fas fa-link fa-lg"></i></a>
-
- `Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/jax-install.html#using-a-prebuilt-docker-image>`_
- .. raw:: html
<a href="https://github.com/ROCm/jax"><i class="fab fa-github fa-lg"></i></a>
* - `verl <https://rocm.docs.amd.com/en/latest/compatibility/ml-compatibility/verl-compatibility.html>`_
- .. raw:: html
<a href="https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/verl-install.html"><i class="fas fa-link fa-lg"></i></a>
-
- `Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/verl-install.html#use-a-prebuilt-docker-image-with-verl-pre-installed>`_
- .. raw:: html
<a href="https://github.com/ROCm/verl"><i class="fab fa-github fa-lg"></i></a>
* - `Stanford Megatron-LM <https://rocm.docs.amd.com/en/latest/compatibility/ml-compatibility/stanford-megatron-lm-compatibility.html>`_
- .. raw:: html
<a href="https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/stanford-megatron-lm-install.html"><i class="fas fa-link fa-lg"></i></a>
-
- `Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/stanford-megatron-lm-install.html#use-a-prebuilt-docker-image-with-stanford-megatron-lm-pre-installed>`_
- .. raw:: html
<a href="https://github.com/ROCm/Stanford-Megatron-LM"><i class="fab fa-github fa-lg"></i></a>
* - `DGL <https://rocm.docs.amd.com/en/latest/compatibility/ml-compatibility/dgl-compatibility.html>`_
- .. raw:: html
<a href="https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html"><i class="fas fa-link fa-lg"></i></a>
-
- `Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html#use-a-prebuilt-docker-image-with-dgl-pre-installed>`_
- .. raw:: html
<a href="https://github.com/ROCm/dgl"><i class="fab fa-github fa-lg"></i></a>
* - `Megablocks <https://rocm.docs.amd.com/en/latest/compatibility/ml-compatibility/megablocks-compatibility.html>`_
- .. raw:: html
<a href="https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/megablocks-install.html"><i class="fas fa-link fa-lg"></i></a>
-
- `Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/megablocks-install.html#using-a-prebuilt-docker-image-with-megablocks-pre-installed>`_
- .. raw:: html
<a href="https://github.com/ROCm/megablocks"><i class="fab fa-github fa-lg"></i></a>
* - `Taichi <https://rocm.docs.amd.com/en/latest/compatibility/ml-compatibility/taichi-compatibility.html>`_
- .. raw:: html
<a href="https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/taichi-install.html"><i class="fas fa-link fa-lg"></i></a>
-
- `Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/taichi-install.html#use-a-prebuilt-docker-image-with-taichi-pre-installed>`_
- `Wheels package <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/taichi-install.html#use-a-wheels-package>`_
- .. raw:: html
<a href="https://github.com/ROCm/taichi"><i class="fab fa-github fa-lg"></i></a>
For guidance on installing ROCm itself, refer to :doc:`ROCm installation for Linux <rocm-install-on-linux:index>`.
Learn how to use your ROCm deep learning environment for training, fine-tuning, inference, and performance optimization
through the following guides.
* :doc:`rocm-for-ai/index`
* :doc:`Training <rocm-for-ai/training/index>`
* :doc:`Use ROCm for training <rocm-for-ai/training/index>`
* :doc:`Use ROCm for fine-tuning LLMs <rocm-for-ai/fine-tuning/index>`
* :doc:`Use ROCm for AI inference <rocm-for-ai/inference/index>`
* :doc:`Use ROCm for AI inference optimization <rocm-for-ai/inference-optimization/index>`
* :doc:`Fine-tuning LLMs <rocm-for-ai/fine-tuning/index>`
* :doc:`Inference <rocm-for-ai/inference/index>`
* :doc:`Inference optimization <rocm-for-ai/inference-optimization/index>`

View File

@@ -1,14 +1,14 @@
.. meta::
:description: How to install ROCm and popular machine learning frameworks.
:description: How to install ROCm and popular deep learning frameworks.
:keywords: ROCm, AI, LLM, train, fine-tune, FSDP, DeepSpeed, LLaMA, tutorial
.. _rocm-for-ai-install:
***********************************************
Installing ROCm and machine learning frameworks
***********************************************
********************************************
Installing ROCm and deep learning frameworks
********************************************
Before getting started, install ROCm and supported machine learning frameworks.
Before getting started, install ROCm and supported deep learning frameworks.
.. grid:: 1
@@ -43,29 +43,16 @@ distribution's package manager. See the following documentation resources to get
If you encounter any issues during installation, refer to the
:doc:`Installation troubleshooting <rocm-install-on-linux:reference/install-faq>` guide.
Machine learning frameworks
===========================
Deep learning frameworks
========================
ROCm supports popular machine learning frameworks and libraries including `PyTorch
ROCm supports deep learning frameworks and libraries including `PyTorch
<https://pytorch.org/blog/pytorch-for-amd-rocm-platform-now-available-as-python-package>`_, `TensorFlow
<https://tensorflow.org>`_, `JAX <https://jax.readthedocs.io/en/latest>`_, and `DeepSpeed
<https://cloudblogs.microsoft.com/opensource/2022/03/21/supporting-efficient-large-model-training-on-amd-instinct-gpus-with-deepspeed/>`_.
<https://tensorflow.org>`_, `JAX <https://jax.readthedocs.io/en/latest>`_, and more.
Review the framework installation documentation. For ease-of-use, it's recommended to use official ROCm prebuilt Docker
Review the :doc:`framework installation documentation <../deep-learning-rocm>`. For ease-of-use, it's recommended to use official ROCm prebuilt Docker
images with the framework pre-installed.
* :doc:`PyTorch for ROCm <rocm-install-on-linux:install/3rd-party/pytorch-install>`
* :doc:`TensorFlow for ROCm <rocm-install-on-linux:install/3rd-party/tensorflow-install>`
* :doc:`JAX for ROCm <rocm-install-on-linux:install/3rd-party/jax-install>`
* :doc:`verl for ROCm <rocm-install-on-linux:install/3rd-party/verl-install>`
* :doc:`Stanford Megatron-LM for ROCm <rocm-install-on-linux:install/3rd-party/jax-install>`
* :doc:`DGL for ROCm <rocm-install-on-linux:install/3rd-party/jax-install>`
Next steps
==========

View File

@@ -1,3 +1,5 @@
:orphan:
.. meta::
:description: How to train a model using Megatron-LM for ROCm.
:keywords: ROCm, AI, LLM, train, Megatron-LM, megatron, Llama, tutorial, docker, torch
@@ -6,6 +8,14 @@
Training a model with Megatron-LM for ROCm
******************************************
.. caution::
The ROCm Megatron-LM framework now has limited support with this Docker
environment; it now focuses on Primus with Megatron-Core. See :doc:`primus-megatron`.
To learn how to migrate your existing workloads to Primus with Megatron-Core,
see :doc:`previous-versions/megatron-lm-primus-migration-guide`.
The `Megatron-LM framework for ROCm <https://github.com/ROCm/Megatron-LM>`_ is
a specialized fork of the robust Megatron-LM, designed to enable efficient
training of large-scale language models on AMD GPUs. By leveraging AMD
@@ -20,13 +30,17 @@ essential components, including PyTorch, ROCm libraries, and Megatron-LM
utilities. It contains the following software components to accelerate training
workloads:
.. note::
This Docker environment is based on Python 3.10 and Ubuntu 22.04. For an alternative environment with
Python 3.12 and Ubuntu 24.04, see the :doc:`previous ROCm Megatron-LM v25.6 Docker release <previous-versions/megatron-lm-v25.6>`.
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/megatron-lm-benchmark-models.yaml
{% set dockers = data.dockers %}
{% if dockers|length > 1 %}
.. tab-set::
{% for docker in data.dockers %}
{% for docker in dockers %}
.. tab-item:: ``{{ docker.pull_tag }}``
:sync: {{ docker.pull_tag }}
@@ -42,28 +56,14 @@ workloads:
{% endfor %}
{% endfor %}
{% elif dockers|length == 1 %}
.. list-table::
:header-rows: 1
* - Software component
- Version
{% for component_name, component_version in docker.components %}
* - {{ component_name }}
- {{ component_version }}
{% endfor %}
{% endif %}
.. _amd-megatron-lm-model-support:
The following models are pre-optimized for performance on AMD Instinct MI300X series accelerators.
Supported models
================
The following models are supported for training performance benchmarking with Megatron-LM and ROCm.
The following models are supported for training performance benchmarking with Megatron-LM and ROCm
on AMD Instinct MI300X series accelerators.
Some instructions, commands, and training recommendations in this documentation might
vary by model -- select one to get started.
@@ -177,7 +177,7 @@ Download the Docker image
{% if dockers|length > 1 %}
.. tab-set::
{% for docker in data.dockers %}
{% for docker in dockers %}
.. tab-item:: {{ docker.doc_name }}
:sync: {{ docker.pull_tag }}
@@ -227,10 +227,17 @@ Download the Docker image
docker start megatron_training_env
docker exec -it megatron_training_env bash
The Docker container includes a pre-installed, verified version of the ROCm
Megatron-LM development branch
`<https://github.com/ROCm/Megatron-LM/tree/rocm_dev>`__, including necessary
training scripts.
4. **Megatron-LM backward compatibility setup** -- this Docker is primarily intended for use with Primus, but it maintains Megatron-LM compatibility with limited support.
To roll back to using Megatron-LM, follow these steps:
.. code-block:: shell
cd /workspace/Megatron-LM/
pip uninstall megatron-core
pip install -e .
The Docker container hosts
`<https://github.com/ROCm/Megatron-LM/tree/rocm_dev>`__ at verified commit ``e8e9edc``.
.. _amd-megatron-lm-environment-setup:

View File

@@ -16,12 +16,20 @@ previous releases of the ``ROCm/megatron-lm`` Docker image on `Docker Hub <https
- Components
- Resources
* - v25.6 (latest)
* - v25.7 (latest)
-
* ROCm
* PyTorch
-
* :doc:`Documentation <../megatron-lm>`
* `Docker Hub (py310) <https://hub.docker.com/layers/rocm/megatron-lm/v25.7_py310/images/sha256-6189df849feeeee3ae31bb1e97aef5006d69d2b90c134e97708c19632e20ab5a>`__
* - v25.6
-
* ROCm 6.4.1
* PyTorch 2.8.0a0+git7d205b2
-
* :doc:`Documentation <../megatron-lm>`
* :doc:`Documentation <megatron-lm-v25.6>`
* `Docker Hub (py312) <https://hub.docker.com/layers/rocm/megatron-lm/v25.6_py312/images/sha256-482ff906532285bceabdf2bda629bd32cb6174d2d07f4243a736378001b28df0>`__
* `Docker Hub (py310) <https://hub.docker.com/layers/rocm/megatron-lm/v25.6_py310/images/sha256-9627bd9378684fe26cb1a10c7dd817868f553b33402e49b058355b0f095568d6>`__

View File

@@ -0,0 +1,175 @@
:orphan:
**********************************************************************
Migrating workloads to Primus (Megatron-Core backend) from Megatron-LM
**********************************************************************
Primus supports Megatron-Core as backend optimization library,
replacing ROCm Megatron-LM. This document outlines the steps to migrate
workload from ROCm Megatron-LM to Primus with the Megatron-Core backend.
Model architecture
==================
ROCm Megatron-LM defines model architecture parameters in the training scripts;
for example, the Llama 3 8B model parameters are defined in
`examples/llama/train_llama3.sh <https://github.com/ROCm/Megatron-LM/blob/rocm_dev/examples/llama/train_llama3.sh#L117>`__
as shown below:
.. code-block:: bash
HIDDEN_SIZE=4096
FFN_HIDDEN_SIZE=14336
NUM_LAYERS=32
NUM_HEADS=32
NUM_KV_HEADS=8
Primus defines the model architecture through model YAML configuration files
inside the ``primus/configs/models/megatron/`` repository. For example, Llama 3 8B
model architecture parameters are defined in
`primus/configs/models/megatron/llama3_8B.yaml <https://github.com/AMD-AIG-AIMA/Primus/blob/v0.1.0-rc1/primus/configs/models/megatron/llama3_8B.yaml>`__
as shown below:
.. code-block:: yaml
bases:
- llama3_base.yaml
tokenizer_type: Llama3Tokenizer
tokenizer_model: meta-llama/Llama-3.1-8B
ffn_hidden_size: 14336
hidden_size: 4096
num_attention_heads: 32
num_layers: 32
num_query_groups: 8
Primus' model config files follow a hierarchical design, meaning that new model
config YAMLs can inherit existing model config files by importing them as
bases. For example,
`llama3.1_8B.yaml <https://github.com/AMD-AIG-AIMA/Primus/blob/v0.1.0-rc1/primus/configs/models/megatron/llama3.1_8B.yaml>`__
uses ``llama3_8B.yaml`` as a base config and overrides few parameters, as shown below.
In this example, ``llama3.1_8B`` overrides the ``max_position_embeddings`` value:
.. code-block:: yaml
bases:
- llama3_8B.yaml
tokenizer_type: Llama3Tokenizer
tokenizer_model: meta-llama/Llama-3.1-8B
max_position_embeddings: 131072
.. tip::
Primus provides ``llama_base.yaml`` as the base configuration, which can be
used as bases for additional model architectures. For example,
`mixtral_base.yaml <https://github.com/AMD-AIG-AIMA/Primus/blob/v0.1.0-rc1/primus/configs/models/megatron/mixtral_base.yaml>`__
and
`deepseek_v3_base.yaml <https://github.com/AMD-AIG-AIMA/Primus/blob/v0.1.0-rc1/primus/configs/models/megatron/deepseek_v3_base.yaml>`__
define ``llama_base.yaml`` as its base.
.. code-block:: yaml
# Example mixtral_base.yaml:
bases:
- llama_base.yaml
init_method_std: 0.01
rotary_base: 1000000
qk_layernorm: false
group_query_attention: true
num_query_groups: 8
# moe parameters
num_experts: 8
moe_router_topk: 2
moe_router_load_balancing_type: aux_loss
moe_aux_loss_coeff: 1e-2
moe_grouped_gemm: true
moe_token_dispatcher_type: alltoall
It is recommended to add a new ``${MODEL_NAME}_base.yaml`` to add a new
category of model and define new models on top of it. For example, to add
Qwen2.5 models in Primus, we define
`qwen2.5_base.yaml <https://github.com/AMD-AIG-AIMA/Primus/blob/v0.1.0-rc1/primus/configs/models/megatron/qwen2.5_base.yaml>`__
and build
`qwen2.5_7B.yaml <https://github.com/AMD-AIG-AIMA/Primus/blob/v0.1.0-rc1/primus/configs/models/megatron/qwen2.5_7B.yaml>`__
and
`qwen2.5_72B.yaml <https://github.com/AMD-AIG-AIMA/Primus/blob/v0.1.0-rc1/primus/configs/models/megatron/qwen2.5_72B.yaml>`__
using ``qwen2.5_base.yaml`` as the base config.
Training parameters
===================
ROCm Megatron-LM also defines the training parameters, like batch size,
tensor-parallelism, precision, as so on, in the training scripts. For example,
Llama3 8B model parameters are defined in
`examples/llama/train_llama3.sh <https://github.com/ROCm/Megatron-LM/blob/rocm_dev/examples/llama/train_llama3.sh>`__
as shown below:
.. code-block:: bash
TP="${TP:-8}"
PP="${PP:-1}"
CP="${CP:-1}"
MBS="${MBS:-1}"
BS="${BS:-8}"
Primus defines the training parameters in top-level YAML files -- see
`examples/megatron/configs/
<https://github.com/AMD-AIG-AIMA/Primus/tree/v0.1.0-rc1/examples/megatron/configs>`__.
For example, the `llama3.1_8B-pretrain.yaml
<https://github.com/AMD-AIG-AIMA/Primus/blob/v0.1.0-rc1/examples/megatron/configs/llama3.1_8B-pretrain.yaml>`__
configuration imports the ``llama3.1_8B.yaml`` model architecture file. Users can then override
the default training parameters in ``llama3.1_8B-pretrain.yaml``.
.. code-block:: yaml
# model to run
model: llama3.1_8B.yaml # Model architecture yaml
overrides:
# log
# disable_wandb: false
# disable_tensorboard: false
stderr_sink_level: DEBUG
log_avg_skip_iterations: 2
log_avg_reset_interval: 50
train_iters: 50
micro_batch_size: 2
global_batch_size: 128
seq_length: 8192
max_position_embeddings: 8192
lr: 1.0e-5
min_lr: 0.0
lr_warmup_iters: 2
lr_decay_iters: null
lr_decay_style: cosine
weight_decay: 0.1
adam_beta1: 0.9
adam_beta2: 0.95
eod_mask_loss: true
init_method_std: 0.008
norm_epsilon: 1.0e-6
Backward compatibility with Megatron-LM
=======================================
The Dockerized environment used for Primus maintains compatibility with Megatron-LM with
limited support. To roll back to using Megatron-LM, follow these steps.
.. code-block:: shell
cd /workspace/Megatron-LM/
pip uninstall megatron-core
pip install -e .
Once Megatron-LM is installed, follow :doc:`the documentation <../megatron-lm>` to run workloads as
usual.

View File

@@ -0,0 +1,602 @@
.. meta::
:description: How to train a model using Megatron-LM for ROCm.
:keywords: ROCm, AI, LLM, train, Megatron-LM, megatron, Llama, tutorial, docker, torch
**********************************************
Training a model with Primus and Megatron-Core
**********************************************
`Primus <https://github.com/AMD-AIG-AIMA/Primus>`__ is a unified and flexible
LLM training framework designed to streamline training. It streamlines LLM
training on AMD Instinct accelerators using a modular, reproducible configuration paradigm.
Primus is backend-agnostic and supports multiple training engines -- including Megatron-Core.
.. note::
Primus with the Megatron-Core backend is intended to replace ROCm
Megatron-LM in this Dockerized training environment. To learn how to migrate
workloads from Megatron-LM to Primus with Megatron-Core, see
:doc:`previous-versions/megatron-lm-primus-migration-guide`.
For ease of use, AMD provides a ready-to-use Docker image for MI300 series accelerators
containing essential components for Primus and Megatron-Core.
.. note::
This Docker environment is based on Python 3.10 and Ubuntu 22.04. For an alternative environment with
Python 3.12 and Ubuntu 24.04, see the :doc:`previous ROCm Megatron-LM v25.6 Docker release <previous-versions/megatron-lm-v25.6>`.
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/primus-megatron-benchmark-models.yaml
{% set dockers = data.dockers %}
{% set docker = dockers[0] %}
.. list-table::
:header-rows: 1
* - Software component
- Version
{% for component_name, component_version in docker.components.items() %}
* - {{ component_name }}
- {{ component_version }}
{% endfor %}
.. _amd-primus-megatron-lm-model-support:
Supported models
================
The following models are pre-optimized for performance on AMD Instinct MI300X series accelerators.
Some instructions, commands, and training examples in this documentation might
vary by model -- select one to get started.
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/primus-megatron-benchmark-models.yaml
{% set model_groups = data.model_groups %}
.. raw:: html
<div id="vllm-benchmark-ud-params-picker" class="container-fluid">
<div class="row">
<div class="col-2 me-2 model-param-head">Model</div>
<div class="row col-10">
{% for model_group in model_groups %}
<div class="col-3 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
{% endfor %}
</div>
</div>
<div class="row mt-1">
<div class="col-2 me-2 model-param-head">Model variant</div>
<div class="row col-10">
{% for model_group in model_groups %}
{% set models = model_group.models %}
{% for model in models %}
{% if models|length % 3 == 0 %}
<div class="col-4 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
{% else %}
<div class="col-6 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
{% endif %}
{% endfor %}
{% endfor %}
</div>
</div>
</div>
.. note::
Some models, such as Llama, require an external license agreement through
a third party (for example, Meta).
System validation
=================
Before running AI workloads, it's important to validate that your AMD hardware is configured
correctly and performing optimally.
If you have already validated your system settings, including aspects like NUMA auto-balancing, you
can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
before starting training.
To test for optimal performance, consult the recommended :ref:`System health benchmarks
<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
system's configuration.
.. _mi300x-amd-primus-megatron-lm-training:
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/primus-megatron-benchmark-models.yaml
{% set dockers = data.dockers %}
{% set docker = dockers[0] %}
Environment setup
=================
Use the following instructions to set up the environment, configure the script to train models, and
reproduce the benchmark results on MI300X series accelerators with the ``{{ docker.pull_tag }}`` image.
.. _amd-primus-megatron-lm-requirements:
Download the Docker image
-------------------------
1. Use the following command to pull the Docker image from Docker Hub.
.. code-block:: shell
docker pull {{ docker.pull_tag }}
2. Launch the Docker container.
.. code-block:: shell
docker run -it \
--device /dev/dri \
--device /dev/kfd \
--device /dev/infiniband \
--network host --ipc host \
--group-add video \
--cap-add SYS_PTRACE \
--security-opt seccomp=unconfined \
--privileged \
-v $HOME:$HOME \
--shm-size 128G \
--name primus_training_env \
{{ docker.pull_tag }}
3. Use these commands if you exit the ``primus_training_env`` container and need to return to it.
.. code-block:: shell
docker start primus_training_env
docker exec -it primus_training_env bash
The Docker container hosts verified release tag ``v0.1.0-rc1`` of the `Primus
<https://github.com/AMD-AIG-AIMA/Primus/tree/v0.1.0-rc1>`__ repository.
.. _amd-primus-megatron-lm-environment-setup:
Configuration
=============
Primus defines a training configuration in YAML for each model in
`examples/megatron/configs <https://github.com/AMD-AIG-AIMA/Primus/tree/v0.1.0-rc1/examples/megatron/configs>`__.
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/primus-megatron-benchmark-models.yaml
{% set model_groups = data.model_groups %}
{% for model_group in model_groups %}
{% for model in model_group.models %}
.. container:: model-doc {{ model.mad_tag }}
To update training parameters for {{ model.model }}, you can update ``examples/megatron/configs/{{ model.config_name }}``.
Note that training configuration YAML files for other models follow this naming convention.
{% endfor %}
{% endfor %}
.. note::
See :ref:`Key options <amd-primus-megatron-lm-benchmark-test-vars>` for more information on configuration options.
Dataset options
---------------
You can use either mock data or real data for training.
* Mock data can be useful for testing and validation. Use the ``mock_data`` field to toggle between mock and real data. The default
value is ``true`` for enabled.
.. code-block:: yaml
mock_data: true
* If you're using a real dataset, update the ``train_data_path`` field to point to the location of your dataset.
.. code-block:: bash
mock_data: false
train_data_path: /path/to/your/dataset
Ensure that the files are accessible inside the Docker container.
.. _amd-primus-megatron-lm-tokenizer:
Tokenizer
---------
In Primus, each model uses a tokenizer from Hugging Face. For example, Llama
3.1 8B model uses ``tokenizer_model: meta-llama/Llama-3.1-8B`` and
``tokenizer_type: Llama3Tokenizer`` defined in the `llama3.1-8B model
<https://github.com/AMD-AIG-AIMA/Primus/tree/v0.1.0-rc1/primus/configs/models/megatron/llama3.1_8B.yaml>`__
definition. As such, you need to set the ``HF_TOKEN`` environment variable with
right permissions to access the tokenizer for each model.
.. code-block:: bash
# Export your HF_TOKEN in the workspace
export HF_TOKEN=<your_hftoken>
.. _amd-primus-megatron-lm-run-training:
Run training
============
Use the following example commands to set up the environment, configure
:ref:`key options <amd-primus-megatron-lm-benchmark-test-vars>`, and run training on
MI300X series accelerators with the AMD Megatron-LM environment.
Single node training
--------------------
To run training on a single node, navigate to ``/workspace/Primus`` and use the following setup command:
.. code-block:: shell
pip install -r requirements.txt
export HSA_NO_SCRATCH_RECLAIM=1
export NVTE_CK_USES_BWD_V3=1
Once setup is complete, run the appropriate training command.
.. container:: model-doc primus_pyt_megatron_lm_train_llama-3.3-70b
To run pre-training for Llama 3.3 70B BF16, run:
.. code-block:: shell
EXP=examples/megatron/configs/llama3.3_70B-pretrain.yaml \
bash ./examples/run_pretrain.sh \
--micro_batch_size 2 \
--global_batch_size 16 \
--train_iters 50
.. container:: model-doc primus_pyt_megatron_lm_train_llama-3.1-8b
To run pre-training for Llama 3.1 8B FP8, run:
.. code-block:: shell
EXP=examples/megatron/configs/llama3.1_8B-pretrain.yaml \
bash ./examples/run_pretrain.sh \
--train_iters 50 \
--fp8 hybrid
For Llama 3.1 8B BF16, use the following command:
.. code-block:: shell
EXP=examples/megatron/configs/llama3.1_8B-pretrain.yaml \
bash ./examples/run_pretrain.sh --train_iters 50
.. container:: model-doc primus_pyt_megatron_lm_train_llama-3.1-70b
To run pre-training for Llama 3.1 70B BF16, run:
.. code-block:: shell
EXP=examples/megatron/configs/llama3.1_70B-pretrain.yaml \
bash ./examples/run_pretrain.sh \
--train_iters 50
To run the training on a single node for Llama 3.1 70B FP8 with proxy, use the following command:
.. code-block:: shell
EXP=examples/megatron/configs/llama3.1_70B-pretrain.yaml \
bash ./examples/run_pretrain.sh \
--train_iters 50 \
--num_layers 40 \
--fp8 hybrid \
--no_fp8_weight_transpose_cache true
.. note::
Use two or more nodes to run the *full* Llama 70B model with FP8 precision.
.. container:: model-doc primus_pyt_megatron_lm_train_llama-2-7b
To run pre-training for Llama 2 7B FP8, run:
.. code-block:: shell
EXP=examples/megatron/configs/llama2_7B-pretrain.yaml \
bash ./examples/run_pretrain.sh \
--train_iters 50 \
--fp8 hybrid
To run pre-training for Llama 2 7B BF16, run:
.. code-block:: shell
EXP=examples/megatron/configs/llama2_7B-pretrain.yaml \
bash ./examples/run_pretrain.sh --train_iters 50
.. container:: model-doc primus_pyt_megatron_lm_train_llama-2-70b
To run pre-training for Llama 2 70B BF16, run:
.. code-block:: shell
EXP=examples/megatron/configs/llama2_70B-pretrain.yaml \
bash ./examples/run_pretrain.sh --train_iters 50
.. container:: model-doc primus_pyt_megatron_lm_train_deepseek-v3-proxy
To run training on a single node for DeepSeek-V3 (MoE with expert parallel) with 3-layer proxy,
use the following command:
.. code-block:: shell
EXP=examples/megatron/configs/deepseek_v3-pretrain.yaml \
bash examples/run_pretrain.sh \
--num_layers 3 \
--moe_layer_freq 1 \
--train_iters 50
.. container:: model-doc primus_pyt_megatron_lm_train_deepseek-v2-lite-16b
To run training on a single node for DeepSeek-V2-Lite (MoE with expert parallel),
use the following command:
.. code-block:: shell
EXP=examples/megatron/configs/deepseek_v2_lite-pretrain.yaml \
bash examples/run_pretrain.sh \
--global_batch_size 256 \
--train_iters 50
.. container:: model-doc primus_pyt_megatron_lm_train_mixtral-8x7b
To run training on a single node for Mixtral 8x7B (MoE with expert parallel),
use the following command:
.. code-block:: shell
EXP=examples/megatron/configs/mixtral_8x7B_v0.1-pretrain.yaml \
bash examples/run_pretrain.sh --train_iters 50
.. container:: model-doc primus_pyt_megatron_lm_train_mixtral-8x22b-proxy
To run training on a single node for Mixtral 8x7B (MoE with expert parallel) with 4-layer proxy,
use the following command:
.. code-block:: shell
EXP=examples/megatron/configs/mixtral_8x22B_v0.1-pretrain.yaml \
bash examples/run_pretrain.sh \
--num_layers 4 \
--pipeline_model_parallel_size 1 \
--micro_batch_size 1 \
--global_batch_size 16 \
--train_iters 50
.. container:: model-doc primus_pyt_megatron_lm_train_qwen2.5-7b
To run training on a single node for Qwen 2.5 7B BF16, use the following
command:
.. code-block:: shell
EXP=examples/megatron/configs/qwen2.5_7B-pretrain.yaml \
bash examples/run_pretrain.sh --train_iters 50
For FP8, use the following command.
.. code-block:: shell
EXP=examples/megatron/configs/qwen2.5_7B-pretrain.yaml \
bash examples/run_pretrain.sh \
--train_iters 50 \
--fp8 hybrid
.. container:: model-doc primus_pyt_megatron_lm_train_qwen2.5-72b
To run the training on a single node for Qwen 2.5 72B BF16, use the following command.
.. code-block:: shell
EXP=examples/megatron/configs/qwen2.5_72B-pretrain.yaml \
bash examples/run_pretrain.sh --train_iters 50
Multi-node training examples
----------------------------
To run training on multiple nodes, you can use the
`run_slurm_pretrain.sh <https://github.com/AMD-AIG-AIMA/Primus/tree/v0.1.0-rc1/examples/run_slurm_pretrain.sh>`__
to launch the multi-node workload. Use the following steps to setup your environment:
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/primus-megatron-benchmark-models.yaml
{% set dockers = data.dockers %}
{% set docker = dockers[0] %}
.. code-block:: shell
cd /workspace/Primus/
export DOCKER_IMAGE={{ docker.pull_tag }}
export HF_TOKEN=<your_HF_token>
export HSA_NO_SCRATCH_RECLAIM=1
export NVTE_CK_USES_BWD_V3=1
export NCCL_IB_HCA=<your_NCCL_IB_HCA> # specify which RDMA interfaces to use for communication
export NCCL_SOCKET_IFNAME=<your_NCCL_SOCKET_IFNAME> # your Network Interface
export GLOO_SOCKET_IFNAME=<your_GLOO_SOCKET_IFNAME> # your Network Interface
export NCCL_IB_GID_INDEX=3 # Set InfiniBand GID index for NCCL communication. Default is 3 for ROCE
.. note::
* Make sure correct network drivers are installed on the nodes. If inside a Docker, either install the drivers inside the Docker container or pass the network drivers from the host while creating Docker container.
* If ``NCCL_IB_HCA`` and ``NCCL_SOCKET_IFNAME`` are not set, Primus will try to auto-detect. However, since NICs can vary accross different cluster, it is encouraged to explicitly export your NCCL parameters for the cluster.
* To find your network interface, you can use ``ip a``.
* To find RDMA interfaces, you can use ``ibv_devices`` to get the list of all the RDMA/IB devices.
.. container:: model-doc primus_pyt_megatron_lm_train_llama-3.3-70b
To train Llama 3.3 70B FP8 on 8 nodes, run:
.. code-block:: shell
NNODES=8 EXP=examples/megatron/configs/llama3.3_70B-pretrain.yaml \
bash examples/run_slurm_pretrain.sh \
--micro_batch_size 4 \
--global_batch_size 256 \
--recompute_num_layers 80 \
--no_fp8_weight_transpose_cache true \
--fp8 hybrid
To train Llama 3.3 70B BF16 on 8 nodes, run:
.. code-block:: shell
NNODES=8 EXP=examples/megatron/configs/llama3.3_70B-pretrain.yaml \
bash examples/run_slurm_pretrain.sh \
--micro_batch_size 1 \
--global_batch_size 256 \
--recompute_num_layers 12
.. container:: model-doc primus_pyt_megatron_lm_train_llama-3.1-8b
To train Llama 3.1 8B FP8 on 8 nodes, run:
.. code-block:: shell
# Adjust the training parameters. For e.g., `global_batch_size: 8 * #single_node_bs` for 8 nodes in this case
NNODES=8 EXP=examples/megatron/configs/llama3.1_8B-pretrain.yaml \
bash ./examples/run_slurm_pretrain.sh \
--global_batch_size 1024 \
--fp8 hybrid
.. container:: model-doc primus_pyt_megatron_lm_train_llama-3.1-70b
To train Llama 3.1 70B FP8 on 8 nodes, run:
.. code-block:: shell
NNODES=8 EXP=examples/megatron/configs/llama3.1_70B-pretrain.yaml \
bash examples/run_slurm_pretrain.sh \
--micro_batch_size 4 \
--global_batch_size 256 \
--recompute_num_layers 80 \
--no_fp8_weight_transpose_cache true \
--fp8 hybrid
To train Llama 3.1 70B BF16 on 8 nodes, run:
.. code-block:: shell
NNODES=8 EXP=examples/megatron/configs/llama3.1_70B-pretrain.yaml \
bash examples/run_slurm_pretrain.sh \
--micro_batch_size 1 \
--global_batch_size 256 \
--recompute_num_layers 12
.. container:: model-doc primus_pyt_megatron_lm_train_llama-2-7b
To train Llama 2 8B FP8 on 8 nodes, run:
.. code-block:: shell
# Adjust the training parameters. For e.g., `global_batch_size: 8 * #single_node_bs` for 8 nodes in this case
NNODES=8 EXP=examples/megatron/configs/llama2_7B-pretrain.yaml bash ./examples/run_slurm_pretrain.sh --global_batch_size 2048 --fp8 hybrid
.. container:: model-doc primus_pyt_megatron_lm_train_llama-2-70b
To train Llama 2 70B FP8 on 8 nodes, run:
.. code-block:: shell
NNODES=8 EXP=examples/megatron/configs/llama2_70B-pretrain.yaml \
bash examples/run_slurm_pretrain.sh \
--micro_batch_size 10 \
--global_batch_size 640 \
--recompute_num_layers 80 \
--no_fp8_weight_transpose_cache true \
--fp8 hybrid
To train Llama 2 70B BF16 on 8 nodes, run:
.. code-block:: shell
NNODES=8 EXP=examples/megatron/configs/llama2_70B-pretrain.yaml \
bash ./examples/run_slurm_pretrain.sh \
--micro_batch_size 2 \
--global_batch_size 1536 \
--recompute_num_layers 12
.. container:: model-doc primus_pyt_megatron_lm_train_mixtral-8x7b
To train Mixtral 8x7B BF16 on 8 nodes, run:
.. code-block:: shell
NNODES=8 EXP=examples/megatron/configs/mixtral_8x7B_v0.1-pretrain.yaml \
bash examples/run_slurm_pretrain.sh \
--micro_batch_size 2 \
--global_batch_size 256
.. container:: model-doc primus_pyt_megatron_lm_train_qwen2.5-72b
To train Qwen2.5 72B FP8 on 8 nodes, run:
.. code-block:: shell
NNODES=8 EXP=examples/megatron/configs/qwen2.5_72B-pretrain.yaml \
bash examples/run_slurm_pretrain.sh \
--micro_batch_size 8 \
--global_batch_size 512 \
--recompute_num_layers 80 \
--no_fp8_weight_transpose_cache true \
--fp8 hybrid
.. _amd-primus-megatron-lm-benchmark-test-vars:
Key options
-----------
The following are key options to take note of
fp8
``hybrid`` enables FP8 GEMMs.
use_torch_fsdp2
``use_torch_fsdp2: 1`` enables torch fsdp-v2. If FSDP is enabled,
set ``use_distributed_optimizer`` and ``overlap_param_gather`` to ``false``.
profile
To enable PyTorch profiling, set these parameters:
.. code-block:: yaml
profile: true
use_pytorch_profiler: true
profile_step_end: 7
profile_step_start: 6
train_iters
The total number of iterations (default: 50).
mock_data
True by default.
micro_batch_size
Micro batch size.
global_batch_size
Global batch size.
recompute_granularity
For activation checkpointing.
num_layers
For using a reduced number of layers as with proxy models.
Previous versions
=================
See :doc:`previous-versions/megatron-lm-history` to find documentation for previous releases
of the ``ROCm/megatron-lm`` Docker image.
This training environment now uses Primus with Megatron as the primary
configuration. Limited support for the legacy ROCm Megatron-LM is still
available. For instructions on using ROCm Megatron-LM, see the
:doc:`megatron-lm` document.

View File

@@ -21,6 +21,8 @@ In this guide, you'll learn about:
- Training a model
- :doc:`With Primus (Megatron-LM backend) <benchmark-docker/primus-megatron>`
- :doc:`With Megatron-LM <benchmark-docker/megatron-lm>`
- :doc:`With PyTorch <benchmark-docker/pytorch-training>`

View File

@@ -27,6 +27,24 @@ subtrees:
title: ROCm on Radeon GPUs
- file: how-to/deep-learning-rocm.md
title: Deep learning frameworks
subtrees:
- entries:
- file: compatibility/ml-compatibility/pytorch-compatibility.rst
title: PyTorch compatibility
- file: compatibility/ml-compatibility/tensorflow-compatibility.rst
title: TensorFlow compatibility
- file: compatibility/ml-compatibility/jax-compatibility.rst
title: JAX compatibility
- file: compatibility/ml-compatibility/verl-compatibility.rst
title: verl compatibility
- file: compatibility/ml-compatibility/stanford-megatron-lm-compatibility.rst
title: Stanford Megatron-LM compatibility
- file: compatibility/ml-compatibility/dgl-compatibility.rst
title: DGL compatibility
- file: compatibility/ml-compatibility/megablocks-compatibility.rst
title: Megablocks compatibility
- file: compatibility/ml-compatibility/taichi-compatibility.rst
title: Taichi compatibility
- file: how-to/build-rocm.rst
title: Build ROCm from source
@@ -44,8 +62,8 @@ subtrees:
title: Training
subtrees:
- entries:
- file: how-to/rocm-for-ai/training/benchmark-docker/megatron-lm.rst
title: Train a model with Megatron-LM
- file: how-to/rocm-for-ai/training/benchmark-docker/primus-megatron.rst
title: Train a model with Primus and Megatron-Core
- file: how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.rst
title: Train a model with PyTorch
- file: how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.rst