Mirror of https://github.com/ROCm/ROCm.git (synced 2026-01-09 22:58:17 -05:00)

Compare commits: docs/7.0-a...rjs/524991 (29 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 393df3e05c | |
| | aa3cdcb3c3 | |
| | e8bb027c20 | |
| | 544186aef8 | |
| | 22524eeaa5 | |
| | d471b04cd5 | |
| | 1c7cff8a47 | |
| | 84c664074f | |
| | 7c6083d840 | |
| | 94099b1398 | |
| | 3b3fc4894b | |
| | 8aba1d2318 | |
| | e9e75cfc46 | |
| | 58b3ad0509 | |
| | 523d8520f3 | |
| | d0c8ba0805 | |
| | 73de8a3e46 | |
| | 1fc312f90f | |
| | fde2647ccd | |
| | 798c8debb5 | |
| | 393ba600c2 | |
| | c64c545b52 | |
| | 76ee1d720f | |
| | 5adc040367 | |
| | 061da8f306 | |
| | e26767bca6 | |
| | 7b6f1800d4 | |
| | a6221937f2 | |
| | 9b102061f4 | |
@@ -86,8 +86,7 @@ jobs:
value: $(Agent.BuildDirectory)/rocm
- name: HIP_INC_DIR
value: $(Agent.BuildDirectory)/rocm
pool:
vmImage: ${{ variables.BASE_BUILD_POOL }}
pool: ${{ variables.MEDIUM_BUILD_POOL }}
workspace:
clean: all
steps:

@@ -33,8 +33,9 @@ parameters:
type: object
default:
- cmake
- libmsgpack-dev
- libboost-filesystem-dev
- libboost-program-options-dev
- libmsgpack-dev
- name: pipModules
type: object
default:

@@ -39,4 +39,6 @@ jobs:
parameters:
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
inputs:
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml

@@ -51,15 +51,15 @@ parameters:
buildJobs:
- { os: ubuntu2204, packageManager: apt }
- { os: almalinux8, packageManager: dnf }
# - name: downstreamComponentMatrix
# type: object
# default:
# - hipBLASLt:
# name: hipBLASLt
# sparseCheckoutDir: projects/hipblaslt
# skipUnifiedBuild: 'false'
# buildDependsOn:
# - hipBLAS_common_build
- name: downstreamComponentMatrix
type: object
default:
- hipBLASLt:
name: hipBLASLt
sparseCheckoutDir: projects/hipblaslt
skipUnifiedBuild: 'false'
buildDependsOn:
- hipBLAS_common_build

jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:

@@ -122,14 +122,14 @@ jobs:
# extraEnvVars:
# - ROCM_PATH:::/home/user/workspace/rocm

# - ${{ if parameters.triggerDownstreamJobs }}:
# - ${{ each component in parameters.downstreamComponentMatrix }}:
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
# parameters:
# checkoutRepo: ${{ parameters.checkoutRepo }}
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
# buildDependsOn: ${{ component.buildDependsOn }}
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
# triggerDownstreamJobs: true
# unifiedBuild: ${{ parameters.unifiedBuild }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}

@@ -77,28 +77,28 @@ parameters:
type: object
default:
buildJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
# - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
# - { os: almalinux8, packageManager: dnf, target: gfx942 }
# - { os: almalinux8, packageManager: dnf, target: gfx90a }
# - { os: almalinux8, packageManager: dnf, target: gfx1201 }
# - { os: almalinux8, packageManager: dnf, target: gfx1100 }
# - { os: almalinux8, packageManager: dnf, target: gfx1030 }
- { pool: rocm-ci_ultra_build_pool, os: ubuntu2204, packageManager: apt, target: gfx942 }
- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx90a }
- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx1201 }
- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx1100 }
- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx1030 }
- { pool: rocm-ci_ultra_build_pool, os: almalinux8, packageManager: dnf, target: gfx942 }
- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx90a }
- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx1201 }
- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx1100 }
- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx1030 }
testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
# - name: downstreamComponentMatrix
# type: object
# default:
# - rocBLAS:
# name: rocBLAS
# sparseCheckoutDir: projects/rocblas
# skipUnifiedBuild: 'false'
# buildDependsOn:
# - hipBLASLt_build
- name: downstreamComponentMatrix
type: object
default:
- rocBLAS:
name: rocBLAS
sparseCheckoutDir: projects/rocblas
skipUnifiedBuild: 'false'
buildDependsOn:
- hipBLASLt_build

jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:

@@ -121,7 +121,7 @@ jobs:
value: $(Agent.BuildDirectory)/rocm
- name: DAY_STRING
value: $[format('{0:ddMMyyyy}', pipeline.startTime)]
pool: ${{ variables.ULTRA_BUILD_POOL }}
pool: ${{ job.pool }}
${{ if eq(job.os, 'almalinux8') }}:
container:
image: rocmexternalcicd.azurecr.io/manylinux228:latest

@@ -140,6 +140,10 @@ jobs:
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- gtest
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}

@@ -156,18 +160,15 @@ jobs:
script: |
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
# hipBLASLt has a script for gtest and lapack
# https://github.com/ROCm/hipBLASLt/blob/develop/deps/CMakeLists.txt
# $(Agent.BuildDirectory)/deps is a temporary folder for the build process
# $(Agent.BuildDirectory)/s/deps is part of the hipBLASLt repo
- task: Bash@3
displayName: Build and install external dependencies
displayName: Build and install LAPACK
inputs:
targetType: inline
script: |
mkdir -p $(Agent.BuildDirectory)/deps
cd $(Agent.BuildDirectory)/deps
cmake -DCMAKE_POSITION_INDEPENDENT_CODE=ON $(Agent.BuildDirectory)/s/deps
mkdir -p $(Agent.BuildDirectory)/temp-deps
cd $(Agent.BuildDirectory)/temp-deps
# position-independent LAPACK is required for almalinux8 builds
cmake -DBUILD_GTEST=OFF -DBUILD_LAPACK=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON $(Agent.BuildDirectory)/s/deps
make
sudo make install
- script: |
@@ -187,7 +188,7 @@ jobs:
parameters:
os: ${{ job.os }}
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
-DCMAKE_INCLUDE_PATH=$(Agent.BuildDirectory)/rocm/llvm/include
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang

@@ -244,6 +245,7 @@ jobs:
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}

@@ -280,14 +282,14 @@ jobs:
environment: test
gpuTarget: ${{ job.target }}

# - ${{ if parameters.triggerDownstreamJobs }}:
# - ${{ each component in parameters.downstreamComponentMatrix }}:
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
# parameters:
# checkoutRepo: ${{ parameters.checkoutRepo }}
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
# buildDependsOn: ${{ component.buildDependsOn }}
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
# triggerDownstreamJobs: true
# unifiedBuild: ${{ parameters.unifiedBuild }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}

@@ -156,6 +156,7 @@ jobs:
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}

@@ -70,8 +70,7 @@ jobs:
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool:
vmImage: ${{ variables.BASE_BUILD_POOL }}
pool: ${{ variables.MEDIUM_BUILD_POOL }}
workspace:
clean: all
steps:

@@ -1,10 +1,29 @@
parameters:
- name: componentName
type: string
default: hipSPARSELt
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
# monorepo related parameters
- name: sparseCheckoutDir
type: string
default: ''
- name: triggerDownstreamJobs
type: boolean
default: false
- name: downstreamAggregateNames
type: string
default: ''
- name: buildDependsOn
type: object
default: null
- name: unifiedBuild
type: boolean
default: false
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline

@@ -64,7 +83,11 @@ parameters:

jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:
- job: hipSPARSELt_build_${{ job.target }}
- job: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
${{ if parameters.buildDependsOn }}:
dependsOn:
- ${{ each build in parameters.buildDependsOn }}:
- ${{ build }}_ubuntu2204_${{ job.target }}
variables:
- group: common
- template: /.azuredevops/variables-global.yml

@@ -91,12 +114,15 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
gpuTarget: ${{ job.target }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
# Build and install gtest and lapack
# $(Pipeline.Workspace)/deps is a temporary folder for the build process
# $(Pipeline.Workspace)/s/deps is part of the hipSPARSELt repo

@@ -131,8 +157,10 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:
gpuTarget: ${{ job.target }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
componentName: ${{ parameters.componentName }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -150,44 +178,49 @@ jobs:
- TENSILE_ROCM_OFFLOAD_BUNDLER_PATH:::/home/user/workspace/rocm/llvm/bin/clang-offload-bundler
installLatestCMake: true

- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: hipSPARSELt_test_${{ job.target }}
dependsOn: hipSPARSELt_build_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: hipSPARSELt
testDir: '$(Agent.BuildDirectory)/rocm/bin'
testExecutable: './hipsparselt-test'
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes --gtest_filter=*pre_checkin*'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test
gpuTarget: ${{ job.target }}
- ${{ if eq(parameters.unifiedBuild, False) }}:
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_ubuntu2204_${{ job.target }}
timeoutInMinutes: 120
dependsOn: ${{ parameters.componentName }}_build_ubuntu2204_${{ job.target }}
condition:
and(succeeded(),
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
eq(${{ parameters.aggregatePipeline }}, False)
)
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ job.target }}_test_pool
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: ${{ job.target }}
${{ if parameters.triggerDownstreamJobs }}:
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: ${{ parameters.componentName }}
testDir: '$(Agent.BuildDirectory)/rocm/bin'
testExecutable: './hipsparselt-test'
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes --gtest_filter=*pre_checkin*'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
environment: test
gpuTarget: ${{ job.target }}
@@ -67,7 +67,6 @@ jobs:
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
skipLlvmSymlink: true
aggregatePipeline: ${{ parameters.aggregatePipeline }}
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml

@@ -86,8 +86,7 @@ jobs:
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool:
vmImage: ${{ variables.BASE_BUILD_POOL }}
pool: ${{ variables.MEDIUM_BUILD_POOL }}
workspace:
clean: all
steps:

@@ -73,8 +73,7 @@ jobs:
- template: /.azuredevops/variables-global.yml
- name: HIP_ROCCLR_HOME
value: $(Build.BinariesDirectory)/rocm
pool:
vmImage: ${{ variables.BASE_BUILD_POOL }}
pool: ${{ variables.MEDIUM_BUILD_POOL }}
workspace:
clean: all
steps:

@@ -33,17 +33,15 @@ parameters:
type: object
default:
- cmake
- ninja-build
- python3-venv
- git
- libmsgpack-dev
- gfortran
- libopenblas-dev
- googletest
- libgtest-dev
- wget
- python3-pip
- libdrm-dev
- libmsgpack-dev
- libopenblas-dev
- ninja-build
- python3-pip
- python3-venv
- wget
- name: pipModules
type: object
default:

@@ -52,18 +50,17 @@ parameters:
- name: rocmDependencies
type: object
default:
- rocm-cmake
- llvm-project
- ROCR-Runtime
- clr
- rocminfo
- rocprofiler-register
- rocm_smi_lib
- rocm-core
- aomp
- aomp-extras
- clr
- hipBLAS-common
- hipBLASLt
- llvm-project
- rocm-cmake
- rocm-core
- rocm_smi_lib
- rocminfo
- rocprofiler-register
- ROCR-Runtime
- roctracer
- name: rocmTestDependencies
type: object

@@ -86,32 +83,40 @@ parameters:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
# - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
# - { os: almalinux8, packageManager: dnf, target: gfx942 }
# - { os: almalinux8, packageManager: dnf, target: gfx90a }
# - { os: almalinux8, packageManager: dnf, target: gfx1201 }
# - { os: almalinux8, packageManager: dnf, target: gfx1100 }
# - { os: almalinux8, packageManager: dnf, target: gfx1030 }
- { os: almalinux8, packageManager: dnf, target: gfx942 }
- { os: almalinux8, packageManager: dnf, target: gfx90a }
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
# - name: downstreamComponentMatrix
# type: object
# default:
# # rocSOLVER depends on both rocBLAS and rocPRIM
# # for a unified build, rocBLAS will be the one to call rocSOLVER
# - rocSOLVER:
# name: rocSOLVER
# sparseCheckoutDir: projects/rocsolver
# skipUnifiedBuild: 'false'
# buildDependsOn:
# - rocBLAS_build
# unifiedBuild:
# downstreamAggregateNames: rocBLAS+rocPRIM
# buildDependsOn:
# - rocBLAS_build
# - rocPRIM_build
- name: downstreamComponentMatrix
type: object
default:
# technically hipSPARSELt is a downstream component of hipSPARSE
# since hipSPARSE is not yet enabled, we will trigger it from rocBLAS in the interim
- hipSPARSELt:
name: hipSPARSELt
sparseCheckoutDir: projects/hipsparselt
skipUnifiedBuild: 'false'
buildDependsOn:
- rocBLAS_build
# rocSOLVER depends on both rocBLAS and rocPRIM
# for a unified build, rocBLAS will be the one to call rocSOLVER
# - rocSOLVER:
# name: rocSOLVER
# sparseCheckoutDir: projects/rocsolver
# skipUnifiedBuild: 'false'
# buildDependsOn:
# - rocBLAS_build
# unifiedBuild:
# downstreamAggregateNames: rocBLAS+rocPRIM
# buildDependsOn:
# - rocBLAS_build
# - rocPRIM_build

jobs:
- ${{ each job in parameters.jobMatrix.buildJobs }}:

@@ -151,6 +156,12 @@ jobs:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aocl.yml
parameters:
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- gtest
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}
@@ -164,21 +175,12 @@ jobs:
parameters:
os: ${{ job.os }}
extraBuildFlags: >-
-DCMAKE_TOOLCHAIN_FILE=toolchain-linux.cmake
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/bin/amdclang++
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/amdclang
-DGPU_TARGETS=${{ job.target }}
-DTensile_CODE_OBJECT_VERSION=default
-DTensile_LOGIC=asm_full
-DTensile_SEPARATE_ARCHITECTURES=ON
-DTensile_LAZY_LIBRARY_LOADING=ON
-DTensile_LIBRARY_FORMAT=msgpack
-DBUILD_CLIENTS_TESTS=ON
-DBUILD_CLIENTS_BENCHMARKS=OFF
-DBUILD_CLIENTS_SAMPLES=OFF
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
parameters:

@@ -208,6 +210,7 @@ jobs:
- ${{ if eq(parameters.unifiedBuild, False) }}:
- ${{ each job in parameters.jobMatrix.testJobs }}:
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
timeoutInMinutes: 120
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
condition:
and(succeeded(),

@@ -222,6 +225,7 @@ jobs:
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}

@@ -258,18 +262,18 @@ jobs:
environment: test
gpuTarget: ${{ job.target }}

# - ${{ if parameters.triggerDownstreamJobs }}:
# - ${{ each component in parameters.downstreamComponentMatrix }}:
# - ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
# - template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
# parameters:
# checkoutRepo: ${{ parameters.checkoutRepo }}
# sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
# triggerDownstreamJobs: true
# unifiedBuild: ${{ parameters.unifiedBuild }}
# ${{ if parameters.unifiedBuild }}:
# buildDependsOn: ${{ component.unifiedBuild.buildDependsOn }}
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ component.unifiedBuild.downstreamAggregateNames }}
# ${{ else }}:
# buildDependsOn: ${{ component.buildDependsOn }}
# downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
- ${{ if parameters.triggerDownstreamJobs }}:
- ${{ each component in parameters.downstreamComponentMatrix }}:
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
triggerDownstreamJobs: true
unifiedBuild: ${{ parameters.unifiedBuild }}
${{ if parameters.unifiedBuild }}:
buildDependsOn: ${{ component.unifiedBuild.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ component.unifiedBuild.downstreamAggregateNames }}
${{ else }}:
buildDependsOn: ${{ component.buildDependsOn }}
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}

@@ -166,6 +166,7 @@ jobs:
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}

@@ -27,6 +27,7 @@ parameters:
- numpy
- tomli
- scipy
- pybind11
- name: rocmDependencies
type: object
default:

@@ -210,7 +210,7 @@ jobs:
parameters:
componentName: ${{ parameters.componentName }}
testDir: '$(Agent.BuildDirectory)/rocm/bin/rocprim'
extraTestParameters: '-I ${{ job.shard }},,${{ job.shardCount }}'
extraTestParameters: '-I ${{ job.shard }},,${{ job.shardCount }} -E device_merge_inplace'
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:

@@ -36,6 +36,7 @@ parameters:
- clr
- llvm-project
- rocDecode
- rocJPEG
- rocm-cmake
- rocm-core
- rocminfo

@@ -192,9 +193,9 @@ jobs:
inputs:
itemPattern: '**/*.whl'
targetPath: $(Agent.BuildDirectory)
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
gpuTarget: ${{ job.target }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:

@@ -221,25 +222,17 @@ jobs:
- task: CMake@1
displayName: 'rocPyDecode Test CMake Flags'
inputs:
workingDirectory: $(Agent.BuildDirectory)/rocm/share/rocpydecode/tests
cmakeArgs: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(PYTHON_USER_SITE)/pybind11;$(PYTHON_DIST_PACKAGES)/pybind11;$(PYBIND11_PATH)
-DCMAKE_BUILD_TYPE=Release
-DGPU_TARGETS=${{ job.target }}
..
.
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: rocPyDecode
testDir: $(Build.SourcesDirectory)/build
# sudo required for pip install but screws up permissions for next pipeline run
- task: Bash@3
displayName: Clean up test environment
condition: always()
inputs:
targetType: inline
script: |
pip uninstall -y rocPyDecode
pip uninstall -y hip-python
testDir: $(Agent.BuildDirectory)/rocm/share/rocpydecode/tests
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
@@ -33,13 +33,11 @@ parameters:
type: object
default:
- cmake
- ninja-build
- libsuitesparse-dev
- gfortran
- libfmt-dev
- git
- googletest
- libgtest-dev
- libfmt-dev
- libsuitesparse-dev
- ninja-build
- python3-pip
- name: rocmDependencies
type: object

@@ -75,13 +73,13 @@ parameters:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
# - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
# - { os: almalinux8, packageManager: dnf, target: gfx942 }
# - { os: almalinux8, packageManager: dnf, target: gfx90a }
# - { os: almalinux8, packageManager: dnf, target: gfx1201 }
# - { os: almalinux8, packageManager: dnf, target: gfx1100 }
# - { os: almalinux8, packageManager: dnf, target: gfx1030 }
- { os: almalinux8, packageManager: dnf, target: gfx942 }
- { os: almalinux8, packageManager: dnf, target: gfx90a }
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
testJobs:
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
- { os: ubuntu2204, packageManager: apt, target: gfx90a }

@@ -119,6 +117,10 @@ jobs:
targetType: inline
script: git clone --depth 1 --branch v3.9.1 https://github.com/Reference-LAPACK/lapack
workingDirectory: '$(Build.SourcesDirectory)'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
parameters:
dependencyList:
- gtest
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
checkoutRef: ${{ parameters.checkoutRef }}

@@ -134,6 +136,7 @@ jobs:
os: ${{ job.os }}
extraBuildFlags: >-
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_Fortran_FLAGS=-fno-optimize-sibling-calls
-DBUILD_TESTING=OFF
-DCBLAS=ON

@@ -146,7 +149,7 @@ jobs:
parameters:
os: ${{ job.os }}
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Pipeline.Workspace)/deps-install
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Pipeline.Workspace)/deps-install;$(Agent.BuildDirectory)/vendor
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
-DAMDGPU_TARGETS=${{ job.target }}

@@ -191,6 +194,7 @@ jobs:
workspace:
clean: all
steps:
- checkout: none
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}

@@ -67,7 +67,6 @@ jobs:
checkoutRef: ${{ parameters.checkoutRef }}
dependencyList: ${{ parameters.rocmDependencies }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
skipLlvmSymlink: true
os: ${{ job.os }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:

@@ -407,7 +407,6 @@ jobs:
dependencyList: ${{ parameters.rocmTestDependencies }}
gpuTarget: $(JOB_GPU_TARGET)
dependencySource: staging
skipLlvmSymlink: true
# get sources to run test scripts
- task: Bash@3
displayName: git clone upstream pytorch

@@ -119,7 +119,6 @@ jobs:
dependencyList: ${{ parameters.rocmDependencies }}
os: ${{ job.os }}
gpuTarget: ${{ job.target }}
skipLibraryLinking: true
- script: df -h
displayName: System disk space after ROCm
- script: du -sh $(Agent.BuildDirectory)/rocm
@@ -12,6 +12,9 @@ parameters:
- name: fileFilter
type: string
default: ''
- name: extractAndDeleteFiles
type: boolean
default: true
# set to true if doing full build of ROCm stack
# and dependencies are pulled from same pipeline
- name: aggregatePipeline

@@ -37,16 +40,17 @@ steps:
buildVersionToDownload: latest # aomp trigger lives in ROCm/ROCm, so cannot use ROCm/aomp branch names
${{ else }}:
buildVersionToDownload: latestFromBranch
- task: ExtractFiles@1
displayName: Extract ${{ parameters.componentName }}
inputs:
archiveFilePatterns: '$(Pipeline.Workspace)/d/**/*.tar.gz'
destinationFolder: '$(Agent.BuildDirectory)/rocm'
cleanDestinationFolder: false
overwriteExistingFiles: true
- task: DeleteFiles@1
displayName: Cleanup Compressed ${{ parameters.componentName }}
inputs:
SourceFolder: '$(Pipeline.Workspace)/d'
Contents: '**/*.tar.gz'
RemoveDotFiles: true
- ${{ if eq(parameters.extractAndDeleteFiles, true) }}:
- task: ExtractFiles@1
displayName: Extract ${{ parameters.componentName }}
inputs:
archiveFilePatterns: '$(Pipeline.Workspace)/d/**/*.tar.gz'
destinationFolder: '$(Agent.BuildDirectory)/rocm'
cleanDestinationFolder: false
overwriteExistingFiles: true
- task: DeleteFiles@1
displayName: Clean up Compressed ${{ parameters.componentName }}
inputs:
SourceFolder: '$(Pipeline.Workspace)/d'
Contents: '**/*.tar.gz'
RemoveDotFiles: true
@@ -15,8 +15,8 @@ steps:
URL_BEGIN="https://artprodcus3.artifacts.visualstudio.com/"
URL_MIDDLE="/_apis/artifact/"
URL_END="/content?format=file&subPath=%2F"
FORMATTED_JOB_NAME=$(echo $(Agent.JobName) | sed 's/ /./g; s/[-_]//g')
ARTIFACT_STRING="pipelineartifact://ROCm-CI/projectId/$(DOWNLOAD_PROJECT_ID)/buildId/$(Build.BuildId)/artifactName/${FORMATTED_JOB_NAME}"
ARTIFACT_NAME="$(Agent.JobName)_$(System.JobAttempt)"
ARTIFACT_STRING="pipelineartifact://ROCm-CI/projectId/$(DOWNLOAD_PROJECT_ID)/buildId/$(Build.BuildId)/artifactName/${ARTIFACT_NAME}"
ENCODED_STRING=$(echo -n "${ARTIFACT_STRING}" | base64 -w 0)
PADDING_COUNT=$(echo -n "${ENCODED_STRING}" | awk -F= '{print NF-1}')
if [ "$PADDING_COUNT" -gt 0 ]; then
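The hunk above swaps the sed-normalized job name for a raw `$(Agent.JobName)_$(System.JobAttempt)` artifact name before base64-encoding the download locator. A standalone sketch of the same encoding steps, with placeholder values standing in for the Azure DevOps predefined variables:

```bash
#!/usr/bin/env bash
# Sketch of the artifact-string encoding above; all IDs are placeholders.
JOB_NAME="hipSPARSELt_build_gfx942"               # stands in for $(Agent.JobName)
JOB_ATTEMPT=1                                      # stands in for $(System.JobAttempt)
PROJECT_ID="00000000-0000-0000-0000-000000000000"  # stands in for $(DOWNLOAD_PROJECT_ID)
BUILD_ID=123456                                    # stands in for $(Build.BuildId)

ARTIFACT_NAME="${JOB_NAME}_${JOB_ATTEMPT}"
ARTIFACT_STRING="pipelineartifact://ROCm-CI/projectId/${PROJECT_ID}/buildId/${BUILD_ID}/artifactName/${ARTIFACT_NAME}"

# -n drops the trailing newline so it is not encoded; -w 0 keeps base64 on one line
ENCODED_STRING=$(echo -n "${ARTIFACT_STRING}" | base64 -w 0)

# awk splits on '=', so NF-1 counts the base64 padding characters
PADDING_COUNT=$(echo -n "${ENCODED_STRING}" | awk -F= '{print NF-1}')
echo "encoded=${ENCODED_STRING}"
echo "padding=${PADDING_COUNT}"
```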
@@ -46,5 +46,5 @@ steps:
displayName: '${{ parameters.artifactName }} Publish'
retryCountOnTaskFailure: 3
inputs:
artifactName: ${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}_$(System.JobAttempt)
artifactName: $(Agent.JobName)_$(System.JobAttempt)
targetPath: '$(Build.ArtifactStagingDirectory)'
@@ -1,10 +1,15 @@
parameters:
- name: os
type: string
default: ubuntu2204
- name: repositoryUrl
type: string
default: https://download.amd.com/developer/eula/aocl/aocl-4-2
- name: packageName
type: string
default: aocl-linux-gcc-4.2.0_1_amd64.deb
type: object
default:
ubuntu2204: aocl-linux-gcc-4.2.0_1_amd64.deb
almalinux8: aocl-linux-gcc-4.2.0-1.x86_64.rpm

steps:
- task: Bash@3

@@ -12,16 +17,19 @@ steps:
inputs:
targetType: inline
workingDirectory: $(Pipeline.Workspace)
script: wget -nv ${{ parameters.repositoryUrl }}/${{ parameters.packageName }}
script: wget -nv ${{ parameters.repositoryUrl }}/${{ parameters.packageName[parameters.os] }}
- task: Bash@3
displayName: Install AOCL
inputs:
targetType: inline
workingDirectory: $(Pipeline.Workspace)
script: sudo apt install -y ./${{ parameters.packageName }}
${{ if eq(parameters.os, 'ubuntu2204') }}:
script: sudo apt install -y ./${{ parameters.packageName[parameters.os] }}
${{ elseif eq(parameters.os, 'almalinux8') }}:
script: sudo dnf install -y ./${{ parameters.packageName[parameters.os] }}
- task: Bash@3
displayName: Clean up AOCL
inputs:
targetType: inline
workingDirectory: $(Pipeline.Workspace)
script: rm -f ${{ parameters.packageName }}
script: rm -f ${{ parameters.packageName[parameters.os] }}
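For reference, a plain-bash sketch of what the reworked AOCL steps expand to once the `packageName` map is indexed by `os`; the file names come from the parameter defaults above, and the `OS` value would normally be supplied by the template caller:

```bash
#!/usr/bin/env bash
# Sketch of the per-OS AOCL download/install/cleanup flow above.
OS="ubuntu2204"   # or "almalinux8"; supplied by the 'os' template parameter
declare -A PKG=(
  [ubuntu2204]="aocl-linux-gcc-4.2.0_1_amd64.deb"
  [almalinux8]="aocl-linux-gcc-4.2.0-1.x86_64.rpm"
)

wget -nv "https://download.amd.com/developer/eula/aocl/aocl-4-2/${PKG[$OS]}"

if [ "$OS" = "ubuntu2204" ]; then
  sudo apt install -y "./${PKG[$OS]}"   # Debian package on Ubuntu
elif [ "$OS" = "almalinux8" ]; then
  sudo dnf install -y "./${PKG[$OS]}"   # RPM package on AlmaLinux
fi

rm -f "${PKG[$OS]}"
```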
@@ -52,6 +52,7 @@ parameters:
libexpat-dev: expat-devel
libffi-dev: libffi-devel
libfftw3-dev: fftw-devel
libfmt-dev: fmt-devel
libgmp-dev: gmp-devel
liblzma-dev: xz-devel
libmpfr-dev: mpfr-devel

@@ -19,16 +19,6 @@ parameters:
- name: gpuTarget
type: string
default: ''
# set to true if you're calling this template file multiple times in the same pipeline
# only leave last call false to optimize sequence
- name: skipLibraryLinking
type: boolean
default: false
# set to true if llvm-project is not downloaded in a particular call
# or if you just don't want the symlink
- name: skipLlvmSymlink
type: boolean
default: false
# set to true if dlopen calls for HIP libraries are causing failures
# because they do not follow shared library symlink convention
- name: setupHIPLibrarySymlinks

@@ -367,6 +357,7 @@ steps:
componentName: ${{ split(dependency, ':')[0] }}
pipelineId: ${{ parameters.componentVarList[split(dependency, ':')[0]].pipelineId }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
extractAndDeleteFiles: false
${{ if parameters.componentVarList[split(dependency, ':')[0]].hasGpuTarget }}:
fileFilter: "${{ split(dependency, ':')[1] }}*_${{ parameters.os }}_${{ parameters.gpuTarget }}"
# dependencySource = staging

@@ -405,6 +396,7 @@ steps:
componentName: ${{ dependency }}
pipelineId: ${{ parameters.componentVarList[dependency].pipelineId }}
aggregatePipeline: ${{ parameters.aggregatePipeline }}
extractAndDeleteFiles: false
${{ if parameters.componentVarList[dependency].hasGpuTarget }}:
fileFilter: ${{ parameters.os }}_${{ parameters.gpuTarget }}
${{ else }}:
@@ -430,8 +422,20 @@ steps:
# default = staging
${{ else }}:
branchName: ${{ parameters.componentVarList[dependency].stagingBranch }}
# Set link to redirect llvm folder
- ${{ if eq(parameters.skipLlvmSymlink, false) }}:
- task: ExtractFiles@1
displayName: Extract ROCm artifacts
inputs:
archiveFilePatterns: $(Pipeline.Workspace)/d/**/*.tar.gz
destinationFolder: $(Agent.BuildDirectory)/rocm
cleanDestinationFolder: false
overwriteExistingFiles: true
- task: DeleteFiles@1
displayName: Clean up ROCm artifacts
inputs:
SourceFolder: $(Pipeline.Workspace)/d
Contents: '**/*.tar.gz'
RemoveDotFiles: true
- ${{ if containsValue(parameters.dependencyList, 'llvm-project') }}:
- task: Bash@3
displayName: Symlink from rocm/llvm to rocm/lib/llvm
inputs:

@@ -439,6 +443,7 @@ steps:
script: |
sudo mkdir -p $(Agent.BuildDirectory)/rocm/lib
sudo ln -sr $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm
echo "Created symlink from rocm/llvm to rocm/lib/llvm"
- task: Bash@3
displayName: Symlink executables from rocm/llvm/bin to rocm/bin
inputs:

@@ -446,7 +451,14 @@ steps:
script: |
for file in amdclang amdclang++ amdclang-cl amdclang-cpp amdflang amdlld aompcc mygpu mycpu offload-arch; do
sudo ln -sr $(Agent.BuildDirectory)/rocm/llvm/bin/$file $(Agent.BuildDirectory)/rocm/bin/$file
echo "Created symlink from rocm/llvm/bin/$file to rocm/bin/$file"
done
- ${{ if containsValue(parameters.dependencyList, 'rocm-core') }}:
- task: Bash@3
displayName: Print rocm/.info/version
inputs:
targetType: inline
script: cat $(Agent.BuildDirectory)/rocm/.info/version
# dlopen calls within a ctest or pytest sequence run into issues when the shared library symlink convention is not followed
# the convention is as follows:
# unversioned .so is a symlink to major version .so
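The dlopen comment above refers to the standard Linux soname chain; only the first link is spelled out in this hunk, so the rest of the layout below is an assumption based on the usual convention, with a hypothetical library name:

```bash
# Hypothetical example of the shared-library symlink convention:
#   libfoo.so -> libfoo.so.1 -> libfoo.so.1.2.3
cd /opt/example/lib                   # placeholder directory
touch libfoo.so.1.2.3                 # the real versioned library
ln -sf libfoo.so.1.2.3 libfoo.so.1    # major-version name -> full version (assumed)
ln -sf libfoo.so.1 libfoo.so          # unversioned name -> major version (as stated above)
# dlopen("libfoo.so") and dlopen("libfoo.so.1") then both resolve to the real file
```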
@@ -483,17 +495,16 @@ steps:
inputs:
targetType: inline
script: ls -la1R $(Agent.BuildDirectory)/rocm
- ${{ if eq(parameters.skipLibraryLinking, false) }}:
- task: Bash@3
displayName: 'Link ROCm shared libraries'
inputs:
targetType: inline
# OS ignores if the ROCm lib folder shows up more than once
script: |
echo $(Agent.BuildDirectory)/rocm/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
echo $(Agent.BuildDirectory)/rocm/llvm/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
echo $(Agent.BuildDirectory)/rocm/lib64 | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
echo $(Agent.BuildDirectory)/rocm/llvm/lib64 | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
sudo cat /etc/ld.so.conf.d/rocm-ci.conf
sudo ldconfig -v
ldconfig -p
- task: Bash@3
displayName: 'Link ROCm shared libraries'
inputs:
targetType: inline
# OS ignores if the ROCm lib folder shows up more than once
script: |
echo $(Agent.BuildDirectory)/rocm/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
echo $(Agent.BuildDirectory)/rocm/llvm/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
echo $(Agent.BuildDirectory)/rocm/lib64 | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
echo $(Agent.BuildDirectory)/rocm/llvm/lib64 | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
sudo cat /etc/ld.so.conf.d/rocm-ci.conf
sudo ldconfig -v
ldconfig -p
@@ -23,13 +23,14 @@ steps:
inputs:
targetType: inline
script: |
sudo apt-get install -y jq
${{ iif(or(eq(parameters.os, 'ubuntu2204'), eq(parameters.os, 'ubuntu2404')), 'sudo apt-get install -y jq', '') }}

# RESOURCES_REPOSITORIES is a runtime variable (not an env var!) that contains quotations and newlines
# So we need to save it to a file to properly preserve its formatting and contents
cat <<EOF > resources.repositories
$(RESOURCES_REPOSITORIES)
EOF
echo "Value of resources.repositories:"
cat resources.repositories

IS_TAG_BUILD=$(jq 'has("release_repo")' resources.repositories)
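As a quick illustration of the `jq has()` check above, with a stubbed repositories payload (the real `$(RESOURCES_REPOSITORIES)` value is injected by Azure DevOps at runtime, and its exact shape is not shown here):

```bash
#!/usr/bin/env bash
# Stubbed version of the tag-build detection; the payload shape is illustrative.
cat <<'EOF' > resources.repositories
{"self": {"ref": "refs/heads/develop"}, "release_repo": {"ref": "refs/tags/rocm-7.0.0"}}
EOF
IS_TAG_BUILD=$(jq 'has("release_repo")' resources.repositories)
echo "IS_TAG_BUILD=${IS_TAG_BUILD}"   # prints true for the payload above
```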
@@ -66,8 +67,6 @@ steps:
)
' resources.repositories)

manifest_json=$(Build.ArtifactStagingDirectory)/manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.json

dependencies=()
for manifest_file in $(Pipeline.Workspace)/d/**/manifest_*.json; do
echo "Processing $manifest_file"

@@ -78,6 +77,10 @@ steps:
done
dependencies_json=$(printf '%s\n' "${dependencies[@]}" | jq -s '.')

manifest_filename="manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}"
echo "##vso[task.setvariable variable=manifest_filename]$manifest_filename"
manifest_json=$(Build.ArtifactStagingDirectory)/$manifest_filename.json

jq -n \
--argjson current "$current" \
--argjson dependencies "$dependencies_json" \

@@ -111,8 +114,14 @@ steps:
')
dependencies_rows=$(echo $dependencies_rows)
echo "##vso[task.setvariable variable=dependencies_rows;]$dependencies_rows"

cat $manifest_json
- task: Bash@3
displayName: Print manifest.json
condition: always()
continueOnError: true
inputs:
targetType: inline
script: |
cat $(Build.ArtifactStagingDirectory)/$(manifest_filename).json
- task: Bash@3
displayName: Create manifest.html
condition: always()

@@ -120,10 +129,10 @@ steps:
inputs:
targetType: inline
script: |
manifest_html=$(Build.ArtifactStagingDirectory)/manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html
manifest_html="$(Build.ArtifactStagingDirectory)/$(manifest_filename).html"
cat <<EOF > $manifest_html
<html>
<h1>Manifest</h1>
<h1>$(manifest_filename)</h1>
<h2>Current</h2>
<table border="1">
<tr>

@@ -163,7 +172,7 @@ steps:
continueOnError: true
inputs:
tabName: Manifest
reportDir: $(Build.ArtifactStagingDirectory)/manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html
reportDir: $(Build.ArtifactStagingDirectory)/$(manifest_filename).html
- task: Bash@3
displayName: Save manifest artifact file name
condition: always()

@@ -172,5 +181,5 @@ steps:
workingDirectory: $(Pipeline.Workspace)
targetType: inline
script: |
echo "manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.html" >> pipelineArtifacts.txt
echo "manifest_${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.json" >> pipelineArtifacts.txt
echo "$(manifest_filename).html" >> pipelineArtifacts.txt
echo "$(manifest_filename).json" >> pipelineArtifacts.txt
@@ -17,7 +17,6 @@ steps:
script: |
AZ_API="https://dev.azure.com/ROCm-CI/ROCm-CI/_apis"
GH_API="https://api.github.com/repos/ROCm"
ARTIFACT_NAME="composablekernelbuild${{ parameters.gpuTarget }}"
EXIT_CODE=0

# Try to find an Azure build for the specific CK commit called out in MIOpen's requirements.txt

@@ -39,8 +38,15 @@ steps:
echo "Found specific CK build ID: $CK_BUILD_ID"
fi

AZURE_URL="$AZ_API/build/builds/$CK_BUILD_ID/artifacts?artifactName=$ARTIFACT_NAME&api-version=7.1"
ARTIFACT_URL=$(curl -s $AZURE_URL | jq '.resource.downloadUrl' | tr -d '"')
AZURE_URL="$AZ_API/build/builds/$CK_BUILD_ID/artifacts?api-version=7.1"
ARTIFACT_URL=$(curl -s $AZURE_URL | \
jq --arg os "ubuntu2204" --arg gfx "${{ parameters.gpuTarget }}" '
.value
| map(select(.name | test($os) and test($gfx)))
| max_by(.name | capture("drop_(?<dropNumber>\\d+)").dropNumber | tonumber)
| .resource.downloadUrl
' | \
tr -d '"')

# If using the specific CK commit and it doesn't have any valid artifacts, use latest successful CK build instead
if { [[ -z "$ARTIFACT_URL" ]] || [[ "$ARTIFACT_URL" == "null" ]]; } && [[ $EXIT_CODE -eq 0 ]]; then

@@ -48,8 +54,15 @@ steps:
LATEST_BUILD_URL="$AZ_API/build/builds?definitions=$(COMPOSABLE_KERNEL_PIPELINE_ID)&statusFilter=completed&resultFilter=succeeded&\$top=1&api-version=7.1"
CK_BUILD_ID=$(curl -s $LATEST_BUILD_URL | jq '.value[0].id')
echo "Found latest CK build ID: $CK_BUILD_ID"
AZURE_URL="$AZ_API/build/builds/$CK_BUILD_ID/artifacts?artifactName=$ARTIFACT_NAME&api-version=7.1"
ARTIFACT_URL=$(curl -s $AZURE_URL | jq '.resource.downloadUrl' | tr -d '"')
AZURE_URL="$AZ_API/build/builds/$CK_BUILD_ID/artifacts?api-version=7.1"
ARTIFACT_URL=$(curl -s $AZURE_URL | \
jq --arg os "ubuntu2204" --arg gfx "${{ parameters.gpuTarget }}" '
.value
| map(select(.name | test($os) and test($gfx)))
| max_by(.name | capture("drop_(?<dropNumber>\\d+)").dropNumber | tonumber)
| .resource.downloadUrl
' | \
tr -d '"')
EXIT_CODE=2
fi
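The jq filter introduced in these two hunks can be exercised standalone. A sketch with a fabricated artifact listing (real responses from the Azure `builds/.../artifacts` API carry more fields and names):

```bash
#!/usr/bin/env bash
# Pick the ubuntu2204/gfx942 artifact with the highest drop number, as above.
cat <<'EOF' > artifacts.json
{"value": [
  {"name": "composable_kernel_ubuntu2204_gfx942_drop_3",
   "resource": {"downloadUrl": "https://example.invalid/drop3"}},
  {"name": "composable_kernel_ubuntu2204_gfx942_drop_7",
   "resource": {"downloadUrl": "https://example.invalid/drop7"}},
  {"name": "composable_kernel_almalinux8_gfx942_drop_9",
   "resource": {"downloadUrl": "https://example.invalid/alma9"}}
]}
EOF
# test() regex-matches the name; capture() pulls out the drop number for max_by
jq --arg os "ubuntu2204" --arg gfx "gfx942" '
  .value
  | map(select(.name | test($os) and test($gfx)))
  | max_by(.name | capture("drop_(?<dropNumber>\\d+)").dropNumber | tonumber)
  | .resource.downloadUrl
' artifacts.json | tr -d '"'
# prints: https://example.invalid/drop7
```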
@@ -57,8 +70,8 @@ steps:
wget --tries=5 --waitretry=10 --retry-connrefused -nv $ARTIFACT_URL -O $(System.ArtifactsDirectory)/ck.zip
unzip $(System.ArtifactsDirectory)/ck.zip -d $(System.ArtifactsDirectory)
mkdir -p $(Agent.BuildDirectory)/rocm
tar -zxvf $(System.ArtifactsDirectory)/$ARTIFACT_NAME/*.tar.gz -C $(Agent.BuildDirectory)/rocm
rm -r $(System.ArtifactsDirectory)/ck.zip $(System.ArtifactsDirectory)/$ARTIFACT_NAME
tar -zxvf $(System.ArtifactsDirectory)/composable_kernel*/*.tar.gz -C $(Agent.BuildDirectory)/rocm
rm -r $(System.ArtifactsDirectory)/ck.zip $(System.ArtifactsDirectory)/composable_kernel*

if [[ $EXIT_CODE -ne 0 ]]; then
BUILD_COMMIT=$(curl -s $AZ_API/build/builds/$CK_BUILD_ID | jq '.sourceVersion' | tr -d '"')
@@ -66,11 +66,11 @@ variables:
- name: HIP_TESTS_PIPELINE_ID
value: 233
- name: HIPBLAS_COMMON_PIPELINE_ID
value: 223
value: 300
- name: HIPBLAS_PIPELINE_ID
value: 87
- name: HIPBLASLT_PIPELINE_ID
value: 112
value: 301
- name: HIPCUB_PIPELINE_ID
value: 277
- name: HIPFFT_PIPELINE_ID

@@ -104,7 +104,7 @@ variables:
- name: ROCALUTION_PIPELINE_ID
value: 89
- name: ROCBLAS_PIPELINE_ID
value: 85
value: 302
- name: ROCDBGAPI_PIPELINE_ID
value: 135
- name: ROCDECODE_PIPELINE_ID

@@ -654,4 +654,4 @@ There are a number of upcoming changes planned for HIP runtime API in an upcomin
that are not backward compatible with prior releases. Most of these changes increase
alignment between HIP and CUDA APIs or behavior. Some of the upcoming changes are to
clean up header files, remove namespace collision, and have a clear separation between
`hipRTC` and HIP runtime.
`hipRTC` and HIP runtime. For more information, see [HIP 7.0 Is Coming: What You Need to Know to Stay Ahead](https://rocm.blogs.amd.com/ecosystems-and-partners/transition-to-hip-7.0-blog/README.html).
@@ -155,7 +155,7 @@ compatibility and system requirements.
.. [#mi300x] Oracle Linux and Azure Linux are supported only on AMD Instinct MI300X.
.. [#single-node] Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
.. [#mi300_620] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
.. [#kfd_support] Starting from ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart (assuming hardware support is available in both). For earlier ROCm releases, the compatibility is provided for +/- 2 releases. These are the compatibility combinations that are currently supported.
.. [#kfd_support] As of ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The tested user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
.. [#ROCT-rocr] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
.. [#RDNA-OS] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.5, and RHEL 9.4.

@@ -235,6 +235,6 @@ Expand for full historical view of:
.. [#mi300_610-past-60] **For ROCm 6.1.0** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
.. [#mi300_602-past-60] **For ROCm 6.0.2** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
.. [#mi300_600-past-60] **For ROCm 6.0.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
.. [#kfd_support-past-60] Starting from ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart (assuming hardware support is available in both). For earlier ROCm releases, the compatibility is provided for +/- 2 releases. These are the compatibility combinations that are currently supported.
.. [#kfd_support-past-60] As of ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The tested user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
.. [#ROCT-rocr-past-60] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
.. [#RDNA-OS-past-60] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.5, and RHEL 9.4.

@@ -8,7 +8,7 @@ MI300 and MI200 series performance counters and metrics

This document lists and describes the hardware performance counters and derived metrics available
for the AMD Instinct™ MI300 and MI200 GPU. You can also access this information using the
:doc:`ROCProfiler tool <rocprofiler:rocprofv1>`.
:doc:`ROCprofiler-SDK <rocprofiler-sdk:how-to/using-rocprofv3>`.

MI300 and MI200 series performance counters
===============================================================
@@ -129,6 +129,7 @@ html_theme_options = {"link_main_doc": False}
redirects = {"reference/openmp/openmp": "../../about/compatibility/openmp.html"}

numfig = False
suppress_warnings = ["autosectionlabel.*"]

html_context = {
"project_path" : {project_path},

@@ -0,0 +1,162 @@
vllm_benchmark:
unified_docker:
latest:
pull_tag: rocm/vllm:rocm6.4.1_vllm_0.9.0.1_20250605
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.9.0.1_20250605/images/sha256-f48beeb3d72663a93c77211eb45273d564451447c097e060befa713d565fa36c
rocm_version: 6.4.1
vllm_version: 0.9.0.1 (0.9.0.2.dev108+g71faa1880.rocm641)
pytorch_version: 2.7.0+gitf717b2a
hipblaslt_version: 0.15
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.1 8B
mad_tag: pyt_vllm_llama-3.1-8b
model_repo: meta-llama/Llama-3.1-8B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: float16
- model: Llama 3.1 70B
mad_tag: pyt_vllm_llama-3.1-70b
model_repo: meta-llama/Llama-3.1-70B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
precision: float16
- model: Llama 3.1 405B
mad_tag: pyt_vllm_llama-3.1-405b
model_repo: meta-llama/Llama-3.1-405B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
precision: float16
- model: Llama 2 7B
mad_tag: pyt_vllm_llama-2-7b
model_repo: meta-llama/Llama-2-7b-chat-hf
url: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
precision: float16
- model: Llama 2 70B
mad_tag: pyt_vllm_llama-2-70b
model_repo: meta-llama/Llama-2-70b-chat-hf
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
precision: float16
- model: Llama 3.1 8B FP8
mad_tag: pyt_vllm_llama-3.1-8b_fp8
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
precision: float8
- model: Llama 3.1 70B FP8
mad_tag: pyt_vllm_llama-3.1-70b_fp8
model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
precision: float8
- model: Llama 3.1 405B FP8
mad_tag: pyt_vllm_llama-3.1-405b_fp8
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
precision: float8
- group: Mistral AI
tag: mistral
models:
- model: Mixtral MoE 8x7B
mad_tag: pyt_vllm_mixtral-8x7b
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
precision: float16
- model: Mixtral MoE 8x22B
mad_tag: pyt_vllm_mixtral-8x22b
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
precision: float16
- model: Mistral 7B
mad_tag: pyt_vllm_mistral-7b
model_repo: mistralai/Mistral-7B-Instruct-v0.3
url: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3
precision: float16
- model: Mixtral MoE 8x7B FP8
mad_tag: pyt_vllm_mixtral-8x7b_fp8
model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
precision: float8
- model: Mixtral MoE 8x22B FP8
mad_tag: pyt_vllm_mixtral-8x22b_fp8
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
precision: float8
- model: Mistral 7B FP8
mad_tag: pyt_vllm_mistral-7b_fp8
model_repo: amd/Mistral-7B-v0.1-FP8-KV
url: https://huggingface.co/amd/Mistral-7B-v0.1-FP8-KV
precision: float8
- group: Qwen
tag: qwen
models:
- model: Qwen2 7B
mad_tag: pyt_vllm_qwen2-7b
model_repo: Qwen/Qwen2-7B-Instruct
url: https://huggingface.co/Qwen/Qwen2-7B-Instruct
precision: float16
- model: Qwen2 72B
mad_tag: pyt_vllm_qwen2-72b
model_repo: Qwen/Qwen2-72B-Instruct
url: https://huggingface.co/Qwen/Qwen2-72B-Instruct
precision: float16
- model: QwQ-32B
mad_tag: pyt_vllm_qwq-32b
model_repo: Qwen/QwQ-32B
url: https://huggingface.co/Qwen/QwQ-32B
precision: float16
tunableop: true
- group: Databricks DBRX
tag: dbrx
models:
- model: DBRX Instruct
mad_tag: pyt_vllm_dbrx-instruct
model_repo: databricks/dbrx-instruct
url: https://huggingface.co/databricks/dbrx-instruct
precision: float16
- model: DBRX Instruct FP8
mad_tag: pyt_vllm_dbrx_fp8
model_repo: amd/dbrx-instruct-FP8-KV
url: https://huggingface.co/amd/dbrx-instruct-FP8-KV
precision: float8
- group: Google Gemma
tag: gemma
models:
- model: Gemma 2 27B
mad_tag: pyt_vllm_gemma-2-27b
model_repo: google/gemma-2-27b
url: https://huggingface.co/google/gemma-2-27b
precision: float16
- group: Cohere
tag: cohere
models:
- model: C4AI Command R+ 08-2024
mad_tag: pyt_vllm_c4ai-command-r-plus-08-2024
model_repo: CohereForAI/c4ai-command-r-plus-08-2024
url: https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024
precision: float16
- model: C4AI Command R+ 08-2024 FP8
mad_tag: pyt_vllm_command-r-plus_fp8
model_repo: amd/c4ai-command-r-plus-FP8-KV
url: https://huggingface.co/amd/c4ai-command-r-plus-FP8-KV
precision: float8
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek MoE 16B
mad_tag: pyt_vllm_deepseek-moe-16b-chat
model_repo: deepseek-ai/deepseek-moe-16b-chat
url: https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat
precision: float16
- group: Microsoft Phi
tag: phi
models:
- model: Phi-4
mad_tag: pyt_vllm_phi-4
model_repo: microsoft/phi-4
url: https://huggingface.co/microsoft/phi-4
- group: TII Falcon
tag: falcon
models:
- model: Falcon 180B
mad_tag: pyt_vllm_falcon-180b
model_repo: tiiuae/falcon-180B
url: https://huggingface.co/tiiuae/falcon-180B
precision: float16
@@ -31,3 +31,11 @@ pytorch_inference_benchmark:
model_repo: genmo/mochi-1-preview
url: https://huggingface.co/genmo/mochi-1-preview
precision: float16
- group: Wan2.1
tag: wan
models:
- model: Wan2.1
mad_tag: pyt_wan2.1_inference
model_repo: Wan-AI/Wan2.1-T2V-14B
url: https://huggingface.co/Wan-AI/Wan2.1-T2V-14B
precision: bfloat16

@@ -1,10 +1,11 @@
vllm_benchmark:
unified_docker:
latest:
pull_tag: rocm/vllm:rocm6.4.1_vllm_0.9.0.1_20250605
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.9.0.1_20250605/images/sha256-f48beeb3d72663a93c77211eb45273d564451447c097e060befa713d565fa36c
# TODO: update me
pull_tag: rocm/vllm:rocm6.4.1_vllm_0.9.1_20250702
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.9.1_20250702/images/sha256-45068a2079cb8df554ed777141bf0c67d6627c470a897256e60c9f262677faab
rocm_version: 6.4.1
vllm_version: 0.9.0.1 (0.9.0.2.dev108+g71faa1880.rocm641)
vllm_version: 0.9.1 (0.9.2.dev206+gb335519f2.rocm641)
pytorch_version: 2.7.0+gitf717b2a
hipblaslt_version: 0.15
model_groups:

@@ -7,21 +7,21 @@ AMD Instinct MI300X performance guides
**************************************

The following performance guides provide essential guidance on the necessary
steps to properly :doc:`configure your system for AMD Instinct™ MI300X
accelerators <../system-optimization/mi300x>`. They include detailed
instructions on system settings and application :doc:`workload tuning
<../rocm-for-ai/inference-optimization/workload>` to help you
leverage the maximum capabilities of these accelerators and achieve superior
performance.
steps to properly `configure your system for AMD Instinct™ MI300X accelerators
<https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
They include detailed instructions on system settings and application
:doc:`workload tuning </how-to/rocm-for-ai/inference-optimization/workload>` to
help you leverage the maximum capabilities of these accelerators and achieve
superior performance.

* `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`__
covers essential system settings and system management practices to configure
your AMD Instinct MI300X system for performance.

* :doc:`../rocm-for-ai/inference-optimization/workload` covers steps to
* :doc:`/how-to/rocm-for-ai/inference-optimization/workload` covers steps to
optimize the performance of AMD Instinct MI300X series accelerators for HPC
and deep learning operations.

* :doc:`../rocm-for-ai/inference/vllm-benchmark` introduces a preconfigured
* :doc:`/how-to/rocm-for-ai/inference/benchmark-docker/vllm` introduces a preconfigured
environment for LLM inference, designed to help you test performance with
popular models on AMD Instinct MI300X series accelerators.

@@ -24,5 +24,3 @@ training, fine-tuning, and inference. It leverages popular machine learning fram
- :doc:`Fine-tuning and inference <fine-tuning-and-inference>` using a
:doc:`single-accelerator <single-gpu-fine-tuning-and-inference>` or
:doc:`multi-accelerator <multi-gpu-fine-tuning-and-inference>` system.



@@ -6,7 +6,7 @@
Use ROCm for AI
**************************

ROCm™ is an open-source software platform that enables high-performance computing and machine learning applications. It features the ability to accelerate training, fine-tuning, and inference for AI application development. With ROCm, you can access the full power of AMD GPUs, which can significantly improve the performance and efficiency of AI workloads.
ROCm is an open-source software platform that enables high-performance computing and machine learning applications. It features the ability to accelerate training, fine-tuning, and inference for AI application development. With ROCm, you can access the full power of AMD GPUs, which can significantly improve the performance and efficiency of AI workloads.

You can use ROCm to perform distributed training, which enables you to train models across multiple GPUs or nodes simultaneously. Additionally, ROCm supports mixed-precision training, which can help reduce the memory and compute requirements of training workloads. For fine-tuning, ROCm provides access to various algorithms and optimization techniques. In terms of inference, ROCm provides several techniques that can help you optimize your models for deployment, such as quantization, GEMM tuning, and optimization with composable kernel.


@@ -151,8 +151,8 @@ desired effect. Continuous iteration helps refine the performance gains and
address any new bottlenecks that may emerge.

ROCm provides a prebuilt optimized Docker image that has everything required to implement
the tips in this section. It includes ROCm, vLLM, PyTorch, and tuning files in the CSV
format. For more information, see :doc:`../inference/vllm-benchmark`.
the LLM inference tips in this section. It includes ROCm, PyTorch, and vLLM.
For more information, see :doc:`/how-to/rocm-for-ai/inference/benchmark-docker/vllm`.
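
As a quick illustration, pulling that image is a single command (the tag below is the new one introduced in this changeset; substitute the tag that matches your ROCm release):

.. code-block:: shell

docker pull rocm/vllm:rocm6.4.1_vllm_0.9.1_20250702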

.. _mi300x-profiling-tools:

@@ -343,9 +343,10 @@ The following performance tips are not *specific* to vLLM -- they are general
but relevant in this context. You can tune the following vLLM parameters to
achieve optimal request latency and throughput performance.

* As described in :ref:`mi300x-env-vars`, the environment
variable ``HIP_FORCE_DEV_KERNARG`` can improve vLLM performance. Set it to
``export HIP_FORCE_DEV_KERNARG=1``.
* As described in `Environment variables (MI300X)
<https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html#environment-variables>`_,
the environment variable ``HIP_FORCE_DEV_KERNARG`` can improve vLLM
performance. Set it to ``export HIP_FORCE_DEV_KERNARG=1``.

* Set the :ref:`RCCL environment variable <mi300x-rccl>` ``NCCL_MIN_NCHANNELS``
to ``112`` to increase the number of channels on MI300X to potentially improve
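
Taken together, a minimal sketch of applying both of these settings in the shell that launches vLLM (values as recommended above):

.. code-block:: shell

export HIP_FORCE_DEV_KERNARG=1
export NCCL_MIN_NCHANNELS=112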

@@ -410,9 +411,9 @@ for additional performance tips. :ref:`fine-tuning-llms-vllm` describes vLLM
usage with ROCm.

ROCm provides a prebuilt optimized Docker image for validating the performance
of LLM inference with vLLM on the MI300X accelerator. The Docker image includes
ROCm, vLLM, PyTorch, and tuning files in the CSV format. For more information,
see :doc:`../inference/vllm-benchmark`.
of LLM inference with vLLM on MI300X series accelerators. The Docker image includes
ROCm, vLLM, and PyTorch. For more information, see
:doc:`/how-to/rocm-for-ai/inference/benchmark-docker/vllm`.

.. _mi300x-vllm-throughput-measurement:

@@ -1477,8 +1478,9 @@ following command: ``cat /proc/sys/kernel/numa_balancing`` and
checking whether the output is ``0``.

If the output is ``1``, you can disable NUMA auto-balancing by running the
following command: ``sudo sysctl kernel.numa_balancing=0``. For more
details, see :ref:`AMD Instinct MI300X system optimization <mi300x-disable-numa>`.
following command: ``sudo sysctl kernel.numa_balancing=0``. For more details,
see `AMD Instinct MI300X system optimization
<https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html#disable-numa-auto-balancing>`_.
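
For reference, the check-and-disable sequence described above as one sketch:

.. code-block:: shell

# prints 1 if NUMA auto-balancing is currently enabled
cat /proc/sys/kernel/numa_balancing
# disable it (takes effect immediately; does not persist across reboots)
sudo sysctl kernel.numa_balancing=0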

.. _mi300x-rccl-disable-acs:


@@ -59,7 +59,7 @@ MI300X accelerator with the prebuilt vLLM Docker image.

To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
might hang until the periodic balancing is finalized. For more information,
see :ref:`AMD Instinct MI300X system optimization <mi300x-disable-numa>`.
see the :ref:`system validation steps <rocm-for-ai-system-optimization>`.

.. code-block:: shell

@@ -322,22 +322,22 @@ Further reading
===============

- For application performance optimization strategies for HPC and AI workloads,
including inference with vLLM, see :doc:`/how-to/tuning-guides/mi300x/workload`.
including inference with vLLM, see :doc:`/how-to/rocm-for-ai/inference-optimization/workload`.

- To learn more about the options for latency and throughput benchmark scripts,
see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.

- To learn more about system settings and management practices to configure your system for
MI300X accelerators, see :doc:`/how-to/system-optimization/mi300x`.
MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_

- To learn how to run LLM models from Hugging Face or your own model, see
:doc:`Using ROCm for AI </how-to/rocm-for-ai/index>`.
- To learn how to run community models from Hugging Face on AMD GPUs, see
:doc:`Running models from Hugging Face </how-to/rocm-for-ai/inference/hugging-face-models>`.

- To learn how to optimize inference on LLMs, see
:doc:`Fine-tuning LLMs and inference optimization </how-to/llm-fine-tuning-optimization/index>`.
- To learn how to fine-tune LLMs and optimize inference, see
:doc:`Fine-tuning LLMs and inference optimization </how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference>`.

- For a list of other ready-made Docker images for ROCm, see the
:doc:`Docker image support matrix <rocm-install-on-linux:reference/docker-image-support-matrix>`.
- For a list of other ready-made Docker images for AI with ROCm, see
`AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.

Previous versions
=================

@@ -82,7 +82,7 @@ MI300X accelerator with the prebuilt vLLM Docker image.

To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
might hang until the periodic balancing is finalized. For more information,
see :ref:`AMD Instinct MI300X system optimization <mi300x-disable-numa>`.
see the :ref:`system validation steps <rocm-for-ai-system-optimization>`.

.. code-block:: shell

@@ -392,25 +392,22 @@ Further reading
===============

- For application performance optimization strategies for HPC and AI workloads,
including inference with vLLM, see :doc:`/how-to/tuning-guides/mi300x/workload`.
including inference with vLLM, see :doc:`/how-to/rocm-for-ai/inference-optimization/workload`.

- To learn more about the options for latency and throughput benchmark scripts,
see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.

- To learn more about system settings and management practices to configure your system for
MI300X accelerators, see :doc:`/how-to/system-optimization/mi300x`.
MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_

- To learn how to run LLM models from Hugging Face or your own model, see
:doc:`Using ROCm for AI </how-to/rocm-for-ai/index>`.
- To learn how to run community models from Hugging Face on AMD GPUs, see
:doc:`Running models from Hugging Face </how-to/rocm-for-ai/inference/hugging-face-models>`.

- To learn how to optimize inference on LLMs, see
:doc:`Fine-tuning LLMs and inference optimization </how-to/llm-fine-tuning-optimization/index>`.
- To learn how to fine-tune LLMs and optimize inference, see
:doc:`Fine-tuning LLMs and inference optimization </how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference>`.

- For a list of other ready-made Docker images for ROCm, see the
:doc:`Docker image support matrix <rocm-install-on-linux:reference/docker-image-support-matrix>`.

- To compare with the previous version of the ROCm vLLM Docker image for performance validation, refer to
`LLM inference performance validation on AMD Instinct MI300X (ROCm 6.2.0) <https://rocm.docs.amd.com/en/docs-6.2.0/how-to/performance-validation/mi300x/vllm-benchmark.html>`_.
- For a list of other ready-made Docker images for AI with ROCm, see
`AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.

Previous versions
=================

@@ -55,7 +55,7 @@ MI300X accelerator with the prebuilt vLLM Docker image.

To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
might hang until the periodic balancing is finalized. For more information,
see :ref:`AMD Instinct MI300X system optimization <mi300x-disable-numa>`.
see the :ref:`system validation steps <rocm-for-ai-system-optimization>`.

.. code-block:: shell

@@ -437,22 +437,22 @@ Further reading
===============

- For application performance optimization strategies for HPC and AI workloads,
including inference with vLLM, see :doc:`../inference-optimization/workload`.
including inference with vLLM, see :doc:`/how-to/rocm-for-ai/inference-optimization/workload`.

- To learn more about the options for latency and throughput benchmark scripts,
see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.

- To learn more about system settings and management practices to configure your system for
MI300X accelerators, see :doc:`../../system-optimization/mi300x`.
MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_

- To learn how to run LLM models from Hugging Face or your own model, see
:doc:`Running models from Hugging Face <hugging-face-models>`.
- To learn how to run community models from Hugging Face on AMD GPUs, see
:doc:`Running models from Hugging Face </how-to/rocm-for-ai/inference/hugging-face-models>`.

- To learn how to optimize inference on LLMs, see
:doc:`Inference optimization <../inference-optimization/index>`.
- To learn how to fine-tune LLMs and optimize inference, see
:doc:`Fine-tuning LLMs and inference optimization </how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference>`.

- To learn how to fine-tune LLMs, see
:doc:`Fine-tuning LLMs <../fine-tuning/index>`.
- For a list of other ready-made Docker images for AI with ROCm, see
`AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.

Previous versions
=================

@@ -130,7 +130,7 @@ vLLM inference performance testing

To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
might hang until the periodic balancing is finalized. For more information,
see :ref:`AMD Instinct MI300X system optimization <mi300x-disable-numa>`.
see the :ref:`system validation steps <rocm-for-ai-system-optimization>`.

.. code-block:: shell

@@ -305,22 +305,22 @@ Further reading
===============

- For application performance optimization strategies for HPC and AI workloads,
including inference with vLLM, see :doc:`../inference-optimization/workload`.
including inference with vLLM, see :doc:`/how-to/rocm-for-ai/inference-optimization/workload`.

- To learn more about the options for latency and throughput benchmark scripts,
see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.

- To learn more about system settings and management practices to configure your system for
MI300X accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_
MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_

- To learn how to run LLM models from Hugging Face or your own model, see
:doc:`Running models from Hugging Face <hugging-face-models>`.
- To learn how to run community models from Hugging Face on AMD GPUs, see
:doc:`Running models from Hugging Face </how-to/rocm-for-ai/inference/hugging-face-models>`.

- To learn how to optimize inference on LLMs, see
:doc:`Inference optimization <../inference-optimization/index>`.
- To learn how to fine-tune LLMs and optimize inference, see
:doc:`Fine-tuning LLMs and inference optimization </how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference>`.

- To learn how to fine-tune LLMs, see
:doc:`Fine-tuning LLMs <../fine-tuning/index>`.
- For a list of other ready-made Docker images for AI with ROCm, see
`AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.

Previous versions
=================

@@ -1,3 +1,5 @@
:orphan:

.. meta::
:description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the
ROCm vLLM Docker image.
@@ -319,22 +321,22 @@ Further reading
===============

- For application performance optimization strategies for HPC and AI workloads,
including inference with vLLM, see :doc:`../inference-optimization/workload`.
including inference with vLLM, see :doc:`/how-to/rocm-for-ai/inference-optimization/workload`.

- To learn more about the options for latency and throughput benchmark scripts,
see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.

- To learn more about system settings and management practices to configure your system for
MI300X accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_
MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_

- To learn how to run LLM models from Hugging Face or your own model, see
:doc:`Running models from Hugging Face <hugging-face-models>`.
- To learn how to run community models from Hugging Face on AMD GPUs, see
:doc:`Running models from Hugging Face </how-to/rocm-for-ai/inference/hugging-face-models>`.

- To learn how to optimize inference on LLMs, see
:doc:`Inference optimization <../inference-optimization/index>`.
- To learn how to fine-tune LLMs and optimize inference, see
:doc:`Fine-tuning LLMs and inference optimization </how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference>`.

- To learn how to fine-tune LLMs, see
:doc:`Fine-tuning LLMs <../fine-tuning/index>`.
- For a list of other ready-made Docker images for AI with ROCm, see
`AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.

Previous versions
=================

@@ -333,19 +333,19 @@ Further reading
see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.

- To learn more about system settings and management practices to configure your system for
MI300X accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_
MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_

- For application performance optimization strategies for HPC and AI workloads,
including inference with vLLM, see :doc:`../../../inference-optimization/workload`.
including inference with vLLM, see :doc:`/how-to/rocm-for-ai/inference-optimization/workload`.

- To learn how to run LLM models from Hugging Face or your own model, see
:doc:`Running models from Hugging Face <../../hugging-face-models>`.
- To learn how to run community models from Hugging Face on AMD GPUs, see
:doc:`Running models from Hugging Face </how-to/rocm-for-ai/inference/hugging-face-models>`.

- To learn how to optimize inference on LLMs, see
:doc:`Inference optimization <../../../inference-optimization/index>`.
- To learn how to fine-tune LLMs and optimize inference, see
:doc:`Fine-tuning LLMs and inference optimization </how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference>`.

- To learn how to fine-tune LLMs, see
:doc:`Fine-tuning LLMs <../../../fine-tuning/index>`.
- For a list of other ready-made Docker images for AI with ROCm, see
`AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.

Previous versions
=================

@@ -333,22 +333,23 @@ Further reading
see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.

- To learn more about system settings and management practices to configure your system for
MI300X accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_
MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_

- For application performance optimization strategies for HPC and AI workloads,
including inference with vLLM, see :doc:`../../inference-optimization/workload`.
including inference with vLLM, see :doc:`/how-to/rocm-for-ai/inference-optimization/workload`.

- To learn how to run LLM models from Hugging Face or your own model, see
:doc:`Running models from Hugging Face <../hugging-face-models>`.
- To learn how to run community models from Hugging Face on AMD GPUs, see
:doc:`Running models from Hugging Face </how-to/rocm-for-ai/inference/hugging-face-models>`.

- To learn how to optimize inference on LLMs, see
:doc:`Inference optimization <../../inference-optimization/index>`.
- To learn how to fine-tune LLMs and optimize inference, see
:doc:`Fine-tuning LLMs and inference optimization </how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference>`.

- To learn how to fine-tune LLMs, see
:doc:`Fine-tuning LLMs <../../fine-tuning/index>`.
- For a list of other ready-made Docker images for AI with ROCm, see
`AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.

Previous versions
=================

See :doc:`vllm-history` to find documentation for previous releases
of the ``ROCm/vllm`` Docker image.


@@ -0,0 +1,353 @@
:orphan:

.. meta::
:description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the
ROCm vLLM Docker image.
:keywords: model, MAD, automation, dashboarding, validate

**********************************
vLLM inference performance testing
**********************************

.. caution::

This documentation does not reflect the latest version of ROCm vLLM
inference performance documentation. See :doc:`../vllm` for the latest version.

.. _vllm-benchmark-unified-docker:

.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/vllm_0.9.0.1_20250605-benchmark-models.yaml

{% set unified_docker = data.vllm_benchmark.unified_docker.latest %}
{% set model_groups = data.vllm_benchmark.model_groups %}

The `ROCm vLLM Docker <{{ unified_docker.docker_hub_url }}>`_ image offers
a prebuilt, optimized environment for validating large language model (LLM)
inference performance on AMD Instinct™ MI300X series accelerators. This ROCm vLLM
Docker image integrates vLLM and PyTorch tailored specifically for MI300X series
accelerators and includes the following components:

* `ROCm {{ unified_docker.rocm_version }} <https://github.com/ROCm/ROCm>`_

* `vLLM {{ unified_docker.vllm_version }} <https://docs.vllm.ai/en/latest>`_

* `PyTorch {{ unified_docker.pytorch_version }} <https://github.com/ROCm/pytorch.git>`_

* `hipBLASLt {{ unified_docker.hipblaslt_version }} <https://github.com/ROCm/hipBLASLt>`_

With this Docker image, you can quickly test the :ref:`expected
inference performance numbers <vllm-benchmark-performance-measurements>` for
MI300X series accelerators.

.. _vllm-benchmark-available-models:

Supported models
================

The following models are supported for inference performance benchmarking
with vLLM and ROCm. Some instructions, commands, and recommendations in this
documentation might vary by model -- select one to get started.

.. raw:: html

<div id="vllm-benchmark-ud-params-picker" class="container-fluid">
<div class="row">
<div class="col-2 me-2 model-param-head">Model group</div>
<div class="row col-10">
{% for model_group in model_groups %}
<div class="col-3 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
{% endfor %}
</div>
</div>

<div class="row mt-1">
<div class="col-2 me-2 model-param-head">Model</div>
<div class="row col-10">
{% for model_group in model_groups %}
{% set models = model_group.models %}
{% for model in models %}
{% if models|length % 3 == 0 %}
<div class="col-4 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
{% else %}
<div class="col-6 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
{% endif %}
{% endfor %}
{% endfor %}
</div>
</div>
</div>

.. _vllm-benchmark-vllm:

{% for model_group in model_groups %}
{% for model in model_group.models %}

.. container:: model-doc {{model.mad_tag}}

.. note::

See the `{{ model.model }} model card on Hugging Face <{{ model.url }}>`_ to learn more about your selected model.
Some models require access authorization prior to use via an external license agreement with a third party.

{% endfor %}
{% endfor %}

.. note::

vLLM is a toolkit and library for LLM inference and serving. AMD implements
high-performance custom kernels and modules in vLLM to enhance performance.
See :ref:`fine-tuning-llms-vllm` and :ref:`mi300x-vllm-optimization` for
more information.

.. _vllm-benchmark-performance-measurements:

Performance measurements
========================

To evaluate performance, the
`Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
page provides reference throughput and latency measurements for inferencing popular AI models.

.. important::

The performance data presented in
`Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html>`_
only reflects the latest version of this inference benchmarking environment.
The listed measurements should not be interpreted as the peak performance achievable by AMD Instinct MI325X and MI300X accelerators or ROCm software.

Advanced features and known issues
==================================

For information on experimental features and known issues related to ROCm optimization efforts on vLLM,
see the developer's guide at `<https://github.com/ROCm/vllm/tree/7bb0618b1fe725b7d4fad9e525aa44da12c94a8b/docs/dev-docker>`__.

System validation
=================

Before running AI workloads, it's important to validate that your AMD hardware is configured
correctly and performing optimally.

To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU
might hang until the periodic balancing is finalized. For more information,
see the :ref:`system validation steps <rocm-for-ai-system-optimization>`.

.. code-block:: shell

# disable automatic NUMA balancing
sudo sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
# check if NUMA balancing is disabled (returns 0 if disabled)
cat /proc/sys/kernel/numa_balancing
0

To test for optimal performance, consult the recommended :ref:`System health benchmarks
<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
system's configuration.

Pull the Docker image
=====================

Download the `ROCm vLLM Docker image <{{ unified_docker.docker_hub_url }}>`_.
Use the following command to pull the Docker image from Docker Hub.

.. code-block:: shell

docker pull {{ unified_docker.pull_tag }}

Benchmarking
============

Once the setup is complete, choose between two options to reproduce the
benchmark results:

.. _vllm-benchmark-mad:

{% for model_group in model_groups %}
{% for model in model_group.models %}

.. container:: model-doc {{model.mad_tag}}

.. tab-set::

.. tab-item:: MAD-integrated benchmarking

Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
directory and install the required packages on the host machine.

.. code-block:: shell

git clone https://github.com/ROCm/MAD
cd MAD
pip install -r requirements.txt

Use this command to run the performance benchmark test on the `{{model.model}} <{{ model.url }}>`_ model
using one GPU with the ``{{model.precision}}`` data type on the host machine.

.. code-block:: shell

export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
python3 tools/run_models.py --tags {{model.mad_tag}} --keep-model-dir --live-output --timeout 28800

MAD launches a Docker container with the name
``container_ci-{{model.mad_tag}}``. The latency and throughput reports of the
model are collected in the following path: ``~/MAD/reports_{{model.precision}}/``.

Although the :ref:`available models <vllm-benchmark-available-models>` are preconfigured
to collect latency and throughput performance data, you can also change the benchmarking
parameters. See the standalone benchmarking tab for more information.

{% if model.tunableop %}

.. note::

For improved performance, consider enabling :ref:`PyTorch TunableOp <mi300x-tunableop>`.
TunableOp automatically explores different implementations and configurations of certain PyTorch
operators to find the fastest one for your hardware.

By default, ``{{model.mad_tag}}`` runs with TunableOp disabled
(see
`<https://github.com/ROCm/MAD/blob/develop/models.json>`__). To
enable it, edit the default run behavior in the ``models.json``
configuration before running inference -- update the model's run
``args`` by changing ``--tunableop off`` to ``--tunableop on``.

Enabling TunableOp triggers a two-pass run -- a warm-up followed by the performance-collection run.
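
One way to script the ``models.json`` edit described above (a sketch only; it assumes the flag appears verbatim as ``--tunableop off`` in the model's run ``args``):

.. code-block:: shell

# in a local MAD checkout; flips the TunableOp default for matching entries
sed -i 's/--tunableop off/--tunableop on/' models.json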

{% endif %}

.. tab-item:: Standalone benchmarking

Run the vLLM benchmark tool independently by starting the
`Docker container <{{ unified_docker.docker_hub_url }}>`_
as shown in the following snippet.

.. code-block::

docker pull {{ unified_docker.pull_tag }}
docker run -it --device=/dev/kfd --device=/dev/dri --group-add video --shm-size 16G --security-opt seccomp=unconfined --security-opt apparmor=unconfined --cap-add=SYS_PTRACE -v $(pwd):/workspace --env HUGGINGFACE_HUB_CACHE=/workspace --name test {{ unified_docker.pull_tag }}

In the Docker container, clone the ROCm MAD repository and navigate to the
benchmark scripts directory at ``~/MAD/scripts/vllm``.

.. code-block::

git clone https://github.com/ROCm/MAD
cd MAD/scripts/vllm

To start the benchmark, use the following command with the appropriate options.

.. code-block::

./vllm_benchmark_report.sh -s $test_option -m {{model.model_repo}} -g $num_gpu -d {{model.precision}}

.. list-table::
:header-rows: 1
:align: center

* - Name
- Options
- Description

* - ``$test_option``
- latency
- Measure decoding token latency

* -
- throughput
- Measure token generation throughput

* -
- all
- Measure both throughput and latency

* - ``$num_gpu``
- 1 or 8
- Number of GPUs

* - ``$datatype``
- ``float16`` or ``float8``
- Data type

.. note::

The input sequence length, output sequence length, and tensor parallel (TP) are
already configured. You don't need to specify them with this script.

.. note::

If you encounter the following error, provide a Hugging Face
token that has been granted access to the gated models.

.. code-block::

OSError: You are trying to access a gated repo.

# pass your HF_TOKEN
export HF_TOKEN=$your_personal_hf_token

Here are some examples of running the benchmark with various options.

* Latency benchmark

Use this command to benchmark the latency of the {{model.model}} model on eight GPUs with ``{{model.precision}}`` precision.

.. code-block::

./vllm_benchmark_report.sh -s latency -m {{model.model_repo}} -g 8 -d {{model.precision}}

Find the latency report at ``./reports_{{model.precision}}_vllm_rocm{{unified_docker.rocm_version}}/summary/{{model.model_repo.split('/', 1)[1] if '/' in model.model_repo else model.model_repo}}_latency_report.csv``.

* Throughput benchmark

Use this command to benchmark the throughput of the {{model.model}} model on eight GPUs with ``{{model.precision}}`` precision.

.. code-block:: shell

./vllm_benchmark_report.sh -s throughput -m {{model.model_repo}} -g 8 -d {{model.precision}}

Find the throughput report at ``./reports_{{model.precision}}_vllm_rocm{{unified_docker.rocm_version}}/summary/{{model.model_repo.split('/', 1)[1] if '/' in model.model_repo else model.model_repo}}_throughput_report.csv``.

.. raw:: html

<style>
mjx-container[jax="CHTML"][display="true"] {
text-align: left;
margin: 0;
}
</style>

.. note::

Throughput is calculated as:

- .. math:: throughput\_tot = requests \times (\mathsf{\text{input lengths}} + \mathsf{\text{output lengths}}) / elapsed\_time

- .. math:: throughput\_gen = requests \times \mathsf{\text{output lengths}} / elapsed\_time
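
As an illustration with assumed numbers: 1,000 requests with input length 128 and output length 128 that finish in 100 seconds give a total throughput of 1000 × (128 + 128) / 100 = 2560 tokens/s and a generation throughput of 1000 × 128 / 100 = 1280 tokens/s.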
{% endfor %}
{% endfor %}

Further reading
===============

- To learn more about the options for latency and throughput benchmark scripts,
see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.

- To learn more about system settings and management practices to configure your system for
MI300X accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_

- For application performance optimization strategies for HPC and AI workloads,
including inference with vLLM, see :doc:`/how-to/rocm-for-ai/inference-optimization/workload`.

- To learn how to run community models from Hugging Face on AMD GPUs, see
:doc:`Running models from Hugging Face </how-to/rocm-for-ai/inference/hugging-face-models>`.

- To learn how to fine-tune LLMs and optimize inference, see
:doc:`Fine-tuning LLMs and inference optimization </how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference>`.

- For a list of other ready-made Docker images for AI with ROCm, see
`AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.

Previous versions
=================

See :doc:`vllm-history` to find documentation for previous releases
of the ``ROCm/vllm`` Docker image.
@@ -18,58 +18,65 @@ previous releases of the ``ROCm/vllm`` Docker image on `Docker Hub <https://hub.
- PyTorch version
- Resources

* - 6.4.0
- 0.9.0.1
* - 6.4.1
- 0.9.1
- 2.7.0
-
* :doc:`Documentation <../vllm>`
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_vllm_0.8.5_20250521/images/sha256-38410c51af7208897cd8b737c9bdfc126e9bc8952d4aa6b88c85482f03092a11>`_
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.9.1_20250702/images/sha256-45068a2079cb8df554ed777141bf0c67d6627c470a897256e60c9f262677faab>`_

* - 6.4.1
- 0.9.0.1
- 2.7.0
-
* :doc:`Documentation <vllm-0.9.0.1-20250605>`
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.9.0.1_20250605/images/sha256-f48beeb3d72663a93c77211eb45273d564451447c097e060befa713d565fa36c>`_

* - 6.3.1
- 0.8.5 (0.8.6.dev)
- 2.7.0
-
* :doc:`Documentation <vllm-0.8.5-20250521>`
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_vllm_0.8.5_20250521/images/sha256-38410c51af7208897cd8b737c9bdfc126e9bc8952d4aa6b88c85482f03092a11>`_
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_vllm_0.8.5_20250521/images/sha256-38410c51af7208897cd8b737c9bdfc126e9bc8952d4aa6b88c85482f03092a11>`__

* - 6.3.1
- 0.8.5
- 2.7.0
-
* :doc:`Documentation <vllm-0.8.5-20250513>`
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_vllm_0.8.5_20250513/images/sha256-5c8b4436dd0464119d9df2b44c745fadf81512f18ffb2f4b5dc235c71ebe26b4>`_
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_vllm_0.8.5_20250513/images/sha256-5c8b4436dd0464119d9df2b44c745fadf81512f18ffb2f4b5dc235c71ebe26b4>`__

* - 6.3.1
- 0.8.3
- 2.7.0
-
* :doc:`Documentation <vllm-0.8.3-20250415>`
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.8.3_20250415/images/sha256-ad9062dea3483d59dedb17c67f7c49f30eebd6eb37c3fac0a171fb19696cc845>`_
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.8.3_20250415/images/sha256-ad9062dea3483d59dedb17c67f7c49f30eebd6eb37c3fac0a171fb19696cc845>`__

* - 6.3.1
- 0.7.3
- 2.7.0
-
* :doc:`Documentation <vllm-0.7.3-20250325>`
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.7.3_20250325/images/sha256-25245924f61750b19be6dcd8e787e46088a496c1fe17ee9b9e397f3d84d35640>`_
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.7.3_20250325/images/sha256-25245924f61750b19be6dcd8e787e46088a496c1fe17ee9b9e397f3d84d35640>`__

* - 6.3.1
- 0.6.6
- 2.7.0
-
* :doc:`Documentation <vllm-0.6.6>`
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_mi300_ubuntu22.04_py3.12_vllm_0.6.6/images/sha256-9a12ef62bbbeb5a4c30a01f702c8e025061f575aa129f291a49fbd02d6b4d6c9>`_
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_mi300_ubuntu22.04_py3.12_vllm_0.6.6/images/sha256-9a12ef62bbbeb5a4c30a01f702c8e025061f575aa129f291a49fbd02d6b4d6c9>`__

* - 6.2.1
- 0.6.4
- 2.5.0
-
* :doc:`Documentation <vllm-0.6.4>`
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4/images/sha256-ccbb74cc9e7adecb8f7bdab9555f7ac6fc73adb580836c2a35ca96ff471890d8>`_
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4/images/sha256-ccbb74cc9e7adecb8f7bdab9555f7ac6fc73adb580836c2a35ca96ff471890d8>`__

* - 6.2.0
- 0.4.3
- 2.4.0
-
* :doc:`Documentation <vllm-0.4.3>`
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.2_mi300_ubuntu22.04_py3.9_vllm_7c5fd50/images/sha256-9e4dd4788a794c3d346d7d0ba452ae5e92d39b8dfac438b2af8efdc7f15d22c0>`_
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.2_mi300_ubuntu22.04_py3.9_vllm_7c5fd50/images/sha256-9e4dd4788a794c3d346d7d0ba452ae5e92d39b8dfac438b2af8efdc7f15d22c0>`__

@@ -32,10 +32,10 @@ PyTorch inference performance testing

<div id="vllm-benchmark-ud-params-picker" class="container-fluid">
<div class="row">
<div class="col-2 me-2 model-param-head">Model group</div>
<div class="col-2 me-2 model-param-head">Model</div>
<div class="row col-10">
{% for model_group in model_groups %}
<div class="col-4 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
<div class="col-3 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
{% endfor %}
</div>
</div>
@@ -103,7 +103,7 @@ PyTorch inference performance testing

The Chai-1 benchmark uses a specifically selected Docker image with ROCm 6.2.3 and PyTorch 2.3.0 to address an accuracy issue.

.. container:: model-doc pyt_clip_inference pyt_mochi_video_inference
.. container:: model-doc pyt_clip_inference pyt_mochi_video_inference pyt_wan2.1_inference

Use the following command to pull the `ROCm PyTorch Docker image <https://hub.docker.com/layers/rocm/pytorch/latest/images/sha256-05b55983e5154f46e7441897d0908d79877370adca4d1fff4899d9539d6c4969>`_ from Docker Hub.


@@ -112,7 +112,7 @@ vLLM inference performance testing
==================================

For information on experimental features and known issues related to ROCm optimization efforts on vLLM,
see the developer's guide at `<https://github.com/ROCm/vllm/tree/7bb0618b1fe725b7d4fad9e525aa44da12c94a8b/docs/dev-docker>`__.
see the developer's guide at `<https://github.com/ROCm/vllm/tree/5486e7bc8523be0324ccd68f221959445b56cc2a/docs/dev-docker>`__.

System validation
=================
@@ -325,22 +325,22 @@ Further reading
see `<https://github.com/ROCm/vllm/tree/main/benchmarks>`_.

- To learn more about system settings and management practices to configure your system for
MI300X accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_
MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_

- For application performance optimization strategies for HPC and AI workloads,
including inference with vLLM, see :doc:`../../inference-optimization/workload`.
including inference with vLLM, see :doc:`/how-to/rocm-for-ai/inference-optimization/workload`.

- To learn how to run LLM models from Hugging Face or your own model, see
:doc:`Running models from Hugging Face <../hugging-face-models>`.
- To learn how to run community models from Hugging Face on AMD GPUs, see
:doc:`Running models from Hugging Face </how-to/rocm-for-ai/inference/hugging-face-models>`.

- To learn how to optimize inference on LLMs, see
:doc:`Inference optimization <../../inference-optimization/index>`.
- To learn how to fine-tune LLMs and optimize inference, see
:doc:`Fine-tuning LLMs and inference optimization </how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference>`.

- To learn how to fine-tune LLMs, see
:doc:`Fine-tuning LLMs <../../fine-tuning/index>`.
- For a list of other ready-made Docker images for AI with ROCm, see
`AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.

Previous versions
=================

See :doc:`previous-versions/vllm-history` to find documentation for previous releases
of the ``ROCm/vllm`` Docker image.
of the ``ROCm/vllm`` Docker image.

@@ -14,14 +14,14 @@ Throughout the following topics, this section provides a comprehensive guide to
The AI Developer Hub contains `AMD ROCm tutorials <https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/>`_ for
training, fine-tuning, and inference. It leverages popular machine learning frameworks on AMD GPUs.

- :doc:`Installing ROCm and machine learning frameworks <install>`
- :doc:`Installing ROCm and machine learning frameworks <../install>`

- :doc:`Running models from Hugging Face <hugging-face-models>`

- :doc:`LLM inference frameworks <llm-inference-frameworks>`

- :doc:`vLLM inference performance testing <vllm-benchmark>`
- :doc:`vLLM inference performance testing <benchmark-docker/vllm>`

- :doc:`PyTorch inference performance testing <pytorch-inference-benchmark>`
- :doc:`PyTorch inference performance testing <benchmark-docker/pytorch-inference>`

- :doc:`Deploying your model <deploy-your-model>`

@@ -141,7 +141,7 @@ Installing vLLM

ROCm provides a prebuilt optimized Docker image for validating the performance of LLM inference with vLLM
on the MI300X accelerator. The Docker image includes ROCm, vLLM, and PyTorch.
For more information, see :doc:`vllm-benchmark`.
For more information, see :doc:`/how-to/rocm-for-ai/inference/benchmark-docker/vllm`.

.. _fine-tuning-llms-tgi:


@@ -28,7 +28,7 @@ ROCm supports multiple :doc:`installation methods <rocm-install-on-linux:install

* :doc:`Using your Linux distribution's package manager <rocm-install-on-linux:install/install-methods/package-manager-index>`

* :doc:`Using the AMDGPU installer <rocm-install-on-linux:install/amdgpu-install>`
* :doc:`Using the AMDGPU installer <rocm-install-on-linux:install/install-methods/amdgpu-installer-index>`

* :ref:`Multi-version installation <rocm-install-on-linux:installation-types>`
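
For example, the AMDGPU installer route typically reduces to a single command (a sketch; consult the linked installer guide for the exact ``--usecase`` values supported by your release):

.. code-block:: shell

sudo amdgpu-install --usecase=rocm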


@@ -160,12 +160,14 @@ Download the Docker image
.. tab-set::

.. tab-item:: Ubuntu 24.04 + Python 3.12
:sync: py312

.. code-block:: shell

docker pull rocm/megatron-lm:v25.5_py312

.. tab-item:: Ubuntu 22.04 + Python 3.10
:sync: py310

.. code-block:: shell

@@ -173,9 +175,22 @@ Download the Docker image

2. Launch the Docker container.

.. code-block:: shell
.. tab-set::

docker run -it --device /dev/dri --device /dev/kfd --device /dev/infiniband --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME:$HOME -v $HOME/.ssh:/root/.ssh --shm-size 64G --name megatron_training_env rocm/megatron-lm:v25.5
.. tab-item:: Ubuntu 24.04 + Python 3.12
:sync: py312

.. code-block:: shell

docker run -it --device /dev/dri --device /dev/kfd --device /dev/infiniband --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME:$HOME -v $HOME/.ssh:/root/.ssh --shm-size 128G --name megatron_training_env rocm/megatron-lm:v25.5_py312


.. tab-item:: Ubuntu 22.04 + Python 3.10
:sync: py310

.. code-block:: shell

docker run -it --device /dev/dri --device /dev/kfd --device /dev/infiniband --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME:$HOME -v $HOME/.ssh:/root/.ssh --shm-size 128G --name megatron_training_env rocm/megatron-lm:v25.5_py310

3. Use these commands if you exit the ``megatron_training_env`` container and need to return to it.
|
||||
|
||||
|
||||
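The exact commands appear outside this hunk; as a minimal sketch using standard
Docker CLI commands, where the container name matches the ``--name`` flag used above:

.. code-block:: shell

   # Restart the stopped container, then reattach with an interactive shell
   docker start megatron_training_env
   docker exec -it megatron_training_env bash
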
@@ -22,7 +22,7 @@ previous releases of the ``ROCm/megatron-lm`` Docker image on `Docker Hub <https
     - 6.3.4
     - 2.8.0a0+gite2f9759
     -
       * `Documentation <../megatron-lm>`_
       * :doc:`Documentation <../megatron-lm>`
       * `Docker Hub <https://hub.docker.com/layers/rocm/megatron-lm/v25.5_py312/images/sha256-4506f18ba188d24189c6b1f95130b425f52c528a543bb3f420351824edceadc2>`_

   * - v25.4

@@ -102,7 +102,8 @@ the output is ``1``, run the following command to disable NUMA auto-balancing.

   sudo sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'

See :ref:`mi300x-disable-numa` for more information.
See :ref:`System validation and optimization <rocm-for-ai-system-optimization>`
for more information.

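Taken together, a check-then-disable sequence looks like this. These are standard
Linux procfs/sysctl interfaces; the ``sysctl.d`` persistence step is an optional
assumption about your distribution.

.. code-block:: shell

   # 1 = NUMA auto-balancing enabled, 0 = disabled
   cat /proc/sys/kernel/numa_balancing
   # Disable it for the running system
   sudo sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
   # Optionally persist across reboots (assumes a sysctl.d-based distro)
   echo 'kernel.numa_balancing=0' | sudo tee /etc/sysctl.d/99-numa-balancing.conf
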
Hardware verification with ROCm
-------------------------------

@@ -118,7 +119,7 @@ Run the command:

   rocm-smi --setperfdeterminism 1900

See :ref:`mi300x-hardware-verification-with-rocm` for more information.
See `Hardware verification with ROCm <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html#hardware-verification-with-rocm>`_ for more information.

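To undo the clock cap after testing, a reset along these lines should work;
``-r`` (``--resetclocks``) is a long-standing ``rocm-smi`` option, but verify it
against your installed ROCm version.

.. code-block:: shell

   # Restore default clock behavior after the determinism run
   rocm-smi -r
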
RCCL Bandwidth Test
-------------------

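The ``./build/all_reduce_perf`` binary used below comes from
`rccl-tests <https://github.com/ROCm/rccl-tests>`_. A typical build is sketched
here; the make variables and paths are assumptions, so consult the repository
README for the authoritative steps.

.. code-block:: shell

   git clone https://github.com/ROCm/rccl-tests.git
   cd rccl-tests
   # MPI=1 enables the multi-process runs shown below; adjust paths as needed
   make MPI=1 MPI_HOME=/usr/lib/x86_64-linux-gnu/openmpi HIP_HOME=/opt/rocm
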
@@ -171,7 +172,7 @@ Run on 8 GPUs (``-g 8``), scanning from 8 bytes to 10 GB:

   ./build/all_reduce_perf -b 8 -e 10G -f 2 -g 8

.. image:: ../../data/how-to/rocm-for-ai/rccl-tests-8-gpu.png
.. image:: /data/how-to/rocm-for-ai/rccl-tests-8-gpu.png
   :width: 800

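For reference, these flags follow the standard nccl-tests/rccl-tests convention;
an annotated restatement of the run above:

.. code-block:: shell

   # -b: starting message size    -e: ending message size
   # -f: multiplication factor between sizes (2 doubles each step)
   # -g: number of GPUs driven by this single process
   ./build/all_reduce_perf -b 8 -e 10G -f 2 -g 8
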
Using one MPI process per GPU and ``-g 1`` for performance-oriented runs on both single-node and multi-node is
@@ -181,7 +182,7 @@ recommended. So, a run on 8 GPUs looks something like:

   mpirun -np 8 --bind-to numa ./build/all_reduce_perf -b 8 -e 10G -f 2 -g 1

.. image:: ../../data/how-to/rocm-for-ai/rccl-tests-1-mpi-process-per-gpu.png
.. image:: /data/how-to/rocm-for-ai/rccl-tests-1-mpi-process-per-gpu.png
   :width: 800

Running with one MPI process per GPU ensures a one-to-one mapping for CPUs and GPUs, which can be beneficial
@@ -271,7 +272,7 @@ end-of-document token, remove sentence splitting, and use the tokenizer type.

In this case, the automatically generated output files are named ``my-gpt2_text_document.bin`` and
``my-gpt2_text_document.idx``.

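A trivial check that preprocessing produced both files, assuming they land in the
current working directory:

.. code-block:: shell

   # Expect a non-empty token file (.bin) and index file (.idx)
   ls -lh my-gpt2_text_document.bin my-gpt2_text_document.idx
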
.. image:: ../../data/how-to/rocm-for-ai/prep-training-datasets-my-gpt2-text-document.png
.. image:: /data/how-to/rocm-for-ai/prep-training-datasets-my-gpt2-text-document.png
   :width: 800

.. _amd-megatron-lm-environment-setup:

@@ -469,7 +470,7 @@ Benchmarking examples

      See the sample output:

      .. image:: ../../data/how-to/rocm-for-ai/llama2-7b-training-log-sample.png
      .. image:: /data/how-to/rocm-for-ai/llama2-7b-training-log-sample.png
         :width: 800

   .. tab-item:: Multi-node training
@@ -500,12 +501,12 @@ Benchmarking examples

      Master node:

      .. image:: ../../data/how-to/rocm-for-ai/2-node-training-master.png
      .. image:: /data/how-to/rocm-for-ai/2-node-training-master.png
         :width: 800

      Worker node:

      .. image:: ../../data/how-to/rocm-for-ai/2-node-training-worker.png
      .. image:: /data/how-to/rocm-for-ai/2-node-training-worker.png
         :width: 800

Previous versions

@@ -111,7 +111,8 @@ the output is ``1``, run the following command to disable NUMA auto-balancing.

   sudo sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'

See :ref:`mi300x-disable-numa` for more information.
See :ref:`System validation and optimization <rocm-for-ai-system-optimization>`
for more information.

.. _mi300x-amd-megatron-lm-training:

@@ -489,7 +490,7 @@ Benchmarking examples

      See the sample output:

      .. image:: ../../../../data/how-to/rocm-for-ai/llama2-7b-training-log-sample.png
      .. image:: /data/how-to/rocm-for-ai/llama2-7b-training-log-sample.png
         :width: 800

   .. tab-item:: Multi-node training
@@ -520,12 +521,12 @@ Benchmarking examples

      Master node:

      .. image:: ../../../../data/how-to/rocm-for-ai/2-node-training-master.png
      .. image:: /data/how-to/rocm-for-ai/2-node-training-master.png
         :width: 800

      Worker node:

      .. image:: ../../../../data/how-to/rocm-for-ai/2-node-training-worker.png
      .. image:: /data/how-to/rocm-for-ai/2-node-training-worker.png
         :width: 800

Previous versions

@@ -572,7 +572,7 @@ Benchmarking examples

      See the sample output:

      .. image:: ../../../../data/how-to/rocm-for-ai/llama2-7b-training-log-sample.png
      .. image:: /data/how-to/rocm-for-ai/llama2-7b-training-log-sample.png
         :width: 800

   .. tab-item:: Multi-node training
@@ -603,12 +603,12 @@ Benchmarking examples

      Master node:

      .. image:: ../../../../data/how-to/rocm-for-ai/2-node-training-master.png
      .. image:: /data/how-to/rocm-for-ai/2-node-training-master.png
         :width: 800

      Worker node:

      .. image:: ../../../../data/how-to/rocm-for-ai/2-node-training-worker.png
      .. image:: /data/how-to/rocm-for-ai/2-node-training-worker.png
         :width: 800

Previous versions

@@ -80,7 +80,8 @@ the output is ``1``, run the following command to disable NUMA auto-balancing.

   sudo sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'

See :ref:`mi300x-disable-numa` for more information.
See :ref:`System validation and optimization <rocm-for-ai-system-optimization>`
for more information.

Environment setup
=================

@@ -1,3 +1,5 @@
:orphan:

.. meta::
   :description: How to train a model using PyTorch for ROCm.
   :keywords: ROCm, AI, LLM, train, PyTorch, torch, Llama, flux, tutorial, docker

@@ -76,14 +76,6 @@ Ubuntu versions.
       single node workstations, multi and many-core nodes, clusters of nodes via
       QMP, and classic vector computers.

   * -
     - `Grid <https://github.com/amd/InfinityHub-CI/tree/main/grid/>`_
     - Grid is a library for lattice QCD calculations that employs a high-level data parallel
       approach while using a number of techniques to target multiple types of parallelism.
       The library currently supports MPI, OpenMP, and short vector parallelism. The SIMD
       instruction sets covered include SSE, AVX, AVX2, FMA4, IMCI, and AVX512. Recent
       releases expanded this support to include GPU offloading.

   * -
     - `MILC <https://github.com/amd/InfinityHub-CI/tree/main/milc/>`_
     - The MILC Code is a set of research codes developed by MIMD Lattice Computation
@@ -237,12 +229,18 @@ Ubuntu versions.
       of these applications.

   * - Tools and libraries
     - `ROCm with GPU-aware MPI container <https://github.com/amd/InfinityHub-CI/tree/main/base-gpu-mpi-rocm-docker>`_
     - `AMD ROCm with OpenMPI container <https://github.com/amd/InfinityHub-CI/tree/main/base-gpu-mpi-rocm-docker>`_
     - Base container for GPU-aware MPI with ROCm for HPC applications. This
       project provides a boilerplate for building and running a Docker
       container with ROCm supporting GPU-aware MPI implementations using
       OpenMPI or UCX.

   * -
     - `AMD ROCm with MPICH container <https://github.com/amd/InfinityHub-CI/tree/main/base-mpich-rocm-docker>`_
     - Base container for GPU-aware MPI with ROCm for HPC applications. This
       project provides a boilerplate for building and running a Docker
       container with ROCm supporting GPU-aware MPI implementations using MPICH.

   * -
     - `Kokkos <https://github.com/amd/InfinityHub-CI/tree/main/kokkos>`_
     - Kokkos is a programming model in C++ for writing performance portable

@@ -12,8 +12,7 @@ accelerators. They include detailed instructions on system settings and
application tuning suggestions to help you fully leverage the capabilities of
these accelerators, thereby achieving optimal performance.

* :doc:`../../rocm-for-ai/inference/vllm-benchmark`
* :doc:`../../rocm-for-ai/inference-optimization/workload`
* :doc:`/how-to/rocm-for-ai/inference-optimization/workload`
* `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_

@@ -215,9 +215,9 @@ sphinx==8.1.3
    # sphinx-copybutton
    # sphinx-design
    # sphinx-external-toc
    # sphinx-last-updated-by-git
    # sphinx-notfound-page
    # sphinx-reredirects
    # sphinx-sitemap
    # sphinxcontrib-datatemplates
    # sphinxcontrib-runcmd
sphinx-book-theme==1.1.4
@@ -228,11 +228,13 @@ sphinx-design==0.6.1
    # via rocm-docs-core
sphinx-external-toc==1.0.1
    # via rocm-docs-core
sphinx-last-updated-by-git==0.3.8
    # via sphinx-sitemap
sphinx-notfound-page==1.1.0
    # via rocm-docs-core
sphinx-reredirects==0.1.6
    # via -r requirements.in
sphinx-sitemap==2.6.0
sphinx-sitemap==2.7.2
    # via -r requirements.in
sphinxcontrib-applehelp==2.0.0
    # via sphinx

@@ -98,7 +98,7 @@ System Management
.. csv-table::
   :header: "Component", "Description"

   ":doc:`AMD SMI <amdsmi:index>`", "C library for Linux that provides a user space interface for applications to monitor and control AMD devices"
   ":doc:`AMD SMI <amdsmi:index>`", "System management interface to control AMD GPU settings, monitor performance, and retrieve device and process information"
   ":doc:`ROCm Data Center Tool <rdc:index>`", "Simplifies administration and addresses key infrastructure challenges in AMD GPUs in cluster and data-center environments"
   ":doc:`rocminfo <rocminfo:index>`", "Reports system information"
   ":doc:`ROCm SMI <rocm_smi_lib:index>`", "C library for Linux that provides a user space interface for applications to monitor and control GPU applications"
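
To illustrate the updated AMD SMI entry, two common invocations; the subcommand
names reflect the standard ``amd-smi`` CLI, but verify them against your installed
version:

.. code-block:: shell

   # Enumerate detected AMD GPUs
   amd-smi list
   # Report live utilization, clock, power, and temperature metrics
   amd-smi metric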