diff --git a/.azuredevops/ci-builds/aomp-mainline.yml b/.azuredevops/ci-builds/aomp-mainline.yml new file mode 100644 index 000000000..4cc732587 --- /dev/null +++ b/.azuredevops/ci-builds/aomp-mainline.yml @@ -0,0 +1,42 @@ +variables: +- group: common +- template: /.azuredevops/variables-global.yml + +resources: + repositories: + - repository: aomp_repo + type: github + endpoint: ROCm + name: ROCm/aomp + ref: amd-mainline-open + - repository: aomp-extras_repo + type: github + endpoint: ROCm + name: ROCm/aomp-extras + ref: amd-mainline-open + - repository: flang_repo + type: github + endpoint: ROCm + name: ROCm/flang + ref: amd-mainline-open + - repository: llvm-project_repo + type: github + endpoint: ROCm + name: ROCm/llvm-project + ref: amd-mainline-open + pipelines: + - pipeline: rocr-runtime_pipeline + source: \ROCR-Runtime + trigger: + branches: + include: + - amd-master +# this job will only be triggered after successful build sequence of llvm-project and ROCR-Runtime + +trigger: none +pr: none + +jobs: + - template: ${{ variables.CI_COMPONENT_PATH }}/aomp.yml + parameters: + checkoutRepo: aomp_repo diff --git a/.azuredevops/ci-builds/aomp.yml b/.azuredevops/ci-builds/aomp-staging.yml similarity index 92% rename from .azuredevops/ci-builds/aomp.yml rename to .azuredevops/ci-builds/aomp-staging.yml index 335108adc..57a55cbf3 100644 --- a/.azuredevops/ci-builds/aomp.yml +++ b/.azuredevops/ci-builds/aomp-staging.yml @@ -27,7 +27,10 @@ resources: pipelines: - pipeline: rocr-runtime_pipeline source: \ROCR-Runtime - trigger: true + trigger: + branches: + include: + - amd-staging # this job will only be triggered after successful build sequence of llvm-project and ROCR-Runtime trigger: none diff --git a/.azuredevops/components/AMDMIGraphX.yml b/.azuredevops/components/AMDMIGraphX.yml index 86b901944..708b5c657 100644 --- a/.azuredevops/components/AMDMIGraphX.yml +++ b/.azuredevops/components/AMDMIGraphX.yml @@ -27,12 +27,12 @@ parameters: type: object default: - 
https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz - - onnx==1.14.1 - - numpy==1.21.6 - - typing==3.7.4 - - pytest==6.0.1 - - packaging==23.0 - - protobuf==3.20.2 + - onnx>=1.14.1 + - numpy>=1.21.6 + - typing>=3.7.4 + - pytest>=6.0.1 + - packaging>=23.0 + - protobuf>=3.20.2 - name: rocmDependencies type: object default: @@ -51,6 +51,26 @@ parameters: - rocminfo - rocMLIR - rocprofiler-register + - roctracer +- name: rocmTestDependencies + type: object + default: + - aomp + - aomp-extras + - clr + - composable_kernel + - hipBLAS + - hipBLAS-common + - hipBLASLt + - llvm-project + - MIOpen + - rocm-cmake + - ROCR-Runtime + - rocBLAS + - rocminfo + - rocMLIR + - rocprofiler-register + - roctracer jobs: - job: AMDMIGraphX @@ -64,8 +84,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -104,7 +122,76 @@ jobs: -DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include -DBUILD_TESTING=ON -GNinja -# REFERENCE: https://github.com/ROCm/composable_kernel/issues/782 - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml parameters: gpuTarget: $(JOB_GPU_TARGET) + +- job: AMDMIGraphX_testing + dependsOn: AMDMIGraphX + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) + variables: + - group: common + - template: /.azuredevops/variables-global.yml + - name: MIGRAPHX_TRACE_BENCHMARKING + value: 1 + pool: $(JOB_TEST_POOL) + workspace: + clean: all + strategy: + matrix: + gfx942: + JOB_GPU_TARGET: gfx942 + JOB_TEST_POOL: ${{ variables.GFX942_TEST_POOL }} + steps: + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml + parameters: + aptPackages: ${{ parameters.aptPackages }} + pipModules: ${{ parameters.pipModules }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml + - 
template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml + parameters: + checkoutRepo: ${{ parameters.checkoutRepo }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml + parameters: + ${{ if eq(parameters.checkoutRef, '') }}: + dependencySource: staging + ${{ elseif ne(parameters.checkoutRef, '') }}: + dependencySource: tag-builds +# half version should be fixed to 5.6.0 + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml + parameters: + dependencySource: fixed + fixedComponentName: half + fixedPipelineIdentifier: ${{ variables.HALF560_PIPELINE_ID }} + skipLibraryLinking: true + skipLlvmSymlink: true + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml + parameters: + dependencyList: ${{ parameters.rocmTestDependencies }} + gpuTarget: $(JOB_GPU_TARGET) + # CI case: download latest default branch build + ${{ if eq(parameters.checkoutRef, '') }}: + dependencySource: staging + # manual build case: triggered by ROCm/ROCm repo + ${{ elseif ne(parameters.checkoutRef, '') }}: + dependencySource: tag-builds + - task: CMake@1 + displayName: MIGraphXTest CMake Flags + inputs: + cmakeArgs: >- + -DCMAKE_BUILD_TYPE=Release + -DGPU_TARGETS=$(JOB_GPU_TARGET) + -DAMDGPU_TARGETS=$(JOB_GPU_TARGET) + -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip + -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm + -DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include + -DBUILD_TESTING=ON + -DMIGRAPHX_ENABLE_C_API_TEST=ON + .. 
+ - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml + parameters: + componentName: AMDMIGraphX + testExecutable: make + testParameters: -j$(nproc) check + testPublishResults: false diff --git a/.azuredevops/components/MIOpen.yml b/.azuredevops/components/MIOpen.yml index dce0edef0..e87cd0233 100644 --- a/.azuredevops/components/MIOpen.yml +++ b/.azuredevops/components/MIOpen.yml @@ -8,15 +8,24 @@ parameters: - name: aptPackages type: object default: - - software-properties-common - - python3-pip - cmake - - ninja-build - - libsqlite3-dev - libbz2-dev - - nlohmann-json3-dev - - libgtest-dev - libdrm-dev + - libeigen3-dev + - libgmock-dev + - libgtest-dev + - libsqlite3-dev + - libstdc++-12-dev + - libzstd-dev + - ninja-build + - nlohmann-json3-dev + - python3-pip + - software-properties-common + - zstd +- name: pipModules + type: object + default: + - cget - name: rocmDependencies type: object default: @@ -35,6 +44,24 @@ parameters: - clr - rocminfo - roctracer +- name: rocmTestDependencies + type: object + default: + - clr + - composable_kernel + - half + - hipBLAS + - hipBLAS-common + - hipBLASLt + - llvm-project + - rocBLAS + - rocm-cmake + - rocminfo + - rocMLIR + - ROCR-Runtime + - rocprofiler-register + - rocRAND + - roctracer jobs: - job: MIOpen @@ -48,28 +75,17 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: aptPackages: ${{ parameters.aptPackages }} - # The default boost library from apt is 1.74, which does not satisfy MIOpen's build requirement (1.79+) - # Upgrade boost from apt by following https://launchpad.net/~mhier/+archive/ubuntu/libboost-latest - - task: Bash@3 - displayName: 'update boost version' - inputs: - targetType: inline - script: sudo add-apt-repository ppa:mhier/libboost-latest -y - - task: Bash@3 - displayName: 'install boost' - 
inputs: - targetType: inline - script: sudo apt-get --yes install libboost1.83-dev libboost-system1.83-dev libboost-filesystem1.83-dev + pipModules: ${{ parameters.pipModules }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml parameters: checkoutRepo: ${{ parameters.checkoutRepo }} + # The default boost library from apt is 1.74, which does not satisfy MIOpen's build requirement (1.79+) + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-boost.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml parameters: dependencyList: ${{ parameters.rocmDependencies }} @@ -85,7 +101,8 @@ jobs: extraBuildFlags: >- -DMIOPEN_BACKEND=HIP -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++ - -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm + -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/boost + -DAMDGPU_TARGETS=$(JOB_GPU_TARGET) -DMIOPEN_ENABLE_AI_KERNEL_TUNING=OFF -DMIOPEN_ENABLE_AI_IMMED_MODE_FALLBACK=OFF -DCMAKE_BUILD_TYPE=Release @@ -94,3 +111,102 @@ jobs: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml parameters: gpuTarget: $(JOB_GPU_TARGET) + +- job: MIOpen_testing + dependsOn: MIOpen + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) + variables: + - group: common + - template: /.azuredevops/variables-global.yml + pool: $(JOB_TEST_POOL) + workspace: + clean: all + strategy: + matrix: + gfx942: + JOB_GPU_TARGET: gfx942 + JOB_TEST_POOL: ${{ variables.GFX942_TEST_POOL }} + steps: + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml + parameters: + aptPackages: ${{ parameters.aptPackages }} + pipModules: ${{ parameters.pipModules }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml + - template: ${{ variables.CI_TEMPLATE_PATH 
}}/steps/checkout.yml + parameters: + checkoutRepo: ${{ parameters.checkoutRepo }} + # The default boost library from apt is 1.74, which does not satisfy MIOpen's build requirement (1.79+) + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-boost.yml + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml + parameters: + ${{ if eq(parameters.checkoutRef, '') }}: + dependencySource: staging + ${{ elseif ne(parameters.checkoutRef, '') }}: + dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml + parameters: + dependencyList: ${{ parameters.rocmTestDependencies }} + gpuTarget: $(JOB_GPU_TARGET) + ${{ if eq(parameters.checkoutRef, '') }}: + dependencySource: staging + ${{ elseif ne(parameters.checkoutRef, '') }}: + dependencySource: tag-builds +# MIOpen depends on a specific version of frugally-deep which is forked here: https://github.com/ROCm/frugally-deep +# https://github.com/ROCm/frugally-deep/blob/master/INSTALL.md + - task: Bash@3 + displayName: Add Python site-packages binaries to path + inputs: + targetType: inline + script: | + USER_BASE=$(python3 -m site --user-base) + echo "##vso[task.prependpath]$USER_BASE/bin" + - task: Bash@3 + displayName: Install FunctionalPlus + inputs: + targetType: inline + script: cget install Dobiasd/FunctionalPlus + - task: Bash@3 + displayName: Remove Python site-packages binaries from path + inputs: + targetType: inline + script: | + USER_BASE=$(python3 -m site --user-base) + echo "##vso[task.setvariable variable=PATH]$(echo $PATH | sed -e 's;:$USER_BASE/bin;;' -e 's;^/;;' -e 's;/$;;')" + - task: Bash@3 + displayName: git clone frugally-deep + inputs: + targetType: inline + script: git clone https://github.com/ROCm/frugally-deep --depth=1 --shallow-submodules + workingDirectory: $(Build.SourcesDirectory) + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml + parameters: + componentName: frugally-deep + cmakeBuildDir: 
$(Build.SourcesDirectory)/frugally-deep/build + installDir: $(Build.SourcesDirectory)/bin + extraBuildFlags: -DCMAKE_PREFIX_PATH=$(Build.SourcesDirectory)/cget/cget/pkg/Dobiasd__FunctionalPlus/install + - task: CMake@1 + displayName: 'MIOpen Test CMake Flags' + inputs: + cmakeArgs: >- + -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Build.SourcesDirectory)/bin;$(Build.SourcesDirectory)/cget/cget/pkg/Dobiasd__FunctionalPlus/install;$(Agent.BuildDirectory)/boost + -DCMAKE_INSTALL_PREFIX=$(Agent.BuildDirectory)/rocm + -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++ + -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang + -DMIOPEN_BACKEND=HIP + -DMIOPEN_TEST_FLAGS=" --disable-verification-cache" + -DCMAKE_BUILD_TYPE=release + -DBUILD_DEV=OFF + -DMIOPEN_USE_MLIR=ON + -DMIOPEN_GPU_SYNC=OFF + .. + - task: Bash@3 + displayName: 'MIOpen Test Build' + inputs: + targetType: inline + script: | + cmake --build . --target tests -- -j$(nproc) + workingDirectory: $(Build.SourcesDirectory)/build + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml + parameters: + componentName: MIOpen diff --git a/.azuredevops/components/MIVisionX.yml b/.azuredevops/components/MIVisionX.yml index 5082d3971..4af1a2212 100644 --- a/.azuredevops/components/MIVisionX.yml +++ b/.azuredevops/components/MIVisionX.yml @@ -86,8 +86,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -120,7 +118,7 @@ jobs: - job: MIVisionX_testing dependsOn: MIVisionX - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -162,6 +160,7 @@ jobs: mkdir 
mivisionx-tests cd mivisionx-tests cmake /opt/rocm/share/mivisionx/test + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: MIVisionX diff --git a/.azuredevops/components/ROCR-Runtime.yml b/.azuredevops/components/ROCR-Runtime.yml index 43e3f8bbd..c99561b95 100644 --- a/.azuredevops/components/ROCR-Runtime.yml +++ b/.azuredevops/components/ROCR-Runtime.yml @@ -59,7 +59,7 @@ jobs: - job: ROCR_Runtime_testing dependsOn: ROCR_Runtime - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -102,8 +102,12 @@ jobs: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml parameters: checkoutRepo: ${{ parameters.checkoutRepo }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml + parameters: + runRocminfo: false - task: Bash@3 displayName: Build kfdtest + continueOnError: true inputs: targetType: 'inline' workingDirectory: $(Build.SourcesDirectory)/libhsakmt/tests/kfdtest @@ -119,6 +123,7 @@ jobs: testDir: $(Build.SourcesDirectory)/libhsakmt/tests/kfdtest/scripts - task: Bash@3 displayName: Build rdmatest app + continueOnError: true inputs: targetType: 'inline' workingDirectory: $(Build.SourcesDirectory)/libhsakmt/tests/rdma/simple/app @@ -127,6 +132,7 @@ jobs: cmake --build . 
- task: Bash@3 displayName: Build rdmatest driver + continueOnError: true inputs: targetType: 'inline' workingDirectory: $(Build.SourcesDirectory)/libhsakmt/tests/rdma/simple/drv @@ -136,6 +142,7 @@ jobs: RDMA_HEADER_DIR=/usr/src/amdgpu-*/include make all - task: Bash@3 displayName: Install rdmatest driver + continueOnError: true inputs: targetType: 'inline' workingDirectory: $(Build.SourcesDirectory)/libhsakmt/tests/rdma/simple/drv @@ -151,6 +158,7 @@ jobs: testPublishResults: false - task: Bash@3 displayName: Build rocrtst + continueOnError: true inputs: targetType: 'inline' workingDirectory: $(Build.SourcesDirectory)/rocrtst/suites/test_common diff --git a/.azuredevops/components/ROCgdb.yml b/.azuredevops/components/ROCgdb.yml index 76a314798..d41a80802 100644 --- a/.azuredevops/components/ROCgdb.yml +++ b/.azuredevops/components/ROCgdb.yml @@ -32,6 +32,7 @@ parameters: jobs: - job: ROCgdb + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -100,6 +101,7 @@ jobs: sudo rm -rf /opt/rocm sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm echo "##vso[task.prependpath]/opt/rocm/bin" + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - task: Bash@3 displayName: check-gdb continueOnError: true diff --git a/.azuredevops/components/ROCmValidationSuite.yml b/.azuredevops/components/ROCmValidationSuite.yml index 9a27ff6be..14dd14c33 100644 --- a/.azuredevops/components/ROCmValidationSuite.yml +++ b/.azuredevops/components/ROCmValidationSuite.yml @@ -13,6 +13,7 @@ parameters: - libyaml-cpp-dev - libpci-dev - libpci3 + - libgst-dev - libgtest-dev - git - name: rocmDependencies @@ -40,6 +41,7 @@ parameters: - llvm-project - rocBLAS - rocm_smi_lib + - rocminfo - rocprofiler-register - ROCR-Runtime - rocRAND @@ -64,8 +66,6 @@ jobs: matrix: gfx942: 
JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -98,7 +98,7 @@ jobs: - job: ROCmValidationSuite_testing dependsOn: ROCmValidationSuite - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -132,6 +132,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: ROCmValidationSuite diff --git a/.azuredevops/components/Tensile.yml b/.azuredevops/components/Tensile.yml new file mode 100644 index 000000000..4a7716cf5 --- /dev/null +++ b/.azuredevops/components/Tensile.yml @@ -0,0 +1,175 @@ +parameters: +- name: checkoutRepo + type: string + default: 'self' +- name: checkoutRef + type: string + default: '' +- name: aptPackages + type: object + default: + - python3-pip + - cmake + - libmsgpack-dev + - libboost-program-options-dev +- name: pipModules + type: object + default: + - tox + - pytest +- name: rocmDependencies + type: object + default: + - aomp + - clr + - llvm-project + - rocm-cmake + - rocm-core + - rocminfo + - rocm_smi_lib + - rocprofiler-register + - ROCR-Runtime + +jobs: +- job: Tensile + variables: + - group: common + - template: /.azuredevops/variables-global.yml + pool: + vmImage: ${{ variables.BASE_BUILD_POOL }} + workspace: + clean: all + steps: + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml + parameters: + aptPackages: ${{ parameters.aptPackages }} + pipModules: ${{ parameters.pipModules }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml + - template: ${{ 
variables.CI_TEMPLATE_PATH }}/steps/checkout.yml + parameters: + checkoutRepo: ${{ parameters.checkoutRepo }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml + parameters: + dependencyList: ${{ parameters.rocmDependencies }} + # CI case: download latest default branch build + ${{ if eq(parameters.checkoutRef, '') }}: + dependencySource: staging + # manual build case: triggered by ROCm/ROCm repo + ${{ elseif ne(parameters.checkoutRef, '') }}: + dependencySource: tag-builds + - task: Bash@3 + displayName: Create wheel file + inputs: + targetType: inline + script: python3 setup.py bdist_wheel + workingDirectory: $(Build.SourcesDirectory) + env: + ROCM_PATH: $(Agent.BuildDirectory)/rocm + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml + parameters: + sourceDir: $(Build.SourcesDirectory)/dist + contentsString: '*.whl' + targetDir: $(Build.ArtifactStagingDirectory) + clean: false + - task: PublishPipelineArtifact@1 + displayName: 'wheel file Publish' + retryCountOnTaskFailure: 3 + inputs: + targetPath: $(Build.ArtifactStagingDirectory) + +- job: Tensile_testing + dependsOn: Tensile + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) + variables: + - group: common + - template: /.azuredevops/variables-global.yml + pool: $(JOB_TEST_POOL) + workspace: + clean: all + strategy: + matrix: + gfx942: + JOB_GPU_TARGET: gfx942 + JOB_TEST_POOL: ${{ variables.GFX942_TEST_POOL }} + steps: + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml + parameters: + aptPackages: ${{ parameters.aptPackages }} + pipModules: ${{ parameters.pipModules }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml + - task: DownloadPipelineArtifact@2 + displayName: 'Download Pipeline Wheel Files' + inputs: + itemPattern: '**/*.whl' + targetPath: $(Agent.BuildDirectory) + - template: ${{ 
variables.CI_TEMPLATE_PATH }}/steps/checkout.yml + parameters: + checkoutRepo: ${{ parameters.checkoutRepo }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml + parameters: + ${{ if eq(parameters.checkoutRef, '') }}: + dependencySource: staging + ${{ elseif ne(parameters.checkoutRef, '') }}: + dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml + parameters: + dependencyList: ${{ parameters.rocmDependencies }} + gpuTarget: $(JOB_GPU_TARGET) + ${{ if eq(parameters.checkoutRef, '') }}: + dependencySource: staging + ${{ elseif ne(parameters.checkoutRef, '') }}: + dependencySource: tag-builds + - task: Bash@3 + displayName: pip install + inputs: + targetType: inline + script: find -name *.whl -exec pip install {} \; + workingDirectory: $(Agent.BuildDirectory) + - task: Bash@3 + displayName: Setup test environment + inputs: + targetType: inline + script: | + sudo rm -rf /opt/rocm + sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm + - task: Bash@3 + displayName: Add Python site-packages binaries to path + inputs: + targetType: inline + script: | + USER_BASE=$(python3 -m site --user-base) + echo "##vso[task.prependpath]$USER_BASE/bin" + - task: Bash@3 + displayName: Add ROCm binaries to PATH + inputs: + targetType: inline + script: echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin" + - task: Bash@3 + displayName: Add ROCm compilers to PATH + inputs: + targetType: inline + script: echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin" + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml + - task: Bash@3 + displayName: tox test + inputs: + targetType: inline + script: tox run -v -e ci -- -m pre_checkin + workingDirectory: $(Build.SourcesDirectory) + - task: Bash@3 + displayName: Remove Python site-packages binaries from path + inputs: + targetType: inline + script: | + USER_BASE=$(python3 -m site --user-base) + echo "##vso[task.setvariable 
variable=PATH]$(echo $PATH | sed -e 's;:$USER_BASE/bin;;' -e 's;^/;;' -e 's;/$;;')" + - task: Bash@3 + displayName: Remove ROCm binaries from PATH + inputs: + targetType: inline + script: echo "##vso[task.setvariable variable=PATH]$(echo $PATH | sed -e 's;:$(Agent.BuildDirectory)/rocm/bin;;' -e 's;^/;;' -e 's;/$;;')" + - task: Bash@3 + displayName: Remove ROCm compilers from PATH + inputs: + targetType: inline + script: echo "##vso[task.setvariable variable=PATH]$(echo $PATH | sed -e 's;:$(Agent.BuildDirectory)/rocm/llvm/bin;;' -e 's;^/;;' -e 's;/$;;')" diff --git a/.azuredevops/components/amdsmi.yml b/.azuredevops/components/amdsmi.yml index b6d8a2ec8..5b10addef 100644 --- a/.azuredevops/components/amdsmi.yml +++ b/.azuredevops/components/amdsmi.yml @@ -35,7 +35,7 @@ jobs: - job: amdsmi_testing dependsOn: amdsmi - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -53,6 +53,9 @@ jobs: aptPackages: ${{ parameters.aptPackages }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml + parameters: + runRocminfo: false - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: amdsmi diff --git a/.azuredevops/components/aomp.yml b/.azuredevops/components/aomp.yml index e8bbcdfe5..9a443ac3c 100644 --- a/.azuredevops/components/aomp.yml +++ b/.azuredevops/components/aomp.yml @@ -413,7 +413,7 @@ jobs: - job: aomp_testing dependsOn: aomp - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - 
template: /.azuredevops/variables-global.yml @@ -472,6 +472,7 @@ jobs: Contents: FileCheck TargetFolder: $(Agent.BuildDirectory)/rocm/share/openmp-extras/tests/bin retryCount: 3 + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - task: Bash@3 displayName: Test AOMP continueOnError: true diff --git a/.azuredevops/components/composable_kernel.yml b/.azuredevops/components/composable_kernel.yml index 2d185cda2..3550ec728 100644 --- a/.azuredevops/components/composable_kernel.yml +++ b/.azuredevops/components/composable_kernel.yml @@ -48,8 +48,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -101,7 +99,7 @@ jobs: - job: composable_kernel_testing dependsOn: composable_kernel - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -137,6 +135,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - task: Bash@3 displayName: Iterate through test scripts inputs: diff --git a/.azuredevops/components/copyHIP.yml b/.azuredevops/components/copyHIP.yml index a925202ed..a1770d393 100644 --- a/.azuredevops/components/copyHIP.yml +++ b/.azuredevops/components/copyHIP.yml @@ -25,7 +25,7 @@ jobs: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-download.yml parameters: componentName: HIP - pipelineId: $(hip-pipeline-id) + pipelineId: $(HIP_PIPELINE_ID) - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml parameters: sourceDir: $(Agent.BuildDirectory)/rocm diff --git a/.azuredevops/components/hip-tests.yml b/.azuredevops/components/hip-tests.yml 
index f706291f9..a169e4e3b 100644 --- a/.azuredevops/components/hip-tests.yml +++ b/.azuredevops/components/hip-tests.yml @@ -52,8 +52,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -91,7 +89,7 @@ jobs: - job: hip_tests_testing timeoutInMinutes: 240 dependsOn: hip_tests - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -133,11 +131,11 @@ jobs: sudo rm -rf /opt/rocm sudo mkdir -p /opt/rocm/bin sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rocm_agent_enumerator /opt/rocm/bin/rocm_agent_enumerator + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: hip_tests testDir: $(Agent.BuildDirectory)/rocm/share/hip - reloadAMDGPU: true - task: Bash@3 displayName: Clean up symlink inputs: diff --git a/.azuredevops/components/hipBLAS-common.yml b/.azuredevops/components/hipBLAS-common.yml index 2fb39a591..775ec8ba3 100644 --- a/.azuredevops/components/hipBLAS-common.yml +++ b/.azuredevops/components/hipBLAS-common.yml @@ -29,7 +29,7 @@ jobs: - name: ROCM_PATH value: $(Agent.BuildDirectory)/rocm - template: /.azuredevops/variables-global.yml - pool: + pool: vmImage: ${{ variables.BASE_BUILD_POOL }} workspace: clean: all diff --git a/.azuredevops/components/hipBLAS.yml b/.azuredevops/components/hipBLAS.yml index c2ff23b11..e71232cbc 100644 --- a/.azuredevops/components/hipBLAS.yml +++ b/.azuredevops/components/hipBLAS.yml @@ -59,8 +59,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -98,7 
+96,7 @@ jobs: - job: hipBLAS_testing dependsOn: hipBLAS - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -132,6 +130,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: hipBLAS diff --git a/.azuredevops/components/hipBLASLt.yml b/.azuredevops/components/hipBLASLt.yml index 0f2eb7c54..ee610bfdc 100644 --- a/.azuredevops/components/hipBLASLt.yml +++ b/.azuredevops/components/hipBLASLt.yml @@ -22,6 +22,7 @@ parameters: type: object default: - joblib + - packaging - name: rocmDependencies type: object default: @@ -74,8 +75,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -150,7 +149,7 @@ jobs: - job: hipBLASLt_testing dependsOn: hipBLASLt - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -166,6 +165,7 @@ jobs: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: aptPackages: ${{ parameters.aptPackages }} + pipModules: ${{ parameters.pipModules }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml parameters: @@ -184,6 +184,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ 
variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: hipBLASLt diff --git a/.azuredevops/components/hipCUB.yml b/.azuredevops/components/hipCUB.yml index 29a7f07e0..e81c8be82 100644 --- a/.azuredevops/components/hipCUB.yml +++ b/.azuredevops/components/hipCUB.yml @@ -43,8 +43,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -78,7 +76,7 @@ jobs: - job: hipCUB_testing dependsOn: hipCUB - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -112,6 +110,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: hipCUB diff --git a/.azuredevops/components/hipFFT.yml b/.azuredevops/components/hipFFT.yml index 196a4c15b..38212a5cf 100644 --- a/.azuredevops/components/hipFFT.yml +++ b/.azuredevops/components/hipFFT.yml @@ -55,8 +55,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -96,7 +94,7 @@ jobs: - job: hipFFT_testing dependsOn: hipFFT - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -130,6 +128,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, 
'') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: hipFFT diff --git a/.azuredevops/components/hipRAND.yml b/.azuredevops/components/hipRAND.yml index c3d341f8f..767c9f7ab 100644 --- a/.azuredevops/components/hipRAND.yml +++ b/.azuredevops/components/hipRAND.yml @@ -45,8 +45,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -82,7 +80,7 @@ jobs: - job: hipRAND_testing dependsOn: hipRAND - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -116,6 +114,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: hipRAND diff --git a/.azuredevops/components/hipSOLVER.yml b/.azuredevops/components/hipSOLVER.yml index 7e7f6ee1f..60f2141d1 100644 --- a/.azuredevops/components/hipSOLVER.yml +++ b/.azuredevops/components/hipSOLVER.yml @@ -42,6 +42,7 @@ parameters: - ROCR-Runtime - rocSOLVER - rocSPARSE + - roctracer jobs: - job: hipSOLVER @@ -56,8 +57,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -102,7 +101,7 @@ jobs: - job: hipSOLVER_testing dependsOn: hipSOLVER - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), 
variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -136,6 +135,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: hipSOLVER diff --git a/.azuredevops/components/hipSPARSE.yml b/.azuredevops/components/hipSPARSE.yml index f99a77aed..c433371f1 100644 --- a/.azuredevops/components/hipSPARSE.yml +++ b/.azuredevops/components/hipSPARSE.yml @@ -52,8 +52,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -97,7 +95,7 @@ jobs: - job: hipSPARSE_testing dependsOn: hipSPARSE - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -131,6 +129,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: hipSPARSE diff --git a/.azuredevops/components/hipSPARSELt.yml b/.azuredevops/components/hipSPARSELt.yml index f95bc52a1..67b8ba808 100644 --- a/.azuredevops/components/hipSPARSELt.yml +++ b/.azuredevops/components/hipSPARSELt.yml @@ -123,7 +123,7 @@ jobs: - job: hipSPARSELt_testing dependsOn: hipSPARSELt - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: 
/.azuredevops/variables-global.yml @@ -151,6 +151,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: hipSPARSELt diff --git a/.azuredevops/components/hipTensor.yml b/.azuredevops/components/hipTensor.yml index 7cd71702b..d12cfaac3 100644 --- a/.azuredevops/components/hipTensor.yml +++ b/.azuredevops/components/hipTensor.yml @@ -42,8 +42,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -79,7 +77,7 @@ jobs: - job: hipTensor_testing timeoutInMinutes: 90 dependsOn: hipTensor - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -113,8 +111,9 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: hipTensor testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor' - testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes' + testParameters: '-E ".*-extended" -VV --output-on-failure --force-new-ctest-process --output-junit test_output.xml' diff --git a/.azuredevops/components/hipfort.yml b/.azuredevops/components/hipfort.yml index 034f36c14..3a3e70387 100644 --- a/.azuredevops/components/hipfort.yml +++ b/.azuredevops/components/hipfort.yml @@ -51,8 +51,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ 
variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -94,7 +92,7 @@ jobs: - job: hipfort_testing dependsOn: hipfort - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -139,6 +137,7 @@ jobs: sudo rm -rf /opt/rocm sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm workingDirectory: $(Build.SourcesDirectory) + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - task: Bash@3 displayName: 'Test hipfort' inputs: diff --git a/.azuredevops/components/omnitrace.yml b/.azuredevops/components/omnitrace.yml index 207584bc8..f4ca1a1ef 100644 --- a/.azuredevops/components/omnitrace.yml +++ b/.azuredevops/components/omnitrace.yml @@ -40,6 +40,7 @@ parameters: - name: rocmDependencies type: object default: + - aomp - clr - llvm-project - rccl @@ -63,8 +64,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: diff --git a/.azuredevops/components/rccl.yml b/.azuredevops/components/rccl.yml index 845598500..0a7b59578 100644 --- a/.azuredevops/components/rccl.yml +++ b/.azuredevops/components/rccl.yml @@ -60,8 +60,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -102,7 +100,7 @@ jobs: - job: rccl_testing timeoutInMinutes: 120 dependsOn: rccl - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -136,6 +134,7 @@ jobs: dependencySource: staging ${{ elseif 
ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rccl diff --git a/.azuredevops/components/rdc.yml b/.azuredevops/components/rdc.yml index a4382e36e..f51b83a9c 100644 --- a/.azuredevops/components/rdc.yml +++ b/.azuredevops/components/rdc.yml @@ -60,8 +60,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -113,7 +111,7 @@ jobs: - job: rdc_testing dependsOn: rdc - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -131,6 +129,8 @@ jobs: aptPackages: ${{ parameters.aptPackages }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml + parameters: + gpuTarget: $(JOB_GPU_TARGET) - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml parameters: ${{ if eq(parameters.checkoutRef, '') }}: @@ -156,6 +156,7 @@ jobs: sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rdcd /usr/sbin/rdcd echo $(Agent.BuildDirectory)/rocm/lib/rdc/grpc/lib | sudo tee /etc/ld.so.conf.d/grpc.conf sudo ldconfig -v + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - task: Bash@3 displayName: Test rdc inputs: diff --git a/.azuredevops/components/rocAL.yml b/.azuredevops/components/rocAL.yml index 11ac334e2..72199e14b 100644 --- a/.azuredevops/components/rocAL.yml +++ b/.azuredevops/components/rocAL.yml @@ -53,6 +53,7 @@ parameters: - half - llvm-project - MIVisionX + - rocminfo - rocprofiler-register - ROCR-Runtime - rpp @@ -70,8 +71,6 @@ jobs: 
matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - task: Bash@3 displayName: 'Register libjpeg-turbo packages' @@ -157,7 +156,7 @@ jobs: - job: rocAL_testing dependsOn: rocAL - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -220,11 +219,11 @@ jobs: mkdir rocAL-tests cd rocAL-tests cmake $(Agent.BuildDirectory)/rocm/share/rocal/test + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocAL testDir: rocAL-tests - reloadAMDGPU: true - task: Bash@3 displayName: Clean up libjpeg-turbo inputs: diff --git a/.azuredevops/components/rocALUTION.yml b/.azuredevops/components/rocALUTION.yml index 64b645ba3..58c4eacb8 100644 --- a/.azuredevops/components/rocALUTION.yml +++ b/.azuredevops/components/rocALUTION.yml @@ -60,8 +60,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -98,7 +96,7 @@ jobs: - job: rocALUTION_testing dependsOn: rocALUTION - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -132,6 +130,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocALUTION diff --git a/.azuredevops/components/rocBLAS.yml 
b/.azuredevops/components/rocBLAS.yml index 5ddaae7d2..33271c898 100644 --- a/.azuredevops/components/rocBLAS.yml +++ b/.azuredevops/components/rocBLAS.yml @@ -40,6 +40,7 @@ parameters: - aomp-extras - hipBLAS-common - hipBLASLt + - roctracer - name: rocmTestDependencies type: object default: @@ -74,8 +75,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -121,7 +120,7 @@ jobs: - job: rocBLAS_testing dependsOn: rocBLAS - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -156,6 +155,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocBLAS diff --git a/.azuredevops/components/rocDecode.yml b/.azuredevops/components/rocDecode.yml index e94a7e411..f69ff0693 100644 --- a/.azuredevops/components/rocDecode.yml +++ b/.azuredevops/components/rocDecode.yml @@ -17,7 +17,7 @@ parameters: - libavformat-dev - libavutil-dev - libstdc++-12-dev - - libva-dev + - libva-amdgpu-dev - mesa-amdgpu-va-drivers - libdrm-dev - name: rocmDependencies @@ -35,6 +35,7 @@ parameters: default: - clr - llvm-project + - rocminfo - rocprofiler-register - ROCR-Runtime @@ -88,11 +89,13 @@ jobs: - job: rocDecode_testing dependsOn: rocDecode - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml - pool: $(JOB_TEST_POOL) + 
pool: + name: $(JOB_TEST_POOL) + demands: firstRenderDeviceAccess workspace: clean: all strategy: @@ -101,6 +104,18 @@ jobs: JOB_GPU_TARGET: gfx942 JOB_TEST_POOL: ${{ variables.GFX942_TEST_POOL }} steps: + # Since mesa-amdgpu-multimedia-devel is not directly available from apt, register it + - task: Bash@3 + displayName: 'Register ROCm packages' + inputs: + targetType: inline + script: | + sudo mkdir --parents --mode=0755 /etc/apt/keyrings + wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null + echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/${{ variables.KEYRING_VERSION }}/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list + echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ variables.KEYRING_VERSION }} jammy main" | sudo tee --append /etc/apt/sources.list.d/rocm.list + echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600 + sudo apt update - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: aptPackages: ${{ parameters.aptPackages }} @@ -128,6 +143,7 @@ jobs: mkdir rocDecode-tests cd rocDecode-tests cmake /opt/rocm/share/rocdecode/test + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocDecode diff --git a/.azuredevops/components/rocFFT.yml b/.azuredevops/components/rocFFT.yml index 06274c702..5d1d1f2bb 100644 --- a/.azuredevops/components/rocFFT.yml +++ b/.azuredevops/components/rocFFT.yml @@ -55,8 +55,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -95,7 +93,7 @@ jobs: - job: rocFFT_testing dependsOn: rocFFT - condition: succeeded() + condition: 
and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -129,6 +127,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocFFT diff --git a/.azuredevops/components/rocMLIR.yml b/.azuredevops/components/rocMLIR.yml index 659c45696..ecfa0f2b5 100644 --- a/.azuredevops/components/rocMLIR.yml +++ b/.azuredevops/components/rocMLIR.yml @@ -13,6 +13,7 @@ parameters: - git - python3-pip - libdrm-dev + - libstdc++-12-dev - name: pipModules type: object default: @@ -28,7 +29,7 @@ parameters: - ROCR-Runtime jobs: -- job: rocMLIR_library +- job: rocMLIR variables: - group: common - template: /.azuredevops/variables-global.yml @@ -66,16 +67,19 @@ jobs: # compiling and running test on the test system together - job: rocMLIR_testing + dependsOn: rocMLIR + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml - pool: ${{ variables.GFX942_TEST_POOL }} + pool: $(JOB_TEST_POOL) workspace: clean: all strategy: matrix: gfx942: JOB_GPU_TARGET: gfx942 + JOB_TEST_POOL: ${{ variables.GFX942_TEST_POOL }} steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -119,6 +123,7 @@ jobs: -DAMDGPU_TARGETS=$(JOB_GPU_TARGET) -DROCM_TEST_CHIPSET=$(JOB_GPU_TARGET) -GNinja + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocMLIR diff --git 
a/.azuredevops/components/rocPRIM.yml b/.azuredevops/components/rocPRIM.yml index ef6f35cdb..a620c4749 100644 --- a/.azuredevops/components/rocPRIM.yml +++ b/.azuredevops/components/rocPRIM.yml @@ -42,8 +42,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -77,7 +75,7 @@ jobs: - job: rocPRIM_testing dependsOn: rocPRIM - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -111,6 +109,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocPRIM diff --git a/.azuredevops/components/rocPyDecode.yml b/.azuredevops/components/rocPyDecode.yml index 3226c0b28..92bd5b6e0 100644 --- a/.azuredevops/components/rocPyDecode.yml +++ b/.azuredevops/components/rocPyDecode.yml @@ -8,25 +8,22 @@ parameters: - name: aptPackages type: object default: - - python3-pip - cmake - - ninja-build - - pkg-config - ffmpeg - libavcodec-dev - libavformat-dev - libavutil-dev - - libva-dev - - libdrm-dev - - pybind11-dev - - python3-pybind11 - libdlpack-dev + - libdrm-dev + - libva-dev + - ninja-build + - pkg-config + - python3-pip - name: pipModules type: object default: - - -i - - https://test.pypi.org/simple - - hip-python + - numpy + - pybind11 - name: rocmDependencies type: object default: @@ -52,13 +49,16 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: aptPackages: ${{ parameters.aptPackages }} 
pipModules: ${{ parameters.pipModules }} + - task: Bash@3 + displayName: 'pip install hip-python' + inputs: + targetType: inline + script: pip install -i https://test.pypi.org/simple hip-python - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml parameters: @@ -80,11 +80,19 @@ jobs: script: | sudo rm -rf /opt/rocm sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm + - task: Bash@3 + displayName: 'Save Python Package Paths' + inputs: + targetType: inline + script: | + echo "##vso[task.setvariable variable=PYTHON_USER_SITE;]$(python3 -m site --user-site)" + echo "##vso[task.setvariable variable=PYTHON_DIST_PACKAGES;]$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')" + echo "##vso[task.setvariable variable=PYBIND11_PATH;]$(python3 -c 'import pybind11; print(pybind11.get_cmake_dir())')" - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml parameters: extraBuildFlags: >- -DROCM_PATH=$(Agent.BuildDirectory)/rocm - -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm + -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(PYTHON_USER_SITE)/pybind11;$(PYTHON_DIST_PACKAGES)/pybind11;$(PYBIND11_PATH) -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=$(JOB_GPU_TARGET) -DCMAKE_INSTALL_PREFIX_PYTHON=$(Build.BinariesDirectory) @@ -97,7 +105,10 @@ jobs: displayName: Create wheel file inputs: targetType: inline - script: python setup.py bdist_wheel + script: | + export ROCM_PATH=$(Agent.BuildDirectory)/rocm + export HIP_INCLUDE_DIRS=$(Agent.BuildDirectory)/rocm/include/hip + python3 setup.py bdist_wheel workingDirectory: $(Build.SourcesDirectory) - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml parameters: @@ -113,11 +124,13 @@ jobs: - job: rocPyDecode_testing dependsOn: rocPyDecode - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), 
variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml - pool: $(JOB_TEST_POOL) + pool: + name: $(JOB_TEST_POOL) + demands: firstRenderDeviceAccess workspace: clean: all strategy: @@ -126,11 +139,35 @@ jobs: JOB_GPU_TARGET: gfx942 JOB_TEST_POOL: ${{ variables.GFX942_TEST_POOL }} steps: + - task: Bash@3 + displayName: Ensure pybind11-dev is not installed + inputs: + targetType: inline + script: | + if dpkg -l | grep -q pybind11-dev; then + echo "Removing pybind11-dev..." + sudo apt remove -y pybind11-dev + else + echo "pybind11-dev is not installed." + fi - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: aptPackages: ${{ parameters.aptPackages }} + pipModules: ${{ parameters.pipModules }} + - task: Bash@3 + displayName: 'pip install hip-python' + inputs: + targetType: inline + script: pip install -i https://test.pypi.org/simple hip-python - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml + - task: DownloadPipelineArtifact@2 + displayName: 'Download Pipeline Wheel Files' + inputs: + itemPattern: '**/*.whl' + targetPath: $(Agent.BuildDirectory) + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml + parameters: + checkoutRepo: ${{ parameters.checkoutRepo }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml parameters: ${{ if eq(parameters.checkoutRef, '') }}: @@ -145,27 +182,44 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml - parameters: - checkoutRepo: ${{ parameters.checkoutRepo }} - task: Bash@3 - displayName: Setup test environment + displayName: pip install + inputs: + targetType: inline + script: | + pip uninstall -y rocPyDecode + find -name *.whl -exec pip install {} \; + workingDirectory: 
$(Agent.BuildDirectory) + - task: Bash@3 + displayName: Setup search paths inputs: targetType: inline script: | sudo rm -rf /opt/rocm sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm - cd $(Build.SourcesDirectory) - sudo pip install . - cmake -DAMDGPU_TARGETS=$(JOB_GPU_TARGET) . + echo "##vso[task.setvariable variable=PYTHON_USER_SITE;]$(python3 -m site --user-site)" + echo "##vso[task.setvariable variable=PYTHON_DIST_PACKAGES;]$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')" + echo "##vso[task.setvariable variable=PYBIND11_PATH;]$(python3 -c 'import pybind11; print(pybind11.get_cmake_dir())')" + - task: CMake@1 + displayName: 'rocPyDecode Test CMake Flags' + inputs: + cmakeArgs: >- + -DROCM_PATH=$(Agent.BuildDirectory)/rocm + -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(PYTHON_USER_SITE)/pybind11;$(PYTHON_DIST_PACKAGES)/pybind11;$(PYBIND11_PATH) + -DCMAKE_BUILD_TYPE=Release + -DAMDGPU_TARGETS=$(JOB_GPU_TARGET) + .. + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocPyDecode - testDir: $(Build.SourcesDirectory) + testDir: $(Build.SourcesDirectory)/build # sudo required for pip install but screws up permissions for next pipeline run - task: Bash@3 displayName: Clean up test environment + condition: always() inputs: targetType: inline script: | - sudo rm -rf $(Build.SourcesDirectory)/* + pip uninstall -y rocPyDecode + pip uninstall -y hip-python diff --git a/.azuredevops/components/rocRAND.yml b/.azuredevops/components/rocRAND.yml index a3178779c..81b320a77 100644 --- a/.azuredevops/components/rocRAND.yml +++ b/.azuredevops/components/rocRAND.yml @@ -45,8 +45,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -79,7 +77,7 @@ jobs: - job: rocRAND_testing dependsOn: rocRAND - condition: 
succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -113,6 +111,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocRAND diff --git a/.azuredevops/components/rocSOLVER.yml b/.azuredevops/components/rocSOLVER.yml index 286d80a26..326ce2618 100644 --- a/.azuredevops/components/rocSOLVER.yml +++ b/.azuredevops/components/rocSOLVER.yml @@ -44,6 +44,7 @@ parameters: - rocprofiler-register - ROCR-Runtime - rocSPARSE + - roctracer jobs: - job: rocSOLVER @@ -57,8 +58,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -112,7 +111,7 @@ jobs: - job: rocSOLVER_testing dependsOn: rocSOLVER - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -146,6 +145,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocSOLVER diff --git a/.azuredevops/components/rocSPARSE.yml b/.azuredevops/components/rocSPARSE.yml index 24301b7a3..cca206ae2 100644 --- a/.azuredevops/components/rocSPARSE.yml +++ b/.azuredevops/components/rocSPARSE.yml @@ -56,8 +56,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - 
gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -107,7 +105,7 @@ jobs: - job: rocSPARSE_testing timeoutInMinutes: 90 dependsOn: rocSPARSE - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -141,6 +139,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocSPARSE diff --git a/.azuredevops/components/rocThrust.yml b/.azuredevops/components/rocThrust.yml index 39136d0e6..6fbd32c2d 100644 --- a/.azuredevops/components/rocThrust.yml +++ b/.azuredevops/components/rocThrust.yml @@ -47,8 +47,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -82,7 +80,7 @@ jobs: - job: rocThrust_testing dependsOn: rocThrust - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -116,6 +114,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocThrust diff --git a/.azuredevops/components/rocWMMA.yml b/.azuredevops/components/rocWMMA.yml index a96f2704b..ff08ff99d 100644 --- a/.azuredevops/components/rocWMMA.yml 
+++ b/.azuredevops/components/rocWMMA.yml @@ -56,8 +56,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -94,7 +92,7 @@ jobs: - job: rocWMMA_testing timeoutInMinutes: 90 dependsOn: rocWMMA - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -128,6 +126,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocWMMA diff --git a/.azuredevops/components/rocm-examples.yml b/.azuredevops/components/rocm-examples.yml index f27ced15d..ede8f395c 100644 --- a/.azuredevops/components/rocm-examples.yml +++ b/.azuredevops/components/rocm-examples.yml @@ -60,6 +60,7 @@ parameters: - rocSOLVER - rocSPARSE - rocThrust + - roctracer jobs: - job: rocm_examples @@ -73,8 +74,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -116,7 +115,7 @@ jobs: - job: rocm_examples_testing dependsOn: rocm_examples - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -135,9 +134,9 @@ jobs: parameters: aptPackages: ${{ parameters.aptPackages }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml + 
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml parameters: - gpuTarget: $(JOB_GPU_TARGET) + checkoutRepo: ${{ parameters.checkoutRepo }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml parameters: ${{ if eq(parameters.checkoutRef, '') }}: @@ -152,20 +151,18 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds - - task: Bash@3 - displayName: Unload and reload AMDGPU - inputs: - targetType: inline - script: | - sudo modprobe -r amdgpu - sudo modprobe amdgpu - - task: Bash@3 - displayName: Iterate through examples - inputs: - targetType: inline - script: | - for file in *; do - echo Now running: $file - ./$file | tee -a $(TEST_LOG_FILE) - done - workingDirectory: $(Agent.BuildDirectory)/rocm/examples + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml + parameters: + # https://github.com/ROCm/HIP/issues/2203 + extraBuildFlags: >- + -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++ + -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm + -DROCM_ROOT=$(Agent.BuildDirectory)/rocm + -DAMDGPU_TARGETS=$(JOB_GPU_TARGET) + -DCMAKE_HIP_ARCHITECTURES=$(JOB_GPU_TARGET) + -DCMAKE_EXE_LINKER_FLAGS=-fgpu-rdc + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml + parameters: + componentName: rocm-examples + testDir: $(Build.SourcesDirectory)/build diff --git a/.azuredevops/components/rocm_bandwidth_test.yml b/.azuredevops/components/rocm_bandwidth_test.yml index 854619c9b..3eaa089ac 100644 --- a/.azuredevops/components/rocm_bandwidth_test.yml +++ b/.azuredevops/components/rocm_bandwidth_test.yml @@ -27,6 +27,7 @@ parameters: - name: rocmTestDependencies type: object default: + - rocminfo - rocprofiler-register - ROCR-Runtime @@ -72,7 +73,7 @@ jobs: - job: rocm_bandwidth_test_testing dependsOn: rocm_bandwidth_test - condition: succeeded() + condition: 
and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -99,6 +100,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocm_bandwidth_test diff --git a/.azuredevops/components/rocm_smi_lib.yml b/.azuredevops/components/rocm_smi_lib.yml index 0cd9e86ef..3a1468f42 100644 --- a/.azuredevops/components/rocm_smi_lib.yml +++ b/.azuredevops/components/rocm_smi_lib.yml @@ -29,7 +29,7 @@ jobs: - job: rocm_smi_lib_testing dependsOn: rocm_smi_lib - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -44,6 +44,9 @@ jobs: steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml + parameters: + runRocminfo: false - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocm_smi_lib diff --git a/.azuredevops/components/rocminfo.yml b/.azuredevops/components/rocminfo.yml index f13d35f98..223a02090 100644 --- a/.azuredevops/components/rocminfo.yml +++ b/.azuredevops/components/rocminfo.yml @@ -49,7 +49,7 @@ jobs: - job: rocminfo_testing dependsOn: rocminfo - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: 
common - template: /.azuredevops/variables-global.yml @@ -72,6 +72,9 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml + parameters: + runRocminfo: false - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocminfo diff --git a/.azuredevops/components/omniperf.yml b/.azuredevops/components/rocprofiler-compute.yml similarity index 87% rename from .azuredevops/components/omniperf.yml rename to .azuredevops/components/rocprofiler-compute.yml index 8a320f918..eb62b588a 100644 --- a/.azuredevops/components/omniperf.yml +++ b/.azuredevops/components/rocprofiler-compute.yml @@ -9,6 +9,7 @@ parameters: type: object default: - cmake + - locales - python3-pip - name: pipModules type: object @@ -46,7 +47,7 @@ parameters: - roctracer jobs: -- job: omniperf +- job: rocprofiler_compute variables: - group: common - template: /.azuredevops/variables-global.yml @@ -58,8 +59,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -90,9 +89,9 @@ jobs: parameters: gpuTarget: $(JOB_GPU_TARGET) -- job: omniperf_testing - dependsOn: omniperf - condition: succeeded() +- job: rocprofiler_compute_testing + dependsOn: rocprofiler_compute + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -112,6 +111,14 @@ jobs: aptPackages: ${{ parameters.aptPackages }} pipModules: ${{ parameters.pipModules }} - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml + - task: Bash@3 + displayName: Add en_US.UTF-8 locale + inputs: + targetType: inline + script: | + sudo locale-gen en_US.UTF-8 + sudo update-locale + 
locale -a - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml parameters: checkoutRepo: ${{ parameters.checkoutRepo }} @@ -150,11 +157,12 @@ jobs: -DCMAKE_BUILD_TYPE=Release -DENABLE_TESTS=ON -DINSTALL_TESTS=ON + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: - componentName: omniperf - testDir: $(Build.BinariesDirectory)/libexec/omniperf - testExecutable: export OMNIPERF_ARCH_OVERRIDE="MI300X"; ctest + componentName: rocprofiler-compute + testDir: $(Build.BinariesDirectory)/libexec/rocprofiler-compute + testExecutable: export ROCPROFCOMPUTE_ARCH_OVERRIDE="MI300X"; ctest - task: Bash@3 displayName: Remove ROCm binaries from PATH inputs: diff --git a/.azuredevops/components/rocprofiler-sdk.yml b/.azuredevops/components/rocprofiler-sdk.yml index 6a7a2a487..acf0ba574 100644 --- a/.azuredevops/components/rocprofiler-sdk.yml +++ b/.azuredevops/components/rocprofiler-sdk.yml @@ -55,8 +55,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: diff --git a/.azuredevops/components/rocprofiler-systems.yml b/.azuredevops/components/rocprofiler-systems.yml new file mode 100644 index 000000000..653e52f57 --- /dev/null +++ b/.azuredevops/components/rocprofiler-systems.yml @@ -0,0 +1,140 @@ +# largely referenced from: https://github.com/ROCm/omnitrace/blob/main/.github/workflows/ubuntu-jammy.yml +parameters: +- name: checkoutRepo + type: string + default: 'self' +- name: checkoutRef + type: string + default: '' +- name: aptPackages + type: object + default: + - autoconf + - autotools-dev + - bison + - build-essential + - bzip2 + - clang + - cmake + - environment-modules + - g++-12 + - libdrm-dev + - libfabric-dev + - libiberty-dev + - libpapi-dev + - libpfm4-dev + - libtool + - libopenmpi-dev + - m4 + - openmpi-bin + - software-properties-common + - 
python3-pip + - texinfo + - zlib1g-dev +- name: pipModules + type: object + default: + - numpy + - perfetto + - dataclasses +- name: rocmDependencies + type: object + default: + - aomp + - clr + - llvm-project + - rccl + - rocm-core + - rocm_smi_lib + - rocminfo + - ROCR-Runtime + - rocprofiler + - rocprofiler-register + - roctracer + +jobs: +- job: rocprofiler_systems + variables: + - group: common + - template: /.azuredevops/variables-global.yml + pool: ${{ variables.MEDIUM_BUILD_POOL }} + workspace: + clean: all + strategy: + matrix: + gfx942: + JOB_GPU_TARGET: gfx942 + steps: + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml + parameters: + aptPackages: ${{ parameters.aptPackages }} + pipModules: ${{ parameters.pipModules }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml + parameters: + checkoutRepo: ${{ parameters.checkoutRepo }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml + parameters: + dependencyList: ${{ parameters.rocmDependencies }} + gpuTarget: $(JOB_GPU_TARGET) + # CI case: download latest default branch build + ${{ if eq(parameters.checkoutRef, '') }}: + dependencySource: staging + # manual build case: triggered by ROCm/ROCm repo + ${{ elseif ne(parameters.checkoutRef, '') }}: + dependencySource: tag-builds + - task: Bash@3 + displayName: ROCm symbolic link + inputs: + targetType: inline + script: | + sudo rm -rf /opt/rocm + sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm + - task: Bash@3 + displayName: Add ROCm binaries to PATH + inputs: + targetType: inline + script: echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin" + - task: Bash@3 + displayName: Add ROCm compilers to PATH + inputs: + targetType: inline + script: echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin" + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml + parameters: +# build flags reference: 
https://rocm.docs.amd.com/projects/omnitrace/en/latest/install/install.html + extraBuildFlags: >- + -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm + -DROCPROFSYS_BUILD_TESTING=ON + -DROCPROFSYS_BUILD_DYNINST=ON + -DROCPROFSYS_BUILD_LIBUNWIND=ON + -DDYNINST_BUILD_TBB=ON + -DDYNINST_BUILD_ELFUTILS=ON + -DDYNINST_BUILD_LIBIBERTY=ON + -DDYNINST_BUILD_BOOST=ON + -DROCPROFSYS_USE_PAPI=ON + -DROCPROFSYS_USE_MPI=ON + -DAMDGPU_TARGETS=$(JOB_GPU_TARGET) + multithreadFlag: -- -j32 + - task: Bash@3 + displayName: Set up rocprofiler-systems env + inputs: + targetType: inline + script: source share/rocprofiler-systems/setup-env.sh + workingDirectory: build + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml + parameters: + componentName: rocprofiler-systems + - task: Bash@3 + displayName: Remove ROCm binaries from PATH + inputs: + targetType: inline + script: echo "##vso[task.setvariable variable=PATH]$(echo $PATH | sed -e 's;:$(Agent.BuildDirectory)/rocm/bin;;' -e 's;^/;;' -e 's;/$;;')" + - task: Bash@3 + displayName: Remove ROCm compilers from PATH + inputs: + targetType: inline + script: echo "##vso[task.setvariable variable=PATH]$(echo $PATH | sed -e 's;:$(Agent.BuildDirectory)/rocm/llvm/bin;;' -e 's;^/;;' -e 's;/$;;')" + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml + parameters: + gpuTarget: $(JOB_GPU_TARGET) diff --git a/.azuredevops/components/rocprofiler.yml b/.azuredevops/components/rocprofiler.yml index 6cee8eb86..4d976659b 100644 --- a/.azuredevops/components/rocprofiler.yml +++ b/.azuredevops/components/rocprofiler.yml @@ -57,8 +57,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -100,7 +98,7 @@ jobs: - job: rocprofiler_testing dependsOn: rocprofiler - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), 
not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -141,6 +139,7 @@ jobs: script: | sudo rm -rf /opt/rocm sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocprofilerV1 diff --git a/.azuredevops/components/rocr_debug_agent.yml b/.azuredevops/components/rocr_debug_agent.yml index cdf8ecd31..23263cfa0 100644 --- a/.azuredevops/components/rocr_debug_agent.yml +++ b/.azuredevops/components/rocr_debug_agent.yml @@ -72,7 +72,7 @@ jobs: - job: rocr_debug_agent_testing dependsOn: rocr_debug_agent - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -114,6 +114,7 @@ jobs: cmakeBuildDir: '$(Agent.BuildDirectory)/rocm/src/rocm-debug-agent-test' cmakeSourceDir: '.' 
installEnabled: false + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rocr_debug_agent diff --git a/.azuredevops/components/roctracer.yml b/.azuredevops/components/roctracer.yml index 624eef6ec..e54c1a4cf 100644 --- a/.azuredevops/components/roctracer.yml +++ b/.azuredevops/components/roctracer.yml @@ -30,6 +30,7 @@ parameters: default: - clr - llvm-project + - rocminfo - rocprofiler-register - ROCR-Runtime @@ -48,8 +49,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml parameters: @@ -77,6 +76,7 @@ jobs: -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm -DGPU_TARGETS=$(JOB_GPU_TARGET) + -DAMDGPU_TARGETS=$(JOB_GPU_TARGET) -GNinja - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml parameters: @@ -84,7 +84,7 @@ jobs: - job: roctracer_testing dependsOn: roctracer - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -118,6 +118,7 @@ jobs: dependencySource: staging ${{ elseif ne(parameters.checkoutRef, '') }}: dependencySource: tag-builds + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: roctracer diff --git a/.azuredevops/components/rpp.yml b/.azuredevops/components/rpp.yml index 90b82087d..7d3c36a80 100644 --- a/.azuredevops/components/rpp.yml +++ b/.azuredevops/components/rpp.yml @@ -56,8 +56,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - template: ${{ variables.CI_TEMPLATE_PATH 
}}/steps/dependencies-other.yml parameters: @@ -93,7 +91,7 @@ jobs: - job: rpp_testing dependsOn: rpp - condition: succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -173,6 +171,7 @@ jobs: cmake /opt/rocm/share/rpp/test \ -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++ \ -DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml parameters: componentName: rpp diff --git a/.azuredevops/dependencies/boost.yml b/.azuredevops/dependencies/boost.yml new file mode 100644 index 000000000..16cc1a699 --- /dev/null +++ b/.azuredevops/dependencies/boost.yml @@ -0,0 +1,65 @@ +parameters: +- name: checkoutRepo + type: string + default: 'self' +- name: checkoutRef + type: string + default: '' +- name: boostVersion + type: string + default: '' +- name: aptPackages + type: object + default: + - git +- name: rocmDependencies + type: object + default: + - llvm-project + +jobs: +- job: boost + variables: + - group: common + - template: /.azuredevops/variables-global.yml + pool: + vmImage: ${{ variables.BASE_BUILD_POOL }} + workspace: + clean: all + steps: + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml + parameters: + aptPackages: ${{ parameters.aptPackages }} + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml + - task: Bash@3 + displayName: 'git clone boost' + inputs: + targetType: inline + script: git clone -b ${{ parameters.boostVersion }} https://github.com/boostorg/boost --depth=1 --recurse-submodules + workingDirectory: $(Build.SourcesDirectory) + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml + parameters: + dependencyList: ${{ 
parameters.rocmDependencies }} + dependencySource: staging + - task: Bash@3 + displayName: Add ROCm binaries to PATH + inputs: + targetType: inline + script: echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin" + - task: Bash@3 + displayName: Add ROCm compilers to PATH + inputs: + targetType: inline + script: echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin" + - task: Bash@3 + displayName: 'Build Boost with clang' + inputs: + targetType: inline + script: | + export CC=$(Agent.BuildDirectory)/rocm/llvm/bin/clang + export CXX=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++ + ./bootstrap.sh --with-toolset=clang --prefix=$(Build.BinariesDirectory) + ./b2 --toolset=clang threading=multi link=shared --prefix=$(Build.BinariesDirectory) cxxflags="-std=c++20" + ./b2 install + workingDirectory: $(Build.SourcesDirectory)/boost + - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml diff --git a/.azuredevops/dependencies/grpc.yml b/.azuredevops/dependencies/grpc.yml index d9ea15fb2..dd6b65345 100644 --- a/.azuredevops/dependencies/grpc.yml +++ b/.azuredevops/dependencies/grpc.yml @@ -20,7 +20,7 @@ jobs: variables: - group: common - template: /.azuredevops/variables-global.yml - pool: + pool: vmImage: ${{ variables.BASE_BUILD_POOL }} workspace: clean: all diff --git a/.azuredevops/dependencies/gtest.yml b/.azuredevops/dependencies/gtest.yml index dcdffe698..5d18d9baa 100644 --- a/.azuredevops/dependencies/gtest.yml +++ b/.azuredevops/dependencies/gtest.yml @@ -20,7 +20,7 @@ jobs: variables: - group: common - template: /.azuredevops/variables-global.yml - pool: + pool: vmImage: ${{ variables.BASE_BUILD_POOL }} workspace: clean: all diff --git a/.azuredevops/dependencies/lapack.yml b/.azuredevops/dependencies/lapack.yml index b2729d22d..c1a15a138 100644 --- a/.azuredevops/dependencies/lapack.yml +++ b/.azuredevops/dependencies/lapack.yml @@ -21,7 +21,7 @@ jobs: variables: - group: common - template: /.azuredevops/variables-global.yml - 
pool: + pool: vmImage: ${{ variables.BASE_BUILD_POOL }} workspace: clean: all diff --git a/.azuredevops/nightly/pytorch.yml b/.azuredevops/nightly/pytorch.yml index 8ff217ba7..4338630ad 100644 --- a/.azuredevops/nightly/pytorch.yml +++ b/.azuredevops/nightly/pytorch.yml @@ -11,7 +11,6 @@ parameters: - ca-certificates - bc - bridge-utils - - cmake - devscripts - dkms - doxygen @@ -67,8 +66,9 @@ parameters: - name: pipModules type: object default: + - cmake - astunparse - - expecttest!=0.2.0 + - expecttest>=0.2.1 - hypothesis - numpy - psutil @@ -85,14 +85,15 @@ parameters: - lintrunner - ninja - packaging - - optree>=0.12.0 + - optree>=0.13.0 + - click>=8.0.3 # list for vision - auditwheel - future - pytest - pytest-azurepipelines - pillow -# list from https://github.com/pytorch/builder/blob/main/manywheel/build_rocm.sh +# list from https://github.com/pytorch/pytorch/blob/main/.ci/manywheel/build_rocm.sh - name: rocmDependencies type: object default: @@ -122,6 +123,7 @@ parameters: - hipCUB - rocThrust - hipBLAS-common + - composable_kernel - name: rocmTestDependencies type: object default: @@ -159,13 +161,10 @@ jobs: amd-staging-gfx942: ROCM_BRANCH: amd-staging JOB_GPU_TARGET: gfx942 - amd-staging-gfx90a: - ROCM_BRANCH: amd-staging - JOB_GPU_TARGET: gfx90a variables: - group: common - template: /.azuredevops/variables-global.yml -# various flags/parameters expected by bash scripts in pytorch builder repo +# various flags/parameters expected by bash scripts in pytorch repo's .ci directory - name: ROCM_VERSION value: 6.3.0 - name: ROCM_PATH @@ -186,7 +185,7 @@ jobs: workspace: clean: all steps: -# copy environment setup from https://github.com/pytorch/builder/blob/main/manywheel/Dockerfile +# copy environment setup from https://github.com/pytorch/pytorch/blob/main/.ci/docker/manywheel/Dockerfile # but instead of centos, use ubuntu environment - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml - task: Bash@3 @@ -223,18 +222,21 @@ jobs: 
targetType: inline script: sudo ln -s $(Agent.BuildDirectory)/rocm /opt/rocm - checkout: self - - task: Bash@3 - displayName: git clone pytorch builder - inputs: - targetType: inline - script: git clone https://github.com/pytorch/builder.git --depth=1 --recurse-submodules - workingDirectory: $(Build.SourcesDirectory) - task: Bash@3 displayName: git clone upstream pytorch inputs: targetType: inline script: git clone https://github.com/pytorch/pytorch.git --depth=1 --recurse-submodules workingDirectory: $(Build.SourcesDirectory) +# builder clone still needed due to run_tests.sh at end of build_common.sh call + - task: Bash@3 + displayName: git clone pytorch builder + inputs: + targetType: inline + script: | + git clone https://github.com/pytorch/builder.git --depth=1 --recurse-submodules + sudo ln -s $(Build.SourcesDirectory)/builder /builder + workingDirectory: $(Build.SourcesDirectory) - task: Bash@3 displayName: Install patchelf inputs: @@ -287,8 +289,8 @@ jobs: PYTORCH_BUILD_VERSION=$(cat $(Build.SourcesDirectory)/pytorch/version.txt | cut -da -f1) PYTORCH_BUILD_NUMBER=$(date -u +%Y%m%d) SKIP_ALL_TESTS=1 - bash ./manywheel/build_rocm.sh - workingDirectory: $(Build.SourcesDirectory)/builder + bash ./.ci/manywheel/build_rocm.sh + workingDirectory: $(Build.SourcesDirectory)/pytorch - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml parameters: sourceDir: /remote/wheelhouserocm$(ROCM_VERSION) @@ -318,13 +320,6 @@ jobs: targetType: inline script: git clone https://github.com/pytorch/vision.git --depth=1 --recurse-submodules workingDirectory: $(Build.SourcesDirectory) - - task: Bash@3 - displayName: Apply vision patch - inputs: - targetType: inline - script: | - git apply $(Build.SourcesDirectory)/.azuredevops/patches/torchvision-package-name.patch - workingDirectory: $(Build.SourcesDirectory)/vision - task: Bash@3 displayName: Build vision inputs: @@ -355,7 +350,7 @@ jobs: - job: torchvision_testing dependsOn: pytorch - condition: 
succeeded() + condition: and(succeeded(), eq(variables.ENABLE_GFX942_TESTS, 'true'), not(containsValue(split(variables.DISABLED_GFX942_TESTS, ','), variables['Build.DefinitionName']))) variables: - group: common - template: /.azuredevops/variables-global.yml @@ -384,7 +379,6 @@ jobs: parameters: aptPackages: ${{ parameters.aptPackages }} pipModules: ${{ parameters.pipModules }} - - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml - task: DownloadPipelineArtifact@2 displayName: 'Download Pipeline Wheel Files' diff --git a/.azuredevops/nightly/rocm-nightly.yml b/.azuredevops/nightly/rocm-nightly.yml index 8591060be..fdfb2a006 100644 --- a/.azuredevops/nightly/rocm-nightly.yml +++ b/.azuredevops/nightly/rocm-nightly.yml @@ -26,7 +26,6 @@ parameters: - llvm-project - MIOpen - MIVisionX - - omniperf - rccl - rdc - rocAL @@ -45,7 +44,10 @@ parameters: - rocm_bandwidth_test - rocm_smi_lib - rocPRIM + - rocprofiler-compute - rocprofiler-register + - rocprofiler-sdk + - rocprofiler-systems - rocprofiler - rocPyDecode - ROCR-Runtime @@ -80,8 +82,6 @@ jobs: matrix: gfx942: JOB_GPU_TARGET: gfx942 - gfx90a: - JOB_GPU_TARGET: gfx90a steps: - task: DeleteFiles@1 displayName: 'Cleanup checkout space' @@ -118,7 +118,7 @@ jobs: - script: du -sh $(Build.ArtifactStagingDirectory) displayName: Compressed ROCm size - task: PublishPipelineArtifact@1 - displayName: 'Public ROCm Nightly Artifact' + displayName: 'Publish ROCm Nightly Artifact' retryCountOnTaskFailure: 3 inputs: targetPath: '$(Build.ArtifactStagingDirectory)' diff --git a/.azuredevops/patches/torchvision-package-name.patch b/.azuredevops/patches/torchvision-package-name.patch deleted file mode 100644 index 5af8cdb90..000000000 --- a/.azuredevops/patches/torchvision-package-name.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 036307033d5c187b3123dae46477feacbd06d0ab Mon Sep 17 00:00:00 2001 -From: Joseph Macaranas -Date: Sun, 22 Sep 
2024 23:03:48 -0400 -Subject: [PATCH] Allow custom package name for CI builds of torchvision - ---- - setup.py | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/setup.py b/setup.py -index 4b0525d8e4..2c51ce04f2 100644 ---- a/setup.py -+++ b/setup.py -@@ -42,7 +42,7 @@ CSRS_DIR = ROOT_DIR / "torchvision/csrc" - IS_ROCM = (torch.version.hip is not None) and (ROCM_HOME is not None) - BUILD_CUDA_SOURCES = (torch.cuda.is_available() and ((CUDA_HOME is not None) or IS_ROCM)) or FORCE_CUDA - --PACKAGE_NAME = "torchvision" -+PACKAGE_NAME = os.getenv("TORCHVISION_PACKAGE_NAME", "torchvision") - - print("Torchvision build configuration:") - print(f"{FORCE_CUDA = }") -@@ -98,7 +98,7 @@ def get_requirements(): - except DistributionNotFound: - return None - -- pytorch_dep = "torch" -+ pytorch_dep = os.getenv("TORCH_PACKAGE_NAME", "torch") - if os.getenv("PYTORCH_VERSION"): - pytorch_dep += "==" + os.getenv("PYTORCH_VERSION") - --- -2.44.0.windows.1 - diff --git a/.azuredevops/tag-builds/omniperf.yml b/.azuredevops/tag-builds/Tensile.yml similarity index 85% rename from .azuredevops/tag-builds/omniperf.yml rename to .azuredevops/tag-builds/Tensile.yml index a1a396103..871c20b31 100644 --- a/.azuredevops/tag-builds/omniperf.yml +++ b/.azuredevops/tag-builds/Tensile.yml @@ -16,14 +16,14 @@ resources: - repository: release_repo type: github endpoint: ROCm - name: ROCm/omniperf + name: ROCm/Tensile ref: ${{ parameters.checkoutRef }} trigger: none pr: none jobs: - - template: ${{ variables.CI_COMPONENT_PATH }}/omniperf.yml + - template: ${{ variables.CI_COMPONENT_PATH }}/Tensile.yml parameters: checkoutRepo: release_repo checkoutRef: ${{ parameters.checkoutRef }} diff --git a/.azuredevops/tag-builds/boost.yml b/.azuredevops/tag-builds/boost.yml new file mode 100644 index 000000000..1411679e2 --- /dev/null +++ b/.azuredevops/tag-builds/boost.yml @@ -0,0 +1,23 @@ +variables: +- group: common +- template: /.azuredevops/variables-global.yml + +parameters: +- 
name: boostVersion + type: string + default: 'master' + +resources: + repositories: + - repository: pipelines_repo + type: github + endpoint: ROCm + name: ROCm/ROCm + +trigger: none +pr: none + +jobs: + - template: ${{ variables.CI_DEPENDENCIES_PATH }}/boost.yml + parameters: + boostVersion: ${{ parameters.boostVersion }} diff --git a/.azuredevops/tag-builds/rocprofiler-compute.yml b/.azuredevops/tag-builds/rocprofiler-compute.yml new file mode 100644 index 000000000..efaf10068 --- /dev/null +++ b/.azuredevops/tag-builds/rocprofiler-compute.yml @@ -0,0 +1,29 @@ +variables: +- group: common +- template: /.azuredevops/variables-global.yml + +parameters: +- name: checkoutRef + type: string + default: refs/tags/$(LATEST_RELEASE_TAG) + +resources: + repositories: + - repository: pipelines_repo + type: github + endpoint: ROCm + name: ROCm/ROCm + - repository: release_repo + type: github + endpoint: ROCm + name: ROCm/rocprofiler-compute + ref: ${{ parameters.checkoutRef }} + +trigger: none +pr: none + +jobs: + - template: ${{ variables.CI_COMPONENT_PATH }}/rocprofiler-compute.yml + parameters: + checkoutRepo: release_repo + checkoutRef: ${{ parameters.checkoutRef }} diff --git a/.azuredevops/tag-builds/rocprofiler-systems.yml b/.azuredevops/tag-builds/rocprofiler-systems.yml new file mode 100644 index 000000000..559b3362c --- /dev/null +++ b/.azuredevops/tag-builds/rocprofiler-systems.yml @@ -0,0 +1,29 @@ +variables: +- group: common +- template: /.azuredevops/variables-global.yml + +parameters: +- name: checkoutRef + type: string + default: refs/tags/$(LATEST_RELEASE_TAG) + +resources: + repositories: + - repository: pipelines_repo + type: github + endpoint: ROCm + name: ROCm/ROCm + - repository: release_repo + type: github + endpoint: ROCm + name: ROCm/rocprofiler-systems + ref: ${{ parameters.checkoutRef }} + +trigger: none +pr: none + +jobs: + - template: ${{ variables.CI_COMPONENT_PATH }}/rocprofiler-systems.yml + parameters: + checkoutRepo: release_repo + 
checkoutRef: ${{ parameters.checkoutRef }} diff --git a/.azuredevops/templates/steps/artifact-download.yml b/.azuredevops/templates/steps/artifact-download.yml index 55eac35e2..234452d35 100644 --- a/.azuredevops/templates/steps/artifact-download.yml +++ b/.azuredevops/templates/steps/artifact-download.yml @@ -9,6 +9,10 @@ parameters: - name: useDefaultBranch type: boolean default: true +# useMainlineBranch only processed if useDefaultBranch is false +- name: useMainlineBranch + type: boolean + default: false - name: latestFromBranch type: boolean default: true @@ -22,7 +26,7 @@ parameters: type: object default: AMDMIGraphX: develop - amdsmi: develop + amdsmi: amd-staging aomp-extras: aomp-dev aomp: aomp-dev clr: amd-staging @@ -46,8 +50,9 @@ parameters: MIOpen: develop MIVisionX: develop omniperf: amd-staging + omnitrace: amd-staging rccl: develop - rdc: develop + rdc: amd-staging rocAL: develop rocALUTION: develop rocBLAS: develop @@ -56,16 +61,19 @@ parameters: rocFFT: develop ROCgdb: amd-staging rocm-cmake: develop - rocm-core: master + rocm-core: amd-staging rocm-examples: develop rocminfo: amd-staging rocMLIR: develop ROCmValidationSuite: master rocm_bandwidth_test: master - rocm_smi_lib: develop + rocm_smi_lib: amd-staging rocPRIM: develop - rocprofiler-register: amd-mainline rocprofiler: amd-staging + rocprofiler-compute: amd-staging + rocprofiler-register: amd-staging + rocprofiler-sdk: amd-staging + rocprofiler-systems: amd-staging rocPyDecode: develop ROCR-Runtime: amd-staging rocRAND: develop @@ -76,26 +84,84 @@ parameters: roctracer: amd-staging rocWMMA: develop rpp: develop -- name: allowPartiallySucceededBuilds +- name: mainlineBranchList type: object default: - - amdsmi - - aomp - - HIPIFY - - MIVisionX - - omniperf - - rccl - - rdc - - rocm-cmake - - rocm_smi_lib - - rocFFT - - rpp + AMDMIGraphX: mainline + amdsmi: amd-mainline + aomp-extras: amd-mainline-open + aomp: amd-mainline-open + clr: amd-mainline + composable_kernel: mainline + half: rocm 
+ HIP: amd-mainline + hip-tests: amd-mainline + hipBLAS: mainline + hipBLASLt: mainline + hipBLAS-common: mainline + hipCUB: mainline + hipFFT: mainline + hipfort: mainline + HIPIFY: amd-mainline + hipRAND: mainline + hipSOLVER: mainline + hipSPARSE: mainline + hipSPARSELt: mainline + hipTensor: mainline + llvm-project: amd-mainline-open + MIOpen: mainline + MIVisionX: mainline + omniperf: amd-mainline + omnitrace: amd-mainline + rccl: mainline + rdc: amd-mainline + rocAL: master # needs the yaml file + rocALUTION: mainline + rocBLAS: mainline + ROCdbgapi : amd-mainline + rocDecode: mainline + rocFFT: mainline + ROCgdb: amd-mainline-rocgdb-15 # + rocm-cmake: mainline + rocm-core: amd-master + rocm-examples: develop # no mainline + rocminfo: amd-master + rocMLIR: mainline # needs the yaml file + ROCmValidationSuite: mainline + rocm_bandwidth_test: master + rocm_smi_lib: amd-mainline + rocPRIM: mainline + rocprofiler: amd-master + rocprofiler-compute: amd-mainline + rocprofiler-register: amd-mainline + rocprofiler-sdk: amd-mainline + rocprofiler-systems: amd-mainline + rocPyDecode: mainline + ROCR-Runtime: amd-master + rocRAND: mainline + rocr_debug_agent: amd-mainline + rocSOLVER: mainline + rocSPARSE: mainline + rocThrust: mainline + roctracer: amd-master + rocWMMA: mainline + rpp: mainline # BELOW REQUIRED IF useDefaultBranch false - name: branchName type: string default: '$(Build.SourceBranchName)' # for tagged builds steps: +- task: Bash@3 + displayName: Set allowPartiallySucceededBuilds + inputs: + targetType: inline + script: | + if [[ ",$ALLOWED_PARTIAL_SUCCEED_BUILDS," == *",${{ parameters.componentName }},"* ]]; then + echo "##vso[task.setvariable variable=allowPartiallySucceededBuilds;]true" + else + echo "##vso[task.setvariable variable=allowPartiallySucceededBuilds;]false" + fi - task: DownloadPipelineArtifact@2 displayName: Download ${{ parameters.componentName }} inputs: @@ -109,10 +175,11 @@ steps: buildVersionToDownload: latestFromBranch # default is 
'latest' ${{ if eq(parameters.useDefaultBranch, true) }}: branchName: refs/heads/${{ parameters.defaultBranchList[parameters.componentName] }} + ${{ elseif eq(parameters.useMainlineBranch, true) }}: + branchName: refs/heads/${{ parameters.mainlineBranchList[parameters.componentName] }} ${{ else }}: branchName: ${{ parameters.branchName }} - ${{ if containsValue(parameters.allowPartiallySucceededBuilds, parameters.componentName) }}: - allowPartiallySucceededBuilds: true + allowPartiallySucceededBuilds: $(allowPartiallySucceededBuilds) targetPath: '$(Pipeline.Workspace)/d' - task: ExtractFiles@1 displayName: Extract ${{ parameters.componentName }} diff --git a/.azuredevops/templates/steps/dependencies-aqlprofile.yml b/.azuredevops/templates/steps/dependencies-aqlprofile.yml index af1f9339f..42ca8dac6 100644 --- a/.azuredevops/templates/steps/dependencies-aqlprofile.yml +++ b/.azuredevops/templates/steps/dependencies-aqlprofile.yml @@ -8,20 +8,22 @@ parameters: - name: repositoryUrl type: object default: - staging: https://repo.radeon.com/rocm/apt/6.2/pool/main/h/hsa-amd-aqlprofile - tag-builds: https://repo.radeon.com/rocm/apt/6.2/pool/main/h/hsa-amd-aqlprofile -- name: packageName - type: object - default: - staging: hsa-amd-aqlprofile_1.0.0.60200.60200-66~22.04_amd64.deb - tag-builds: hsa-amd-aqlprofile_1.0.0.60200.60200-66~22.04_amd64.deb + staging: https://repo.radeon.com/rocm/apt/latest/pool/main/h/hsa-amd-aqlprofile/ # end slash is important for curl! 
+ tag-builds: https://repo.radeon.com/rocm/apt/$(TAGGED_RELEASE)/pool/main/h/hsa-amd-aqlprofile/ steps: +- task: Bash@3 + displayName: Get aqlprofile package name + inputs: + targetType: inline + script: | + export packageName=$(curl -s ${{ parameters.repositoryUrl[parameters.dependencySource] }} | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb") + echo "##vso[task.setvariable variable=packageName;isreadonly=true]$packageName" - task: Bash@3 displayName: 'Download aqlprofile' inputs: targetType: inline - script: wget -nv ${{ parameters.repositoryUrl[parameters.dependencySource] }}/${{ parameters.packageName[parameters.dependencySource] }} + script: wget -nv ${{ parameters.repositoryUrl[parameters.dependencySource] }}$(packageName) workingDirectory: '$(Pipeline.Workspace)' - task: Bash@3 displayName: 'Extract aqlprofile' @@ -29,7 +31,7 @@ steps: targetType: inline script: | mkdir hsa-amd-aqlprofile - dpkg-deb -R ${{ parameters.packageName[parameters.dependencySource] }} hsa-amd-aqlprofile + dpkg-deb -R $(packageName) hsa-amd-aqlprofile workingDirectory: '$(Pipeline.Workspace)' - task: Bash@3 displayName: 'Copy aqlprofile files' @@ -43,5 +45,5 @@ steps: displayName: 'Clean up aqlprofile' inputs: targetType: inline - script: rm -rf hsa-amd-aqlprofile ${{ parameters.packageName[parameters.dependencySource] }} + script: rm -rf hsa-amd-aqlprofile $(packageName) workingDirectory: '$(Pipeline.Workspace)' diff --git a/.azuredevops/templates/steps/dependencies-boost.yml b/.azuredevops/templates/steps/dependencies-boost.yml new file mode 100644 index 000000000..d944f7f66 --- /dev/null +++ b/.azuredevops/templates/steps/dependencies-boost.yml @@ -0,0 +1,35 @@ +steps: +- task: DownloadPipelineArtifact@2 + displayName: Download Boost + inputs: + buildType: specific + project: ROCm-CI + definition: $(BOOST_DEPENDENCY_PIPELINE_ID) + targetPath: $(Pipeline.Workspace)/d +- task: ExtractFiles@1 + displayName: Extract Boost + inputs: + archiveFilePatterns: 
'$(Pipeline.Workspace)/d/**/*.tar.gz' + destinationFolder: $(Agent.BuildDirectory)/boost + cleanDestinationFolder: true + overwriteExistingFiles: true +- task: DeleteFiles@1 + displayName: Cleanup Compressed Boost + inputs: + SourceFolder: $(Pipeline.Workspace)/d + Contents: '**/*.tar.gz' + RemoveDotFiles: true +- task: Bash@3 + displayName: 'List Boost files' + inputs: + targetType: inline + script: ls -1R $(Agent.BuildDirectory)/boost +- task: Bash@3 + displayName: 'Link Boost shared libraries' + inputs: + targetType: inline + script: | + echo $(Agent.BuildDirectory)/boost/lib | sudo tee /etc/ld.so.conf.d/boost.conf + sudo cat /etc/ld.so.conf.d/boost.conf + sudo ldconfig -v + ldconfig -p diff --git a/.azuredevops/templates/steps/dependencies-other.yml b/.azuredevops/templates/steps/dependencies-other.yml index 69ec8e9cb..34a89d1a3 100644 --- a/.azuredevops/templates/steps/dependencies-other.yml +++ b/.azuredevops/templates/steps/dependencies-other.yml @@ -8,6 +8,12 @@ parameters: default: [] steps: +# firefox takes time to upgrade and is not needed for CI workloads, hold version +- task: Bash@3 + displayName: 'sudo apt-mark hold firefox' + inputs: + targetType: inline + script: sudo apt-mark hold firefox - task: Bash@3 displayName: 'sudo apt-get update' inputs: diff --git a/.azuredevops/templates/steps/dependencies-rocm.yml b/.azuredevops/templates/steps/dependencies-rocm.yml index 0b78e7565..2040f1242 100644 --- a/.azuredevops/templates/steps/dependencies-rocm.yml +++ b/.azuredevops/templates/steps/dependencies-rocm.yml @@ -9,6 +9,7 @@ parameters: default: staging values: - staging + - mainline - tag-builds - fixed - name: extractToMnt @@ -54,6 +55,7 @@ parameters: MIOpen: $(MIOpen_PIPELINE_ID) MIVisionX: $(MIVISIONX_PIPELINE_ID) omniperf: $(OMNIPERF_PIPELINE_ID) + omnitrace: $(OMNITRACE_PIPELINE_ID) rccl: $(RCCL_PIPELINE_ID) rdc: $(RDC_PIPELINE_ID) rocAL: $(ROCAL_PIPELINE_ID) @@ -72,7 +74,10 @@ parameters: rocm_bandwidth_test: $(ROCM_BANDWIDTH_TEST_PIPELINE_ID) 
rocm_smi_lib: $(ROCM_SMI_LIB_PIPELINE_ID) rocPRIM: $(ROCPRIM_PIPELINE_ID) + rocprofiler-compute: $(ROCPROFILER_COMPUTE_PIPELINE_ID) rocprofiler-register: $(ROCPROFILER_REGISTER_PIPELINE_ID) + rocprofiler-sdk: $(ROCPROFILER_SDK_PIPELINE_ID) + rocprofiler-systems: $(ROCPROFILER_SYSTEMS_PIPELINE_ID) rocprofiler: $(ROCPROFILER_PIPELINE_ID) rocPyDecode: $(ROCPYDECODE_PIPELINE_ID) ROCR-Runtime: $(ROCR_RUNTIME_PIPELINE_ID) @@ -113,6 +118,7 @@ parameters: MIOpen: $(MIOpen_TAGGED_PIPELINE_ID) MIVisionX: $(MIVISIONX_TAGGED_PIPELINE_ID) omniperf: $(OMNIPERF_TAGGED_PIPELINE_ID) + omnitrace: $(OMNITRACE_TAGGED_PIPELINE_ID) rccl: $(RCCL_TAGGED_PIPELINE_ID) rdc: $(RDC_TAGGED_PIPELINE_ID) rocAL: $(ROCAL_TAGGED_PIPELINE_ID) @@ -131,7 +137,10 @@ parameters: rocm_bandwidth_test: $(ROCM_BANDWIDTH_TEST_TAGGED_PIPELINE_ID) rocm_smi_lib: $(ROCM_SMI_LIB_TAGGED_PIPELINE_ID) rocPRIM: $(ROCPRIM_TAGGED_PIPELINE_ID) + rocprofiler-compute: $(ROCPROFILER_COMPUTE_TAGGED_PIPELINE_ID) rocprofiler-register: $(ROCPROFILER_REGISTER_TAGGED_PIPELINE_ID) + rocprofiler-sdk: $(ROCPROFILER_SDK_TAGGED_PIPELINE_ID) + rocprofiler-systems: $(ROCPROFILER_SYSTEMS_TAGGED_PIPELINE_ID) rocprofiler: $(ROCPROFILER_TAGGED_PIPELINE_ID) rocPyDecode: $(ROCPYDECODE_TAGGED_PIPELINE_ID) ROCR-Runtime: $(ROCR_RUNTIME_TAGGED_PIPELINE_ID) @@ -172,12 +181,16 @@ parameters: - hipRAND - hipSPARSELt - hipTensor + - omnitrace - rccl - rocALUTION - rocBLAS - rocFFT - rocm-examples - rocPRIM + - rocprofiler-compute + - rocprofiler-sdk + - rocprofiler-systems - rocprofiler - rocPyDecode - rocRAND @@ -213,6 +226,10 @@ steps: ${{ if eq(parameters.dependencySource, 'staging') }}: pipelineId: ${{ parameters.stagingPipelineIdentifiers[ split(dependency, ':')[0] ] }} latestFromBranch: ${{ parameters.latestFromBranch }} + ${{ elseif eq(parameters.dependencySource, 'mainline') }}: + pipelineId: ${{ parameters.stagingPipelineIdentifiers[ split(dependency, ':')[0] ] }} + useMainlineBranch: true + latestFromBranch: ${{ parameters.latestFromBranch }} ${{ 
elseif eq(parameters.dependencySource, 'tag-builds') }}: pipelineId: ${{ parameters.taggedPipelineIdentifiers[ split(dependency, ':')[0] ] }} latestFromBranch: false @@ -229,6 +246,10 @@ steps: ${{ if eq(parameters.dependencySource, 'staging') }}: pipelineId: ${{ parameters.stagingPipelineIdentifiers[dependency] }} latestFromBranch: ${{ parameters.latestFromBranch }} + ${{ elseif eq(parameters.dependencySource, 'mainline') }}: + pipelineId: ${{ parameters.stagingPipelineIdentifiers[dependency] }} + useMainlineBranch: true + latestFromBranch: ${{ parameters.latestFromBranch }} ${{ elseif eq(parameters.dependencySource, 'tag-builds') }}: pipelineId: ${{ parameters.taggedPipelineIdentifiers[dependency] }} latestFromBranch: false diff --git a/.azuredevops/templates/steps/gpu-diagnostics.yml b/.azuredevops/templates/steps/gpu-diagnostics.yml new file mode 100644 index 000000000..60bdac670 --- /dev/null +++ b/.azuredevops/templates/steps/gpu-diagnostics.yml @@ -0,0 +1,54 @@ +# Diagnostics for GPU-enabled systems +parameters: +- name: runRocminfo + type: boolean + default: true + +steps: +- ${{ if eq(parameters.runRocminfo, true) }}: + - task: Bash@3 + displayName: 'rocminfo' + continueOnError: true + inputs: + targetType: inline + script: $(Agent.BuildDirectory)/rocm/bin/rocminfo || true + - task: Bash@3 + displayName: 'rocm_agent_enumerator' + continueOnError: true + inputs: + targetType: inline + script: $(Agent.BuildDirectory)/rocm/bin/rocm_agent_enumerator || true +- task: Bash@3 + displayName: 'List DRI devices' + continueOnError: true + inputs: + targetType: inline + script: ls -la /dev/dri/ || true +- task: Bash@3 + displayName: 'List amdgpu/rocm/mesa packages' + continueOnError: true + inputs: + targetType: inline + script: apt list --installed | grep -E 'amdgpu|rocm|mesa' || true +- task: Bash@3 + displayName: 'List GPU processes' + continueOnError: true + inputs: + targetType: inline + script: | + ls /sys/class/kfd/kfd/proc/ || true + sudo lsof | grep amdgpu || 
true +- task: Bash@3 + displayName: 'System snapshot' + continueOnError: true + inputs: + targetType: inline + script: top -bn1 || true +- task: Bash@3 + displayName: 'List dmesg' + continueOnError: true + inputs: + targetType: inline + script: | + echo 'rocm-ci: $(Build.DefinitionName) $(System.DefinitionId)' | sudo tee /dev/kmsg || true + sudo dmesg || true diff --git a/.azuredevops/variables-global.yml b/.azuredevops/variables-global.yml index b0f2243b3..336934eb7 100644 --- a/.azuredevops/variables-global.yml +++ b/.azuredevops/variables-global.yml @@ -167,6 +167,10 @@ variables: value: 241 - name: OMNIPERF_TAGGED_PIPELINE_ID value: 242 +- name: OMNITRACE_PIPELINE_ID + value: 253 +- name: OMNITRACE_TAGGED_PIPELINE_ID + value: 252 - name: RCCL_GFX942_TEST_PIPELINE_ID value: 184 - name: RCCL_PIPELINE_ID @@ -255,10 +259,22 @@ variables: value: 20 - name: ROCPROFILER_GFX942_TEST_PIPELINE_ID value: 190 +- name: ROCPROFILER_COMPUTE_PIPELINE_ID + value: 257 +- name: ROCPROFILER_COMPUTE_TAGGED_PIPELINE_ID + value: 258 - name: ROCPROFILER_REGISTER_PIPELINE_ID value: 1 - name: ROCPROFILER_REGISTER_TAGGED_PIPELINE_ID value: 25 +- name: ROCPROFILER_SDK_PIPELINE_ID + value: 246 +- name: ROCPROFILER_SDK_TAGGED_PIPELINE_ID + value: 234 +- name: ROCPROFILER_SYSTEMS_PIPELINE_ID + value: 255 +- name: ROCPROFILER_SYSTEMS_TAGGED_PIPELINE_ID + value: 254 - name: ROCPROFILER_PIPELINE_ID value: 143 - name: ROCPROFILER_TAGGED_PIPELINE_ID @@ -321,3 +337,5 @@ variables: value: 78 - name: RPP_TAGGED_PIPELINE_ID value: 39 +- name: BOOST_DEPENDENCY_PIPELINE_ID + value: 250 diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 59f713e31..7cb275391 100755 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -5,3 +5,4 @@ docs/ @amd-aakash @jlgreathouse @samjwu @yhuiYH @ROCm/rocm-documentation *.rst @amd-aakash @jlgreathouse @samjwu @yhuiYH @ROCm/rocm-documentation # External CI /.azuredevops/ @ROCm/external-ci +tools/rocm-build/ @ROCm/rocm-devops diff --git a/.wordlist.txt 
b/.wordlist.txt index 580890845..2b7b7eb70 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -36,6 +36,7 @@ Bluefield Bootloader CCD CDNA +CHTML CIFAR CLI CLion @@ -70,6 +71,7 @@ Concretized Conda ConnectX CuPy +Dashboarding DDR DF DGEMM @@ -227,6 +229,7 @@ Mellanox's Meta's Miniconda MirroredStrategy +Mixtral Multicore Multithreaded MyEnvironment @@ -273,6 +276,7 @@ OpenSSL OpenVX OpenXLA Oversubscription +PagedAttention PCC PCI PCIe @@ -294,6 +298,7 @@ PowerShell PyPi PyTorch Qcycles +Qwen RAII RAS RCCL @@ -563,6 +568,7 @@ hipfort hipify hipsolver hipsparse +hlist hotspotting hpc hpp @@ -586,6 +592,7 @@ intra invariants invocating ipo +jax kdb kfd latencies @@ -606,6 +613,7 @@ migraphx miopen miopengemm mivisionx +mjx mkdir mlirmiopen mtypes diff --git a/RELEASE.md b/RELEASE.md index 5ba7126db..726a599d4 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,104 +1,69 @@ -# ROCm 6.2.2 release notes +# ROCm 6.2.4 release notes -These release notes provide a summary of notable changes since the previous ROCm release. +The release notes provide a summary of notable changes since the previous ROCm release. + +- [Release highlights](#release-highlights) + +- [Operating system and hardware support changes](#operating-system-and-hardware-support-changes) + +- [ROCm components versioning](#rocm-components) + +- [Detailed component changes](#detailed-component-changes) + +- [ROCm known issues](#rocm-known-issues) + +- [ROCm upcoming changes](#rocm-upcoming-changes) ```{note} -As ROCm 6.2.2 was released shortly after 6.2.1, the changes between these versions -are minimal. For a comprehensive overview of recent updates, the ROCm 6.2.1 release -notes are appended to the end of this document. - -For detailed information about the changes in ROCm 6.2.1, refer to the appended -section: [ROCm 6.2.1 release notes](rocm-6-2-1-release-notes). +If you’re using Radeon™ PRO or Radeon GPUs in a workstation setting with a +display connected, continue to use ROCm 6.2.3. 
See the [Use ROCm on Radeon +GPUs](https://rocm.docs.amd.com/projects/radeon/en/latest/index.html) +documentation to verify compatibility and system requirements. ``` -The [Compatibility matrix](https://rocm.docs.amd.com/en/docs-6.2.2/compatibility/compatibility-matrix.html) -provides the full list of supported hardware, operating systems, ecosystems, third-party components, and ROCm components -for each ROCm release. - -Release notes for previous ROCm releases are available in earlier versions of the documentation. -See the [ROCm documentation release history](https://rocm.docs.amd.com/en/latest/release/versions.html). - ## Release highlights -The following is a significant fix introduced in ROCm 6.2.2. +The following are notable new features and improvements in ROCm 6.2.4. For changes to individual components, see +[Detailed component changes](#detailed-component-changes). -### Fixed Instinct MI300X error recovery failure +### ROCm documentation updates -Improved the reliability of AMD Instinct MI300X accelerators in scenarios involving -uncorrectable errors. Previously, error recovery did not occur as expected, -potentially leaving the system in an undefined state. This fix ensures that error -recovery functions as expected, maintaining system stability. +ROCm documentation continues to be updated to provide clearer and more comprehensive guidance for +a wider variety of user needs and use cases. -See the [original issue](#instinct-mi300x-gpu-recovery-failure-on-uncorrectable-errors) -noted in the ROCm 6.2.1 release notes. +* Added a new GPU cluster networking guide. See + [Cluster network performance validation for AMD Instinct accelerators](https://rocm.docs.amd.com/projects/gpu-cluster-networking/en/latest/index.html). + This documentation provides guidelines on validating network configurations + in single-node and multi-node environments to attain optimal speed and bandwidth + in AMD Instinct-powered clusters. ---- +* Updated the HIP runtime documentation. 
-## ROCm 6.2.1 release notes + * Added a new section on how to use [HIP graphs](https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/hipgraph.html). -The ROCm 6.2.1 release notes document newly added ecosystem support, ROCm Offline Installer Creator updates, -and improvements to several ROCm libraries and tools. + * Added a new section about the [Stream ordered memory allocator (SOMA)](https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/stream_ordered_allocator.html). -- [Release highlights](release-highlights) + * Updated the [Porting CUDA driver API](https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/hip_porting_driver_api.html) section. -- [Operating system and hardware support changes](operating-system-and-hardware-support-changes) +* Updated the [Post-installation instructions](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/install/post-install.html) + with guidance on using the `update-alternatives` utility and environment modules to help you manage multiple ROCm + versions and streamline PATH configuration. -- [ROCm components versioning](rocm-components) +* Updated the [LLM inference performance validation on AMD Instinct + MI300X](https://rocm.docs.amd.com/en/docs-6.2.4/how-to/performance-validation/mi300x/vllm-benchmark.html) + documentation with more detailed guidance, new models, and the `float8` data type. -- [Detailed component changes](detailed-component-changes) +## Operating system and hardware support changes -- [ROCm known issues](rocm-known-issues) +ROCm 6.2.4 adds support for the [AMD Radeon PRO V710](https://www.amd.com/en/products/accelerators/radeon-pro/amd-radeon-pro-v710.html) GPU for compute workloads. See +[Supported GPUs](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html) +for more information. -- [ROCm upcoming changes](rocm-upcoming-changes) +This release maintains the same operating system support as 6.2.2. 
-### Release highlights +## ROCm components -The following are notable new features and improvements in ROCm 6.2.1. For changes to individual components, see [Detailed component changes](#detailed-component-changes). - -#### rocAL major version change - -The new version of rocAL introduces many new features, but does not modify any of the existing public API functions. However, the version number was incremented from 1.3 to 2.0. -Applications linked to version 1.3 must be recompiled to link against version 2.0. - -See [the rocAL detailed changes](#rocal-2-0-0) for more information. - -#### New support for FBGEMM (Facebook General Matrix Multiplication) - -As of ROCm 6.2.1, ROCm supports Facebook General Matrix Multiplication (FBGEMM) and the related FBGEMM_GPU library. - -FBGEMM is a low-precision, high-performance CPU kernel library for convolution and matrix multiplication. It is used for server-side inference and as a back end for PyTorch quantized operators. FBGEMM_GPU includes a collection of PyTorch GPU operator libraries for training and inference. For more information, see the ROCm [Model acceleration libraries guide](https://rocm.docs.amd.com/en/docs-6.2.1/how-to/llm-fine-tuning-optimization/model-acceleration-libraries.html) -and [PyTorch's FBGEMM GitHub repository](https://github.com/pytorch/FBGEMM). - -#### ROCm Offline Installer Creator changes - -The [ROCm Offline Installer Creator 6.2.1](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.1/install/rocm-offline-installer.html) introduces several new features and improvements including: - -* Logging support for create and install logs -* More stringent checks for Linux versions and distributions -* Updated prerequisite repositories -* Fixed CTest issues - -#### ROCm documentation changes - -There have been no changes to supported hardware or operating systems from ROCm 6.2.0 to ROCm 6.2.1. 
- -* The Programming Model Reference and Understanding the Programming Model topics in HIP have been consolidated into one topic, -[HIP programming model (conceptual)](https://rocm.docs.amd.com/projects/HIP/en/docs-6.2.1/understand/programming_model.html). -* The [HIP virtual memory management](https://rocm.docs.amd.com/projects/HIP/en/docs-6.2.1/how-to/virtual_memory.html) and [HIP virtual memory management API](https://rocm.docs.amd.com/projects/HIP/en/docs-6.2.1/reference/virtual_memory_reference.html) topics have been added. - -```{note} -The ROCm documentation, like all ROCm projects, is open source and available on GitHub. To contribute to ROCm documentation, see the [ROCm documentation contribution guidelines](https://rocm.docs.amd.com/en/latest/contribute/contributing.html). -``` - -### Operating system and hardware support changes - -ROCm 6.2.1 adds support for Ubuntu 24.04.1 (kernel: 6.8 [GA]). - -See the [Compatibility matrix](https://rocm.docs.amd.com/en/docs-6.2.1/compatibility/compatibility-matrix.html) for the full list of supported operating systems and hardware architectures. - -### ROCm components - -The following table lists the versions of ROCm components for ROCm 6.2.1, including any version changes from 6.2.0 to 6.2.1. +The following table lists the versions of ROCm components for ROCm 6.2.4, including any version changes from 6.2.2 to 6.2.4. Click the component's updated version to go to a detailed list of its changes. Click to go to the component's source code on GitHub. @@ -121,51 +86,51 @@ Click the component's updated version to go to a detailed list of its changes. 
C Libraries Machine learning and computer vision - Composable Kernel + Composable Kernel 1.1.0 - - MIGraphX + MIGraphX 2.10 - + - MIOpen + MIOpen 3.2.0 - - MIVisionX + MIVisionX 3.0.0 - - rocAL - 1.0.0 ⇒ 2.0.0 - rocAL + 2.0.0 + - rocDecode + rocDecode 0.6.0 - - rocPyDecode + rocPyDecode 0.1.0 - - RPP + RPP 1.8.0 - @@ -173,9 +138,9 @@ Click the component's updated version to go to a detailed list of its changes. C Communication - RCCL - 2.20.5 ⇒ 2.20.5 - RCCL + 2.20.5 + @@ -183,99 +148,99 @@ Click the component's updated version to go to a detailed list of its changes. C Math - hipBLAS + hipBLAS 2.2.0 - - hipBLASLt + hipBLASLt 0.8.0 - - hipFFT - 1.0.15 - hipFFT + 1.0.15 ⇒ 1.0.16 + - hipfort + hipfort 0.4.0 - - hipRAND - 2.11.0 - hipRAND + 2.11.0 ⇒ 2.11.1 + - hipSOLVER + hipSOLVER 2.2.0 - - hipSPARSE + hipSPARSE 3.1.1 - - hipSPARSELt + hipSPARSELt 0.2.1 - - rocALUTION - 3.2.0 - rocALUTION + 3.2.0 ⇒ 3.2.1 + - rocBLAS - 4.1.2 ⇒ 4.2.1 - rocBLAS + 4.2.1 ⇒ 4.2.4 + - rocFFT - 1.0.28 ⇒ 1.0.29 - rocFFT + 1.0.29 ⇒ 1.0.30 + - rocRAND - 3.1.0 - rocRAND + 3.1.0 ⇒ 3.1.1 + - rocSOLVER - 3.26.0 - rocSOLVER + 3.26.0 ⇒ 3.26.2 + - rocSPARSE - 3.2.0 - rocSPARSE + 3.2.0 ⇒ 3.2.1 + - rocWMMA + rocWMMA 1.5.0 - Tensile 4.41.0 - @@ -283,27 +248,27 @@ Click the component's updated version to go to a detailed list of its changes. C Primitives - hipCUB - 3.2.0 - hipCUB + 3.2.0 ⇒ 3.2.1 + - hipTensor + hipTensor 1.3.0 - - rocPRIM - 3.2.0 ⇒ 3.2.1 - rocPRIM + 3.2.1 ⇒ 3.2.2 + - rocThrust - 3.1.0 - rocThrust + 3.1.0 ⇒ 3.1.1 + @@ -311,33 +276,33 @@ Click the component's updated version to go to a detailed list of its changes. 
C Tools System management - AMD SMI - 24.6.2 ⇒ 24.6.3 - AMD SMI + 24.6.3 ⇒ 24.6.3 + - rocminfo + rocminfo 1.0.0 - - ROCm Data Center Tool + ROCm Data Center Tool + 0.3.0 + + + + ROCm SMI + 7.3.0 + + + + ROCm Validation Suite 1.0.0 - - - - ROCm SMI - 7.3.0 ⇒ 7.3.0 - - - - ROCm Validation Suite - 1.0.0 - @@ -345,40 +310,40 @@ Click the component's updated version to go to a detailed list of its changes. C Performance - Omniperf + Omniperf 2.0.1 - - Omnitrace - 1.11.2 ⇒ 1.11.2 - Omnitrace + 1.11.2 + - ROCm Bandwidth + ROCm Bandwidth Test 1.4.0 - - ROCProfiler + ROCProfiler 2.0.0 - - ROCprofiler-SDK + ROCprofiler-SDK 0.4.0 - - ROCTracer + ROCTracer 4.1.0 - @@ -386,219 +351,114 @@ Click the component's updated version to go to a detailed list of its changes. C Development - HIPIFY - 18.0.0 ⇒ 18.0.0 - HIPIFY + 18.0.0 + - ROCdbgapi + ROCdbgapi 0.76.0 - - ROCm CMake + ROCm CMake 0.13.0 - - ROCm Debugger (ROCgdb) + ROCm Debugger (ROCgdb) 14.2 - - ROCr Debug Agent + ROCr Debug Agent 2.0.3 - Compilers - HIPCC + HIPCC 1.1.1 - - llvm-project + llvm-project 18.0.0 - Runtimes - HIP - 6.2 ⇒ 6.2.1 - HIP + 6.2.4 + - ROCr Runtime + ROCr Runtime 1.14.0 - -### Detailed component changes +## Detailed component changes The following sections describe key changes to ROCm components. -#### **AMD SMI** (24.6.3) +### **AMD SMI** (24.6.3) -##### Changes +#### Resolved issues -* Added `amd-smi static --ras` on Guest VMs. Guest VMs can view enabled/disabled RAS features on Host cards. +* Fixed support for the API calls `amdsmi_get_gpu_process_isolation` and + `amdsmi_clean_gpu_local_data`, along with the `amd-smi set + --process-isolation <0 or 1>` command. See issue + [#3500](https://github.com/ROCm/ROCm/issues/3500) on GitHub. -##### Removals +### **rocFFT** (1.0.30) -* Removed `amd-smi metric --ecc` & `amd-smi metric --ecc-blocks` on Guest VMs. Guest VMs do not support getting current ECC counts from the Host cards. 
+#### Optimized -##### Resolved issues +* Implemented 1D kernels for factorizable sizes greater than 1024 and less than 2048. -* Fixed TypeError in `amd-smi process -G`. -* Updated CLI error strings to handle empty and invalid GPU/CPU inputs. -* Fixed Guest VM showing passthrough options. -* Fixed firmware formatting where leading 0s were missing. +#### Resolved issues -#### **HIP** (6.2.1) +* Fixed plan creation failure on some even-length real-complex transforms that use Bluestein's algorithm. -##### Resolved issues +### **rocSOLVER** (3.26.2) -* Soft hang when using `AMD_SERIALIZE_KERNEL` -* Memory leak in `hipIpcCloseMemHandle` +#### Resolved issues -#### **HIPIFY** (18.0.0) +* Fixed synchronization issue in STEIN. -##### Changes +## ROCm known issues -* Added CUDA 12.5.1 support -* Added cuDNN 9.2.1 support -* Added LLVM 18.1.8 support -* Added `hipBLAS` 64-bit APIs support -* Added Support for math constants `math_constants.h` +ROCm known issues are tracked on [GitHub](https://github.com/ROCm/ROCm/labels/Verified%20Issue). +Known issues related to individual components are listed in the [Detailed component changes](#detailed-component-changes) +section. -#### **Omnitrace** (1.11.2) - -##### Known issues - -Perfetto can no longer open Omnitrace proto files. Loading Perfetto trace output `.proto` files in the latest version of `ui.perfetto.dev` can result in a dialog with the message, "Oops, something went wrong! Please file a bug." The information in the dialog will refer to an "Unknown field type." The workaround is to open the files with the previous version of the Perfetto UI found at [https://ui.perfetto.dev/v46.0-35b3d9845/#!/](https://ui.perfetto.dev/v46.0-35b3d9845/#!/). - -See [issue #3767](https://github.com/ROCm/ROCm/issues/3767) on GitHub. 
- -#### **RCCL** (2.20.5) - -##### Known issues - -On systems running Linux kernel 6.8.0, such as Ubuntu 24.04, Direct Memory Access (DMA) transfers between the GPU and NIC are disabled and impacts multi-node RCCL performance. -This issue was reproduced with RCCL 2.20.5 (ROCm 6.2.0 and 6.2.1) on systems with Broadcom Thor-2 NICs and affects other systems with RoCE networks using Linux 6.8.0 or newer. -Older RCCL versions are also impacted. - -This issue will be addressed in a future ROCm release. - -See [issue #3772](https://github.com/ROCm/ROCm/issues/3772) on GitHub. - -#### **rocAL** (2.0.0) - -##### Changes - -* The new version of rocAL introduces many new features, but does not modify any of the existing public API functions.However, the version number was incremented from 1.3 to 2.0. - Applications linked to version 1.3 must be recompiled to link against version 2.0. -* Added development and test packages. -* Added C++ rocAL audio unit test and Python script to run and compare the outputs. -* Added Python support for audio decoders. -* Added Pytorch iterator for audio. -* Added Python audio unit test and support to verify outputs. -* Added rocDecode for HW decode. -* Added support for: - * Audio loader and decoder, which uses libsndfile library to decode wav files - * Audio augmentation - PreEmphasis filter, Spectrogram, ToDecibels, Resample, NonSilentRegionDetection, MelFilterBank - * Generic augmentation - Slice, Normalize - * Reading from file lists in file reader - * Downmixing audio channels during decoding - * TensorTensorAdd and TensorScalarMultiply operations - * Uniform and Normal distribution nodes -* Image to tensor updates -* ROCm install - use case graphics removed - -##### Known issues - -* Dependencies are not installed with the rocAL package installer. Dependencies must be installed with the prerequisite setup script provided. 
See the [rocAL README on GitHub](https://github.com/ROCm/rocAL/blob/docs/6.2.1/README.md#prerequisites-setup-script) for details. - -#### **rocBLAS** (4.2.1) - -##### Removals - -* Removed Device_Memory_Allocation.pdf link in documentation. - -##### Resolved issues - -* Fixed error/warning message during `rocblas_set_stream()` call. - -#### **rocFFT** (1.0.29) - -##### Optimizations - -* Implemented 1D kernels for factorizable sizes less than 1024. - -#### **ROCm SMI** (7.3.0) - -##### Optimizations - -* Improved handling of UnicodeEncodeErrors with non UTF-8 locales. Non UTF-8 locales were causing crashes on UTF-8 special characters. - -##### Resolved issues - -* Fixed an issue where the Compute Partition tests segfaulted when AMDGPU was loaded with optional parameters. - -##### Known issues - -* When setting CPX as a partition mode, there is a DRM node limit of 64. This is a known limitation when multiple drivers are using the DRM nodes. The `ls /sys/class/drm` command can be used to see the number of DRM nodes, and the following steps can be used to remove unnecessary drivers: - - 1. Unload AMDGPU: `sudo rmmod amdgpu`. - 2. Remove any unnecessary drivers using `rmmod`. For example, to remove an AST driver, run `sudo rmmod ast`. - 3. Reload AMDGPU using `modprobe`: `sudo modprobe amdgpu`. - -#### **rocPRIM** (3.2.1) - -##### Optimizations - -* Improved performance of `block_reduce_warp_reduce` when warp size equals block size. - -### ROCm known issues - -ROCm known issues are tracked on [GitHub](https://github.com/ROCm/ROCm/labels/Verified%20Issue). Known issues related to -individual components are listed in the [Detailed component changes](detailed-component-changes) section. - -#### Instinct MI300X GPU recovery failure on uncorrectable errors - -For the AMD Instinct MI300X accelerator, GPU recovery resets triggered by uncorrectable errors (UE) might not complete -successfully, which can result in the system being left in an undefined state. 
A system reboot is needed to recover from -this state. Additionally, error logging might fail in these situations, hindering diagnostics. - -This issue is under investigation and will be resolved in a future ROCm release. - -See [issue #3766](https://github.com/ROCm/ROCm/issues/3766) on GitHub. - -### ROCm upcoming changes +## ROCm upcoming changes The following changes to the ROCm software stack are anticipated for future releases. -#### rocm-llvm-alt +### rocm-llvm-alt The `rocm-llvm-alt` package will be removed in an upcoming release. Users relying on the functionality provided by the closed-source compiler should transition to the open-source compiler. Once the `rocm-llvm-alt` package is removed, any compilation requesting functionality provided by the closed-source compiler will result in a Clang warning: "*[AMD] proprietary optimization compiler has been removed*". -#### rccl-rdma-sharp-plugins +### rccl-rdma-sharp-plugins The RCCL plugin package, `rccl-rdma-sharp-plugins`, will be removed in an upcoming ROCm release. 
diff --git a/docs/compatibility/compatibility-matrix-historical-6.0.csv b/docs/compatibility/compatibility-matrix-historical-6.0.csv index 7346fba44..1af1eca26 100644 --- a/docs/compatibility/compatibility-matrix-historical-6.0.csv +++ b/docs/compatibility/compatibility-matrix-historical-6.0.csv @@ -1,117 +1,117 @@ -ROCm Version,6.2.2,6.2.1,6.2.0, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0 - :ref:`Operating systems & kernels `,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,, - ,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2" - ,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5" - ,"RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2" - ,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8" - ,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4" - ,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9 - ,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,,, - ,.. _architecture-support-compatibility-matrix-past-60:,,,,,,, - :doc:`Architecture `,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3 - ,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2 - ,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA - ,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3 - ,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2 - ,.. 
_gpu-support-compatibility-matrix-past-60:,,,,,,, - :doc:`GPU / LLVM target `,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100 - ,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030 - ,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_ - ,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a - ,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908 - ,,,,,,,, - FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,, - :doc:`PyTorch `,"2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13" - :doc:`TensorFlow `,"2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1" - :doc:`JAX `,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26 - `ONNX Runtime `_,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1 - ,,,,,,,, - THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,, - `UCC `_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0 - `UCX `_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1 - ,,,,,,,, - THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,, - Thrust,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1 - CUB,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1 - ,,,,,,,, - KFD & USER SPACE [#kfd_support-past-60]_,.. 
_kfd-userspace-support-compatibility-matrix-past-60:,,,,,,, - Tested user space versions,"6.1.x, 6.0.x","6.1.x, 6.0.x","6.1.x, 6.0.x","6.2.x, 6.0.x, 5.7.x","6.2.x, 6.0.x, 5.7.x","6.2.x, 6.0.x, 5.7.x","6.2.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.0.x, 5.7.x, 5.6.x" - ,,,,,,,, - ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,, - :doc:`Composable Kernel `,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0 - :doc:`MIGraphX `,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0 - :doc:`MIOpen `,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0 - :doc:`MIVisionX `,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0 - :doc:`rocAL `,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0 - :doc:`rocDecode `,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A - :doc:`rocPyDecode `,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A - :doc:`RPP `,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0 - ,,,,,,,, - COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,, - :doc:`RCCL `,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3 - ,,,,,,,, - MATH LIBS,.. 
_mathlibs-support-compatibility-matrix-past-60:,,,,,,, - `half `_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0 - :doc:`hipBLAS `,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0 - :doc:`hipBLASLt `,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0 - :doc:`hipFFT `,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13 - :doc:`hipFORT `,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0 - :doc:`hipRAND `,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16 - :doc:`hipSOLVER `,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0 - :doc:`hipSPARSE `,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0 - :doc:`hipSPARSELt `,0.2.1,0.2.1,0.2.1,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0 - :doc:`rocALUTION `,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3 - :doc:`rocBLAS `,4.2.1,4.2.1,4.2.0,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0 - :doc:`rocFFT `,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23 - :doc:`rocRAND `,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17 - :doc:`rocSOLVER `,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0 - :doc:`rocSPARSE `,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2 - :doc:`rocWMMA `,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0 - `Tensile `_,4.40.0,4.40.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0 - ,,,,,,,, - PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,, - :doc:`hipCUB `,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0 - :doc:`hipTensor `,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0 - :doc:`rocPRIM `,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0 - :doc:`rocThrust `,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0 - ,,,,,,,, - SUPPORT LIBS,,,,,,,, - `hipother `_,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830 - `rocm-core `_,6.2.2,6.2.1,6.2.0,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0 - `ROCT-Thunk-Interface `_,20240607.4.05,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245 - ,,,,,,,, - SYSTEM MGMT TOOLS,.. 
_tools-support-compatibility-matrix-past-60:,,,,,,, - :doc:`AMD SMI `,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2 - :doc:`ROCm Data Center Tool `,1.0.0,1.0.0,1.0.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0 - :doc:`rocminfo `,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0 - :doc:`ROCm SMI `,7.3.0,7.3.0,7.3.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0 - :doc:`ROCm Validation Suite `,rocm-6.2.2,rocm-6.2.1,rocm-6.2.0,rocm-6.1.2,rocm-6.1.1,rocm-6.1.0,rocm-6.0.2,rocm-6.0.0 - ,,,,,,,, - PERFORMANCE TOOLS,,,,,,,, - :doc:`Omniperf `,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A - :doc:`Omnitrace `,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A - :doc:`ROCm Bandwidth Test `,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0 - :doc:`ROCProfiler `,2.0.60202,2.0.60201,2.0.60200,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000 - :doc:`ROCprofiler-SDK `,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A - :doc:`ROCTracer `,4.1.60202,4.1.60201,4.1.60200,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000 - ,,,,,,,, - DEVELOPMENT TOOLS,,,,,,,, - :doc:`HIPIFY `,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483 - :doc:`ROCm CMake `,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0 - :doc:`ROCdbgapi `,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0 - :doc:`ROCm Debugger (ROCgdb) `,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0 - `rocprofiler-register `_,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,N/A,N/A - :doc:`ROCr Debug Agent `,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3 - ,,,,,,,, - COMPILERS,.. 
_compilers-support-compatibility-matrix-past-60:,,,,,,, - `clang-ocl `_,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0 - :doc:`hipCC `,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0 - `Flang `_,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483 - :doc:`llvm-project `,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483 - `OpenMP `_,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483 - ,,,,,,,, - RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,, - :doc:`AMD CLR `,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830 - :doc:`HIP `,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830 - `OpenCL Runtime `_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0 - :doc:`ROCR-Runtime `,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0 \ No newline at end of file +ROCm Version,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0 + :ref:`Operating systems & kernels `,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,, + ,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2" + ,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5" + ,"RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.4 [#red-hat94-past-60]_, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2" + ,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8" + ,"SLES 15 SP6, SP5","SLES 15 SP6, 
SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4" + ,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9 + ,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,Oracle Linux 8.9 [#oracle89-past-60]_,,, + ,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,, + :doc:`Architecture `,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3 + ,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2 + ,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA + ,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3 + ,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2 + ,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,, + :doc:`GPU / LLVM target `,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100 + ,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030 + ,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_ + ,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a + ,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908 + ,,,,,,,,, + FRAMEWORK SUPPORT,.. 
_framework-support-compatibility-matrix-past-60:,,,,,,,, + :doc:`PyTorch `,"2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13" + :doc:`TensorFlow `,"2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1" + :doc:`JAX `,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26 + `ONNX Runtime `_,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1 + ,,,,,,,,, + THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,, + `UCC `_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0 + `UCX `_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1 + ,,,,,,,,, + THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,, + Thrust,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1 + CUB,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1 + ,,,,,,,,, + KFD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,, + Tested user space versions,"6.1.x, 6.0.x","6.1.x, 6.0.x","6.1.x, 6.0.x","6.1.x, 6.0.x","6.2.x, 6.0.x, 5.7.x","6.2.x, 6.0.x, 5.7.x","6.2.x, 6.0.x, 5.7.x","6.2.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.0.x, 5.7.x, 5.6.x" + ,,,,,,,,, + ML & COMPUTER VISION,.. 
_mllibs-support-compatibility-matrix-past-60:,,,,,,,, + :doc:`Composable Kernel `,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0 + :doc:`MIGraphX `,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0 + :doc:`MIOpen `,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0 + :doc:`MIVisionX `,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0 + :doc:`rocAL `,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0 + :doc:`rocDecode `,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A + :doc:`rocPyDecode `,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A + :doc:`RPP `,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0 + ,,,,,,,,, + COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,, + :doc:`RCCL `,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3 + ,,,,,,,,, + MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,, + `half `_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0 + :doc:`hipBLAS `,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0 + :doc:`hipBLASLt `,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0 + :doc:`hipFFT `,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13 + :doc:`hipFORT `,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0 + :doc:`hipRAND `,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16 + :doc:`hipSOLVER `,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0 + :doc:`hipSPARSE `,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0 + :doc:`hipSPARSELt `,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0 + :doc:`rocALUTION `,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3 + :doc:`rocBLAS `,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0 + :doc:`rocFFT `,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23 + :doc:`rocRAND `,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17 + :doc:`rocSOLVER `,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0 + :doc:`rocSPARSE 
`,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2 + :doc:`rocWMMA `,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0 + `Tensile `_,4.40.0,4.40.0,4.40.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0 + ,,,,,,,,, + PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,, + :doc:`hipCUB `,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0 + :doc:`hipTensor `,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0 + :doc:`rocPRIM `,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0 + :doc:`rocThrust `,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0 + ,,,,,,,,, + SUPPORT LIBS,,,,,,,,, + `hipother `_,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830 + `rocm-core `_,6.2.4,6.2.2,6.2.1,6.2.0,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0 + `ROCT-Thunk-Interface `_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245 + ,,,,,,,,, + SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,, + :doc:`AMD SMI `,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2 + :doc:`ROCm Data Center Tool `,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0 + :doc:`rocminfo `,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0 + :doc:`ROCm SMI `,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0 + :doc:`ROCm Validation Suite `,rocm-6.2.4,rocm-6.2.2,rocm-6.2.1,rocm-6.2.0,rocm-6.1.2,rocm-6.1.1,rocm-6.1.0,rocm-6.0.2,rocm-6.0.0 + ,,,,,,,,, + PERFORMANCE TOOLS,,,,,,,,, + :doc:`Omniperf `,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A + :doc:`Omnitrace `,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A + :doc:`ROCm Bandwidth Test `,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0 + :doc:`ROCProfiler `,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000 + :doc:`ROCprofiler-SDK `,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A + :doc:`ROCTracer 
`,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000 + ,,,,,,,,, + DEVELOPMENT TOOLS,,,,,,,,, + :doc:`HIPIFY `,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483 + :doc:`ROCm CMake `,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0 + :doc:`ROCdbgapi `,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0 + :doc:`ROCm Debugger (ROCgdb) `,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0 + `rocprofiler-register `_,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,N/A,N/A + :doc:`ROCr Debug Agent `,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3 + ,,,,,,,,, + COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,, + `clang-ocl `_,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0 + :doc:`hipCC `,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0 + `Flang `_,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483 + :doc:`llvm-project `,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483 + `OpenMP `_,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483 + ,,,,,,,,, + RUNTIMES,.. 
_runtime-support-compatibility-matrix-past-60:,,,,,,,, + :doc:`AMD CLR `,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830 + :doc:`HIP `,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830 + `OpenCL Runtime `_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0 + :doc:`ROCR-Runtime `,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0 diff --git a/docs/compatibility/compatibility-matrix.rst b/docs/compatibility/compatibility-matrix.rst index 98586d9b1..40a301437 100644 --- a/docs/compatibility/compatibility-matrix.rst +++ b/docs/compatibility/compatibility-matrix.rst @@ -10,17 +10,19 @@ Use this matrix to view the ROCm compatibility and system requirements across su You can also refer to the :ref:`past versions of ROCm compatibility matrix`. +Accelerators and GPUs listed in the following table support compute workloads (no display information or graphics). If you’re using ROCm with AMD Radeon or Radeon Pro GPUs for graphics workloads, see the `Use ROCm on Radeon GPU documentation `_ to verify compatibility and system requirements. + .. |br| raw:: html
.. container:: format-big-table - .. csv-table:: - :header: "ROCm Version", "6.2.2", "6.2.1", "6.1.0" + .. csv-table:: + :header: "ROCm Version", "6.2.4", "6.2.2", "6.1.0" :stub-columns: 1 - :ref:`Operating systems & kernels `,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04 + :ref:`Operating systems & kernels `,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04", ,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.4, 22.04.3" ,,,"Ubuntu 20.04.6, 20.04.5" ,"RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4 [#red-hat94]_, 9.3, 9.2" @@ -37,13 +39,13 @@ You can also refer to the :ref:`past versions of ROCm compatibility matrix`,gfx1100,gfx1100,gfx1100 ,gfx1030,gfx1030,gfx1030 - ,gfx942 [#mi300_622]_,gfx942 [#mi300_621]_, gfx942 [#mi300_610]_ + ,gfx942 [#mi300_624]_,gfx942 [#mi300_622]_, gfx942 [#mi300_610]_ ,gfx90a,gfx90a,gfx90a ,gfx908,gfx908,gfx908 ,,, FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix:,, :doc:`PyTorch `,"2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13" - :doc:`TensorFlow `,"2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15, 2.14, 2.13" + :doc:`TensorFlow `,"2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1" :doc:`JAX `,0.4.26,0.4.26,0.4.26 `ONNX Runtime `_,1.17.3,1.17.3,1.17.3 ,,, @@ -75,49 +77,49 @@ You can also refer to the :ref:`past versions of ROCm compatibility matrix`_ ,1.12.0,1.12.0,1.12.0 :doc:`hipBLAS `,2.2.0,2.2.0,2.1.0 :doc:`hipBLASLt `,0.8.0,0.8.0,0.7.0 - :doc:`hipFFT `,1.0.15,1.0.15,1.0.14 + :doc:`hipFFT `,1.0.16,1.0.15,1.0.14 :doc:`hipFORT `,0.4.0,0.4.0,0.4.0 - :doc:`hipRAND `,2.11.0,2.11.0,2.10.16 + :doc:`hipRAND `,2.11.1,2.11.0,2.10.16 :doc:`hipSOLVER `,2.2.0,2.2.0,2.1.0 :doc:`hipSPARSE `,3.1.1,3.1.1,3.0.1 :doc:`hipSPARSELt `,0.2.1,0.2.1,0.1.0 - :doc:`rocALUTION `,3.2.0,3.2.0,3.1.1 - :doc:`rocBLAS `,4.2.1,4.2.1,4.1.0 - :doc:`rocFFT `,1.0.29,1.0.29,1.0.26 - :doc:`rocRAND `,3.1.0,3.1.0,3.0.1 - :doc:`rocSOLVER `,3.26.0,3.26.0,3.25.0 - :doc:`rocSPARSE 
`,3.2.0,3.2.0,3.1.2 + :doc:`rocALUTION `,3.2.1,3.2.0,3.1.1 + :doc:`rocBLAS `,4.2.4,4.2.1,4.1.0 + :doc:`rocFFT `,1.0.30,1.0.29,1.0.26 + :doc:`rocRAND `,3.1.1,3.1.0,3.0.1 + :doc:`rocSOLVER `,3.26.2,3.26.0,3.25.0 + :doc:`rocSPARSE `,3.2.1,3.2.0,3.1.2 :doc:`rocWMMA `,1.5.0,1.5.0,1.4.0 `Tensile `_,4.40.0,4.40.0,4.40.0 ,,, PRIMITIVES,.. _primitivelibs-support-compatibility-matrix:,, - :doc:`hipCUB `,3.2.0,3.2.0,3.1.0 + :doc:`hipCUB `,3.2.1,3.2.0,3.1.0 :doc:`hipTensor `,1.3.0,1.3.0,1.2.0 - :doc:`rocPRIM `,3.2.0,3.2.0,3.1.0 - :doc:`rocThrust `,3.1.0,3.1.0,3.0.1 + :doc:`rocPRIM `,3.2.2,3.2.0,3.1.0 + :doc:`rocThrust `,3.1.1,3.1.0,3.0.1 ,,, SUPPORT LIBS,,, `hipother `_,6.2.41134,6.2.41134,6.1.40091 - `rocm-core `_,6.2.2,6.2.1,6.1.0 - `ROCT-Thunk-Interface `_,20240607.4.05,20240607.4.05,20240125.3.30 + `rocm-core `_,6.2.4,6.2.2,6.1.0 + `ROCT-Thunk-Interface `_,20240607.5.7,20240607.5.7,20240125.3.30 ,,, SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix:,, :doc:`AMD SMI `,24.6.3,24.6.3,24.4.1 - :doc:`ROCm Data Center Tool `,1.0.0,1.0.0,0.3.0 + :doc:`ROCm Data Center Tool `,0.3.0,0.3.0,0.3.0 :doc:`rocminfo `,1.0.0,1.0.0,1.0.0 :doc:`ROCm SMI `,7.3.0,7.3.0,7.0.0 - :doc:`ROCm Validation Suite `,rocm-6.2.2,rocm-6.2.1,rocm-6.1.0 + :doc:`ROCm Validation Suite `,rocm-6.2.4,rocm-6.2.2,rocm-6.1.0 ,,, PERFORMANCE TOOLS,,, :doc:`Omniperf `,2.0.1,2.0.1,N/A :doc:`Omnitrace `,1.11.2,1.11.2,N/A :doc:`ROCm Bandwidth Test `,1.4.0,1.4.0,1.4.0 - :doc:`ROCProfiler `,2.0.60202,2.0.60201,2.0.60100 + :doc:`ROCProfiler `,2.0.60204,2.0.60202,2.0.60100 :doc:`ROCprofiler-SDK `,0.4.0,0.4.0,N/A - :doc:`ROCTracer `,4.1.60202,4.1.60201,4.1.60100 + :doc:`ROCTracer `,4.1.60204,4.1.60202,4.1.60100 ,,, DEVELOPMENT TOOLS,,, - :doc:`HIPIFY `,18.0.0.24355,18.0.0.24355,17.0.0.24103 + :doc:`HIPIFY `,18.0.0.24392,18.0.0.24355,17.0.0.24103 :doc:`ROCm CMake `,0.13.0,0.13.0,0.12.0 :doc:`ROCdbgapi `,0.76.0,0.76.0,0.71.0 :doc:`ROCm Debugger (ROCgdb) `,14.2.0,14.2.0,14.1.0 @@ -127,9 +129,9 @@ You can also refer to the 
:ref:`past versions of ROCm compatibility matrix`_,N/A,N/A,0.5.0 :doc:`hipCC `,1.1.1,1.1.1,1.0.0 - `Flang `_,18.0.0.24355,18.0.0.24355,17.0.0.24103 - :doc:`llvm-project `,18.0.0.24355,18.0.0.24355,17.0.0.24103 - `OpenMP `_,18.0.0.24355,18.0.0.24355,17.0.0.24103 + `Flang `_,18.0.0.24392,18.0.0.24355,17.0.0.24103 + :doc:`llvm-project `,18.0.0.24392,18.0.0.24355,17.0.0.24103 + `OpenMP `_,18.0.0.24392,18.0.0.24355,17.0.0.24103 ,,, RUNTIMES,.. _runtime-support-compatibility-matrix:,, :doc:`AMD CLR `,6.2.41134,6.2.41134,6.1.40091 @@ -142,8 +144,8 @@ You can also refer to the :ref:`past versions of ROCm compatibility matrix - - - - - -# Inference optimization with MIGraphX - -The following sections cover inferencing and introduces [MIGraphX](https://rocm.docs.amd.com/projects/AMDMIGraphX/en/latest/). - -## Inference - -The inference is where capabilities learned during deep-learning training are put to work. It refers to using a fully trained neural network to make conclusions (predictions) on unseen data that the model has never interacted with before. Deep-learning inferencing is achieved by feeding new data, such as new images, to the network, giving the Deep Neural Network a chance to classify the image. - -Taking our previous example of MNIST, the DNN can be fed new images of handwritten digit images, allowing the neural network to classify digits. A fully trained DNN should make accurate predictions about what an image represents, and inference cannot happen without training. - -## MIGraphX introduction - -MIGraphX is a graph compiler focused on accelerating the machine-learning inference that can target AMD GPUs and CPUs. MIGraphX accelerates the machine-learning models by leveraging several graph-level transformations and optimizations. 
These optimizations include: - -* Operator fusion -* Arithmetic simplifications -* Dead-code elimination -* Common subexpression elimination (CSE) -* Constant propagation - -After doing all these transformations, MIGraphX emits code for the AMD GPU by calling to MIOpen or rocBLAS or creating HIP kernels for a particular operator. MIGraphX can also target CPUs using DNNL or ZenDNN libraries. - -MIGraphX provides easy-to-use APIs in C++ and Python to import machine models in ONNX or TensorFlow. Users can compile, save, load, and run these models using the MIGraphX C++ and Python APIs. Internally, MIGraphX parses ONNX or TensorFlow models into internal graph representation where each operator in the model gets mapped to an operator within MIGraphX. Each of these operators defines various attributes such as: - -* Number of arguments -* Type of arguments -* Shape of arguments - -After optimization passes, all these operators get mapped to different kernels on GPUs or CPUs. - -After importing a model into MIGraphX, the model is represented as `migraphx::program`. `migraphx::program` is made up of `migraphx::module`. The program can consist of several modules, but it always has one main_module. Modules are made up of `migraphx::instruction_ref`. Instructions contain the `migraphx::op` and arguments to the operator.   - -## Installing MIGraphX - -There are three options to get started with MIGraphX installation. MIGraphX depends on ROCm libraries; assume that the machine has ROCm installed. - -### Option 1: installing binaries - -To install MIGraphX on Debian-based systems like Ubuntu, use the following command: - -```bash -sudo apt update && sudo apt install -y migraphx -``` - -The header files and libraries are installed under `/opt/rocm-\`, where \ is the ROCm version. - -### Option 2: building from source - -There are two ways to build the MIGraphX sources. 
- -* [Use the ROCm build tool](https://github.com/ROCm/AMDMIGraphX#use-the-rocm-build-tool-rbuild) - This approach uses `[rbuild](https://github.com/ROCm/rbuild)` to install the prerequisites and build the libraries with just one command. - - or - -* [Use CMake](https://github.com/ROCm/AMDMIGraphX#use-cmake-to-build-migraphx) - This approach uses a script to install the prerequisites, then uses CMake to build the source. - -For detailed steps on building from source and installing dependencies, refer to the following `README` file: - -[https://github.com/ROCm/AMDMIGraphX#building-from-source](https://github.com/ROCm/AMDMIGraphX#building-from-source) - -### Option 3: use docker - -To use Docker, follow these steps: - -1. The easiest way to set up the development environment is to use Docker. To build Docker from scratch, first clone the MIGraphX repository by running: - - ```bash - git clone --recursive https://github.com/ROCm/AMDMIGraphX - ``` - -2. The repository contains a Dockerfile from which you can build a Docker image as: - - ```bash - docker build -t migraphx . - ``` - -3. Then to enter the development environment, use Docker run: - - ```bash - docker run --device='/dev/kfd' --device='/dev/dri' -v=`pwd`:/code/AMDMIGraphX -w /code/AMDMIGraphX --group-add video -it migraphx - ``` - -The Docker image contains all the prerequisites required for the installation, so users can go to the folder `/code/AMDMIGraphX` and follow the steps mentioned in [Option 2: Building from Source](#option-2-building-from-source). - -## MIGraphX example - -MIGraphX provides both C++ and Python APIs. The following sections show examples of both using the Inception v3 model. 
To walk through the examples, fetch the Inception v3 ONNX model by running the following: - -```py -import torch -import torchvision.models as models -inception = models.inception_v3(pretrained=True) -torch.onnx.export(inception,torch.randn(1,3,299,299), "inceptioni1.onnx") -``` - -This will create `inceptioni1.onnx`, which can be imported in MIGraphX using C++ or Python API. - -### MIGraphX Python API - -Follow these steps: - -1. To import the MIGraphX module in Python script, set `PYTHONPATH` to the MIGraphX libraries installation. If binaries are installed using steps mentioned in [Option 1: Installing Binaries](#option-1-installing-binaries), perform the following action: - - ```bash - export PYTHONPATH=$PYTHONPATH:/opt/rocm/ - ``` - -2. The following script shows the usage of Python API to import the ONNX model, compile it, and run inference on it. Set `LD_LIBRARY_PATH` to `/opt/rocm/` if required. - - ```py - # import migraphx and numpy - import migraphx - import numpy as np - # import and parse inception model - model = migraphx.parse_onnx("inceptioni1.onnx") - # compile model for the GPU target - model.compile(migraphx.get_target("gpu")) - # optionally print compiled model - model.print() - # create random input image - input_image = np.random.rand(1, 3, 299, 299).astype('float32') - # feed image to model, 'x.1` is the input param name - results = model.run({'x.1': input_image}) - # get the results back - result_np = np.array(results[0]) - # print the inferred class of the input image - print(np.argmax(result_np)) - ``` - - Find additional examples of Python API in the `/examples` directory of the MIGraphX repository. - -## MIGraphX C++ API - -Follow these steps: - -1. The following is a minimalist example that shows the usage of MIGraphX C++ API to load ONNX file, compile it for the GPU, and run inference on it. To use MIGraphX C++ API, you only need to load the `migraphx.hpp` file. This example runs inference on the Inception v3 model. 
- - ```c++ - #include - #include - #include - #include - #include - #include - - int main(int argc, char** argv) - { - migraphx::program prog; - migraphx::onnx_options onnx_opts; - // import and parse onnx file into migraphx::program - prog = parse_onnx("inceptioni1.onnx", onnx_opts); - // print imported model - prog.print(); - migraphx::target targ = migraphx::target("gpu"); - migraphx::compile_options comp_opts; - comp_opts.set_offload_copy(); - // compile for the GPU - prog.compile(targ, comp_opts); - // print the compiled program - prog.print(); - // randomly generate input image - // of shape (1, 3, 299, 299) - std::srand(unsigned(std::time(nullptr))); - std::vector input_image(1*299*299*3); - std::generate(input_image.begin(), input_image.end(), std::rand); - // users need to provide data for the input - // parameters in order to run inference - // you can query into migraph program for the parameters - migraphx::program_parameters prog_params; - auto param_shapes = prog.get_parameter_shapes(); - auto input = param_shapes.names().front(); - // create argument for the parameter - prog_params.add(input, migraphx::argument(param_shapes[input], input_image.data())); - // run inference - auto outputs = prog.eval(prog_params); - // read back the output - float* results = reinterpret_cast(outputs[0].data()); - float* max = std::max_element(results, results + 1000); - int answer = max - results; - std::cout << "answer: " << answer << std::endl; - } - ``` - -2. To compile this program, you can use CMake and you only need to link the `migraphx::c` library to use MIGraphX's C++ API. 
The following is the `CMakeLists.txt` file that can build the earlier example: - - ```cmake - cmake_minimum_required(VERSION 3.5) - project (CAI) - - set (CMAKE_CXX_STANDARD 14) - set (EXAMPLE inception_inference) - - list (APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm) - find_package (migraphx) - - message("source file: " ${EXAMPLE}.cpp " ---> bin: " ${EXAMPLE}) - add_executable(${EXAMPLE} ${EXAMPLE}.cpp) - - target_link_libraries(${EXAMPLE} migraphx::c) - ``` - -3. To build the executable file, run the following from the directory containing the `inception_inference.cpp` file: - - ```bash - mkdir build - cd build - cmake .. - make -j$(nproc) - ./inception_inference - ``` - -:::{note} - Set `LD_LIBRARY_PATH` to `/opt/rocm/lib` if required during the build. Additional examples can be found in the MIGraphX repository under the `/examples/` directory. -::: - -## Tuning MIGraphX - -MIGraphX uses MIOpen kernels to target AMD GPU. For the model compiled with MIGraphX, tune MIOpen to pick the best possible kernel implementation. The MIOpen tuning results in a significant performance boost. Tuning can be done by setting the environment variable `MIOPEN_FIND_ENFORCE=3`. - -:::{note} - The tuning process can take a long time to finish. -::: - -**Example:** The average inference time of the inception model example shown previously over 100 iterations using untuned kernels is 0.01383ms. After tuning, it reduces to 0.00459ms, which is a 3x improvement. This result is from ROCm v4.5 on a MI100 GPU. - -:::{note} - The results may vary depending on the system configurations. 
-::: - -For reference, the following code snippet shows inference runs for only the first 10 iterations for both tuned and untuned kernels: - -```console -### UNTUNED ### -iterator : 0 -Inference complete -Inference time: 0.063ms -iterator : 1 -Inference complete -Inference time: 0.008ms -iterator : 2 -Inference complete -Inference time: 0.007ms -iterator : 3 -Inference complete -Inference time: 0.007ms -iterator : 4 -Inference complete -Inference time: 0.007ms -iterator : 5 -Inference complete -Inference time: 0.008ms -iterator : 6 -Inference complete -Inference time: 0.007ms -iterator : 7 -Inference complete -Inference time: 0.028ms -iterator : 8 -Inference complete -Inference time: 0.029ms -iterator : 9 -Inference complete -Inference time: 0.029ms - -### TUNED ### -iterator : 0 -Inference complete -Inference time: 0.063ms -iterator : 1 -Inference complete -Inference time: 0.004ms -iterator : 2 -Inference complete -Inference time: 0.004ms -iterator : 3 -Inference complete -Inference time: 0.004ms -iterator : 4 -Inference complete -Inference time: 0.004ms -iterator : 5 -Inference complete -Inference time: 0.004ms -iterator : 6 -Inference complete -Inference time: 0.004ms -iterator : 7 -Inference complete -Inference time: 0.004ms -iterator : 8 -Inference complete -Inference time: 0.004ms -iterator : 9 -Inference complete -Inference time: 0.004ms -``` - -### YModel - -The best inference performance through MIGraphX is conditioned upon having tuned kernel configurations stored in a `/home` local User Database (DB). If a user were to move their model to a different server or allow a different user to use it, they would have to run through the MIOpen tuning process again to populate the next User DB with the best kernel configurations and corresponding solvers. - -Tuning is time consuming, and if the users have not performed tuning, they would see discrepancies between expected or claimed inference performance and actual inference performance. 
This has led to repetitive and time-consuming tuning tasks for each user. - -MIGraphX introduces a feature, known as YModel, that stores the kernel config parameters found during tuning into a `.mxr` file. This ensures the same level of expected performance, even when a model is copied to a different user/system. - -The YModel feature is available starting from ROCm 5.4.1 and UIF 1.1. - -#### YModel example - -Through the `migraphx-driver` functionality, you can generate `.mxr` files with tuning information stored inside it by passing additional `--binary --output model.mxr` to `migraphx-driver` along with the rest of the necessary flags. - -For example, to generate `.mxr` file from the ONNX model, use the following: - -```bash -./path/to/migraphx-driver compile --onnx resnet50.onnx --enable-offload-copy --binary --output resnet50.mxr -``` - -To run generated `.mxr` files through `migraphx-driver`, use the following: - -```bash -./path/to/migraphx-driver run --migraphx resnet50.mxr --enable-offload-copy -``` - -Alternatively, you can use the MIGraphX C++ or Python API to generate `.mxr` files. - -![Generating an MXR file](../data/conceptual/image018.png "Generating an MXR file") diff --git a/docs/conf.py b/docs/conf.py index f20f6cd76..7c74b94f6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,15 +30,15 @@ if os.environ.get("READTHEDOCS", "") == "True": project = "ROCm Documentation" author = "Advanced Micro Devices, Inc." copyright = "Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved." 
-version = "6.2.2" -release = "6.2.2" +version = "6.2.4" +release = "6.2.4" setting_all_article_info = True all_article_info_os = ["linux", "windows"] all_article_info_author = "" # pages with specific settings article_pages = [ - {"file": "about/release-notes", "os": ["linux", "windows"], "date": "2024-09-27"}, + {"file": "about/release-notes", "os": ["linux", "windows"], "date": "2024-11-06"}, {"file": "how-to/deep-learning-rocm", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/index", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/install", "os": ["linux"]}, @@ -81,6 +81,7 @@ article_pages = [ "file": "how-to/llm-fine-tuning-optimization/profiling-and-debugging", "os": ["linux"], }, + {"file": "how-to/performance-validation/mi300x/vllm-benchmark", "os": ["linux"]}, {"file": "how-to/system-optimization/index", "os": ["linux"]}, {"file": "how-to/system-optimization/mi300x", "os": ["linux"]}, {"file": "how-to/system-optimization/mi200", "os": ["linux"]}, diff --git a/docs/how-to/llm-fine-tuning-optimization/llm-inference-frameworks.rst b/docs/how-to/llm-fine-tuning-optimization/llm-inference-frameworks.rst index 3ee672353..84e839391 100644 --- a/docs/how-to/llm-fine-tuning-optimization/llm-inference-frameworks.rst +++ b/docs/how-to/llm-fine-tuning-optimization/llm-inference-frameworks.rst @@ -16,7 +16,7 @@ This section discusses how to implement `vLLM `_ vLLM inference ============== -vLLM is renowned for its paged attention algorithm that can reduce memory consumption and increase throughput thanks to +vLLM is renowned for its PagedAttention algorithm that can reduce memory consumption and increase throughput thanks to its paging scheme. Instead of allocating GPU high-bandwidth memory (HBM) for the maximum output token lengths of the models, the paged attention of vLLM allocates GPU HBM dynamically for its actual decoding lengths. 
This paged attention is also effective when multiple requests share the same key and value contents for a large value of beam search or @@ -139,9 +139,7 @@ Refer to :ref:`mi300x-vllm-optimization` for performance optimization tips. ROCm provides a prebuilt optimized Docker image for validating the performance of LLM inference with vLLM on the MI300X accelerator. The Docker image includes ROCm, vLLM, PyTorch, and tuning files in the CSV -format. For more information, see the guide to -`LLM inference performance validation with vLLM on the AMD Instinct™ MI300X accelerator `_ -on the ROCm GitHub repository. +format. For more information, see :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`. .. _fine-tuning-llms-tgi: diff --git a/docs/how-to/performance-validation/mi300x/vllm-benchmark.rst b/docs/how-to/performance-validation/mi300x/vllm-benchmark.rst new file mode 100644 index 000000000..90883ea84 --- /dev/null +++ b/docs/how-to/performance-validation/mi300x/vllm-benchmark.rst @@ -0,0 +1,407 @@ +.. meta:: + :description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the unified + ROCm Docker image. + :keywords: model, MAD, automation, dashboarding, validate + +*********************************************************** +LLM inference performance validation on AMD Instinct MI300X +*********************************************************** + +.. _vllm-benchmark-unified-docker: + +The `ROCm vLLM Docker `_ image offers +a prebuilt, optimized environment designed for validating large language model +(LLM) inference performance on the AMD Instinct™ MI300X accelerator. 
This +ROCm vLLM Docker image integrates vLLM and PyTorch tailored specifically for the +MI300X accelerator and includes the following components: + +* `ROCm 6.2.1 `_ + +* `vLLM 0.6.4 `_ + +* `PyTorch 2.5.0 `_ + +* Tuning files (in CSV format) + +With this Docker image, you can quickly validate the expected inference +performance numbers on the MI300X accelerator. This topic also provides tips on +optimizing performance with popular AI models. + +.. hlist:: + :columns: 6 + + * Llama 3.1 8B + + * Llama 3.1 70B + + * Llama 3.1 405B + + * Llama 2 7B + + * Llama 2 70B + + * Mixtral 8x7B + + * Mixtral 8x22B + + * Mistral 7B + + * Qwen2 7B + + * Qwen2 72B + + * JAIS 13B + + * JAIS 30B + +.. _vllm-benchmark-vllm: + +.. note:: + + vLLM is a toolkit and library for LLM inference and serving. AMD implements + high-performance custom kernels and modules in vLLM to enhance performance. + See :ref:`fine-tuning-llms-vllm` and :ref:`mi300x-vllm-optimization` for + more information. + +Getting started +=============== + +Use the following procedures to reproduce the benchmark results on an +MI300X accelerator with the prebuilt vLLM Docker image. + +.. _vllm-benchmark-get-started: + +1. Disable NUMA auto-balancing. + + To optimize performance, disable automatic NUMA balancing. Otherwise, the GPU + might hang until the periodic balancing is finalized. For more information, + see :ref:`AMD Instinct MI300X system optimization `. + + .. code-block:: shell + + # disable automatic NUMA balancing + sh -c 'echo 0 > /proc/sys/kernel/numa_balancing' + # check if NUMA balancing is disabled (returns 0 if disabled) + cat /proc/sys/kernel/numa_balancing + 0 + +2. Download the :ref:`ROCm vLLM Docker image `. + + Use the following command to pull the Docker image from Docker Hub. + + ..
code-block:: shell + + docker pull rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4 + +Once setup is complete, you can choose between two options to reproduce the +benchmark results: + +- :ref:`MAD-integrated benchmarking <vllm-benchmark-mad>` + +- :ref:`Standalone benchmarking <vllm-benchmark-standalone>` + +.. _vllm-benchmark-mad: + +MAD-integrated benchmarking +=========================== + +Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local +directory and install the required packages on the host machine. + +.. code-block:: shell + + git clone https://github.com/ROCm/MAD + cd MAD + pip install -r requirements.txt + +Use this command to run a performance benchmark test of the Llama 3.1 8B model +on one GPU with the ``float16`` data type on the host machine. + +.. code-block:: shell + + export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models" + python3 tools/run_models.py --tags pyt_vllm_llama-3.1-8b --keep-model-dir --live-output --timeout 28800 + +ROCm MAD launches a Docker container with the name +``container_ci-pyt_vllm_llama-3.1-8b``. The latency and throughput reports of the +model are collected in the following path: ``~/MAD/reports_float16/``. + +Although the following models are preconfigured to collect latency and +throughput performance data, you can also change the benchmarking parameters. +Refer to the :ref:`Standalone benchmarking <vllm-benchmark-standalone>` section. + +Available models +---------------- + +..
hlist:: + :columns: 3 + + * ``pyt_vllm_llama-3.1-8b`` + + * ``pyt_vllm_llama-3.1-70b`` + + * ``pyt_vllm_llama-3.1-405b`` + + * ``pyt_vllm_llama-2-7b`` + + * ``pyt_vllm_llama-2-70b`` + + * ``pyt_vllm_mixtral-8x7b`` + + * ``pyt_vllm_mixtral-8x22b`` + + * ``pyt_vllm_mistral-7b`` + + * ``pyt_vllm_qwen2-7b`` + + * ``pyt_vllm_qwen2-72b`` + + * ``pyt_vllm_jais-13b`` + + * ``pyt_vllm_jais-30b`` + + * ``pyt_vllm_llama-3.1-8b_fp8`` + + * ``pyt_vllm_llama-3.1-70b_fp8`` + + * ``pyt_vllm_llama-3.1-405b_fp8`` + + * ``pyt_vllm_mixtral-8x7b_fp8`` + + * ``pyt_vllm_mixtral-8x22b_fp8`` + +.. _vllm-benchmark-standalone: + +Standalone benchmarking +======================= + +You can run the vLLM benchmark tool independently by starting the +:ref:`Docker container ` as shown in the following +snippet. + +.. code-block:: + + docker pull rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4 + docker run -it --device=/dev/kfd --device=/dev/dri --group-add video --shm-size 128G --security-opt seccomp=unconfined --security-opt apparmor=unconfined --cap-add=SYS_PTRACE -v $(pwd):/workspace --env HUGGINGFACE_HUB_CACHE=/workspace --name vllm_v0.6.4 rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4 + +In the Docker container, clone the ROCm MAD repository and navigate to the +benchmark scripts directory at ``~/MAD/scripts/vllm``. + +.. code-block:: + + git clone https://github.com/ROCm/MAD + cd MAD/scripts/vllm + +Command +------- + +To start the benchmark, use the following command with the appropriate options. +See :ref:`Options ` for the list of +options and their descriptions. + +.. code-block:: shell + + ./vllm_benchmark_report.sh -s $test_option -m $model_repo -g $num_gpu -d $datatype + +See the :ref:`examples ` for more information. + +.. note:: + + The input sequence length, output sequence length, and tensor parallel (TP) are + already configured. You don't need to specify them with this script. + +.. 
note:: + + If you encounter the following error, pass your access-authorized Hugging + Face token to the gated models. + + .. code-block:: shell + + OSError: You are trying to access a gated repo. + + # pass your HF_TOKEN + export HF_TOKEN=$your_personal_hf_token + +.. _vllm-benchmark-standalone-options: + +Options +------- + +.. list-table:: + :header-rows: 1 + :align: center + + * - Name + - Options + - Description + + * - ``$test_option`` + - latency + - Measure decoding token latency + + * - + - throughput + - Measure token generation throughput + + * - + - all + - Measure both throughput and latency + + * - ``$model_repo`` + - ``meta-llama/Meta-Llama-3.1-8B-Instruct`` + - Llama 3.1 8B + + * - (``float16``) + - ``meta-llama/Meta-Llama-3.1-70B-Instruct`` + - Llama 3.1 70B + + * - + - ``meta-llama/Meta-Llama-3.1-405B-Instruct`` + - Llama 3.1 405B + + * - + - ``meta-llama/Llama-2-7b-chat-hf`` + - Llama 2 7B + + * - + - ``meta-llama/Llama-2-70b-chat-hf`` + - Llama 2 70B + + * - + - ``mistralai/Mixtral-8x7B-Instruct-v0.1`` + - Mixtral 8x7B + + * - + - ``mistralai/Mixtral-8x22B-Instruct-v0.1`` + - Mixtral 8x22B + + * - + - ``mistralai/Mistral-7B-Instruct-v0.3`` + - Mistral 7B + + * - + - ``Qwen/Qwen2-7B-Instruct`` + - Qwen2 7B + + * - + - ``Qwen/Qwen2-72B-Instruct`` + - Qwen2 72B + + * - + - ``core42/jais-13b-chat`` + - JAIS 13B + + * - + - ``core42/jais-30b-chat-v3`` + - JAIS 30B + + * - ``$model_repo`` + - ``amd/Meta-Llama-3.1-8B-Instruct-FP8-KV`` + - Llama 3.1 8B + + * - (``float8``) + - ``amd/Meta-Llama-3.1-70B-Instruct-FP8-KV`` + - Llama 3.1 70B + + * - + - ``amd/Meta-Llama-3.1-405B-Instruct-FP8-KV`` + - Llama 3.1 405B + + * - + - ``amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV`` + - Mixtral 8x7B + + * - + - ``amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV`` + - Mixtral 8x22B + + * - ``$num_gpu`` + - 1 or 8 + - Number of GPUs + + * - ``$datatype`` + - ``float16`` or ``float8`` + - Data type + +..
_vllm-benchmark-run-benchmark: + +Running the benchmark on the MI300X accelerator +----------------------------------------------- + +Here are some examples of running the benchmark with various options. +See :ref:`Options ` for the list of +options and their descriptions. + +Example 1: latency benchmark +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use this command to benchmark the latency of the Llama 3.1 8B model on one GPU with the ``float16`` and ``float8`` data types. + +.. code-block:: + + ./vllm_benchmark_report.sh -s latency -m meta-llama/Meta-Llama-3.1-8B-Instruct -g 1 -d float16 + ./vllm_benchmark_report.sh -s latency -m amd/Meta-Llama-3.1-8B-Instruct-FP8-KV -g 1 -d float8 + +Find the latency reports at: + +- ``./reports_float16/summary/Meta-Llama-3.1-8B-Instruct_latency_report.csv`` + +- ``./reports_float8/summary/Meta-Llama-3.1-8B-Instruct-FP8-KV_latency_report.csv`` + +Example 2: throughput benchmark +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use this command to benchmark the throughput of the Llama 3.1 8B model on one GPU with the ``float16`` and ``float8`` data types. + +.. code-block:: shell + + ./vllm_benchmark_report.sh -s throughput -m meta-llama/Meta-Llama-3.1-8B-Instruct -g 1 -d float16 + ./vllm_benchmark_report.sh -s throughput -m amd/Meta-Llama-3.1-8B-Instruct-FP8-KV -g 1 -d float8 + +Find the throughput reports at: + +- ``./reports_float16/summary/Meta-Llama-3.1-8B-Instruct_throughput_report.csv`` + +- ``./reports_float8/summary/Meta-Llama-3.1-8B-Instruct-FP8-KV_throughput_report.csv`` + +.. raw:: html + + + +.. note:: + + Throughput is calculated as: + + - .. math:: throughput\_tot = requests \times (\mathsf{\text{input lengths}} + \mathsf{\text{output lengths}}) / elapsed\_time + + - .. 
math:: throughput\_gen = requests \times \mathsf{\text{output lengths}} / elapsed\_time + +Further reading +=============== + +- For application performance optimization strategies for HPC and AI workloads, + including inference with vLLM, see :doc:`/how-to/tuning-guides/mi300x/workload`. + +- To learn more about the options for latency and throughput benchmark scripts, + see ``_. + +- To learn more about system settings and management practices to configure your system for + MI300X accelerators, see :doc:`/how-to/system-optimization/mi300x`. + +- To learn how to run LLM models from Hugging Face or your own model, see + :doc:`Using ROCm for AI `. + +- To learn how to optimize inference on LLMs, see + :doc:`Fine-tuning LLMs and inference optimization `. + +- For a list of other ready-made Docker images for ROCm, see the + :doc:`Docker image support matrix `. + +- To compare with the previous version of the ROCm vLLM Docker image for performance validation, refer to + `LLM inference performance validation on AMD Instinct MI300X (ROCm 6.2.0) `_. + diff --git a/docs/how-to/tuning-guides/mi300x/index.rst b/docs/how-to/tuning-guides/mi300x/index.rst index 1947a28d1..28389f40a 100644 --- a/docs/how-to/tuning-guides/mi300x/index.rst +++ b/docs/how-to/tuning-guides/mi300x/index.rst @@ -8,6 +8,8 @@ accelerators. They include detailed instructions on system settings and application tuning suggestions to help you fully leverage the capabilities of these accelerators, thereby achieving optimal performance. +* :doc:`/how-to/performance-validation/mi300x/vllm-benchmark` + * :doc:`/how-to/tuning-guides/mi300x/system` * :doc:`/how-to/tuning-guides/mi300x/workload` diff --git a/docs/how-to/tuning-guides/mi300x/workload.rst b/docs/how-to/tuning-guides/mi300x/workload.rst index 6857eae1b..56ad0e98a 100644 --- a/docs/how-to/tuning-guides/mi300x/workload.rst +++ b/docs/how-to/tuning-guides/mi300x/workload.rst @@ -152,9 +152,7 @@ address any new bottlenecks that may emerge. 
ROCm provides a prebuilt optimized Docker image that has everything required to implement the tips in this section. It includes ROCm, vLLM, PyTorch, and tuning files in the CSV -format. For more information, see the guide to -`LLM inference performance validation with vLLM on the AMD Instinct™ MI300X accelerator `_ -on the ROCm GitHub repository. +format. For more information, see :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`. .. _mi300x-profiling-tools: @@ -378,11 +376,10 @@ Refer to `vLLM documentation `_ -on the ROCm GitHub repository. +ROCm provides a prebuilt optimized Docker image for validating the performance +of LLM inference with vLLM on the MI300X accelerator. The Docker image includes +ROCm, vLLM, PyTorch, and tuning files in the CSV format. For more information, +see :doc:`/how-to/performance-validation/mi300x/vllm-benchmark`. Maximize throughput ------------------- diff --git a/docs/index.md b/docs/index.md index 8513180ab..8500a91c9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -11,7 +11,7 @@ ROCm is an open-source software platform optimized to extract HPC and AI workloa performance from AMD Instinct accelerators and AMD Radeon GPUs while maintaining compatibility with industry software frameworks. For more information, see [What is ROCm?](./what-is-rocm.rst) -If you're using Radeon GPUs, consider reviewing {doc}`Radeon-specific ROCm documentation`. +If you're using AMD Radeon™ PRO or Radeon GPUs in a workstation setting with a display connected, review {doc}`Radeon-specific ROCm documentation`. 
Installation instructions are available from: @@ -45,7 +45,7 @@ ROCm documentation is organized into the following categories: * [Using ROCm for HPC](./how-to/rocm-for-hpc/index.rst) * [Fine-tuning LLMs and inference optimization](./how-to/llm-fine-tuning-optimization/index.rst) * [System optimization](./how-to/system-optimization/index.rst) -* [AMD Instinct MI300X tuning guides](./how-to/tuning-guides/mi300x/index.rst) +* [AMD Instinct MI300X performance validation and tuning](./how-to/tuning-guides/mi300x/index.rst) * [GPU cluster networking](https://rocm.docs.amd.com/projects/gpu-cluster-networking/en/latest/index.html) * [System debugging](./how-to/system-debugging.md) * [Using MPI](./how-to/gpu-enabled-mpi.rst) diff --git a/docs/reference/gpu-arch-specs.rst b/docs/reference/gpu-arch-specs.rst index 116649434..73c5b7359 100644 --- a/docs/reference/gpu-arch-specs.rst +++ b/docs/reference/gpu-arch-specs.rst @@ -7,6 +7,8 @@ Accelerator and GPU hardware specifications The following tables provide an overview of the hardware specifications for AMD Instinct™ accelerators, and AMD Radeon™ PRO and Radeon™ GPUs. +For more information about ROCm hardware compatibility, see the ROCm `Compatibility matrix `_. + .. tab-set:: .. 
tab-item:: AMD Instinct accelerators @@ -35,11 +37,11 @@ The following tables provide an overview of the hardware specifications for AMD - CDNA3 - gfx941 or gfx942 - 192 - - 304 + - 304 (38 per XCD) - 64 - 64 - 256 - - 32 + - 32 (4 per XCD) - 32 - 16 per 2 CUs - 64 per 2 CUs @@ -50,11 +52,11 @@ The following tables provide an overview of the hardware specifications for AMD - CDNA3 - gfx940 or gfx942 - 128 - - 228 + - 228 (38 per XCD) - 64 - 64 - 256 - - 24 + - 24 (4 per XCD) - 32 - 16 per 2 CUs - 64 per 2 CUs @@ -80,7 +82,7 @@ The following tables provide an overview of the hardware specifications for AMD - CDNA2 - gfx90a - 128 - - 208 + - 208 (104 per GCD) - 64 - 64 - @@ -233,6 +235,22 @@ The following tables provide an overview of the hardware specifications for AMD - L0 Instruction Cache (KiB) - VGPR File (KiB) - SGPR File (KiB) + * + - Radeon PRO V710 + - RDNA3 + - gfx1101 + - 28 + - 54 + - 32 + - 128 + - 56 + - 4 + - 256 + - 32 + - 16 + - 32 + - 768 + - 16 * - Radeon PRO W7900 Dual Slot - RDNA3 @@ -247,8 +265,8 @@ The following tables provide an overview of the hardware specifications for AMD - 32 - 16 - 32 - - 384 - - 20 + - 768 + - 16 * - Radeon PRO W7900 - RDNA3 @@ -263,8 +281,8 @@ The following tables provide an overview of the hardware specifications for AMD - 32 - 16 - 32 - - 384 - - 20 + - 768 + - 16 * - Radeon PRO W7800 - RDNA3 @@ -279,8 +297,8 @@ The following tables provide an overview of the hardware specifications for AMD - 32 - 16 - 32 - - 384 - - 20 + - 768 + - 16 * - Radeon PRO W7700 - RDNA3 @@ -295,8 +313,8 @@ The following tables provide an overview of the hardware specifications for AMD - 32 - 16 - 32 - - 384 - - 20 + - 768 + - 16 * - Radeon PRO W6800 - RDNA2 @@ -311,8 +329,8 @@ The following tables provide an overview of the hardware specifications for AMD - 16 - 16 - 32 - - 256 - - 20 + - 512 + - 16 * - Radeon PRO W6600 - RDNA2 @@ -327,8 +345,8 @@ The following tables provide an overview of the hardware specifications for AMD - 16 - 16 - 32 
- - 256 - - 20 + - 512 + - 16 * - Radeon PRO V620 - RDNA2 @@ -343,8 +361,8 @@ The following tables provide an overview of the hardware specifications for AMD - 16 - 16 - 32 - - 256 - - 20 + - 512 + - 16 * - Radeon Pro W5500 - RDNA @@ -359,7 +377,7 @@ The following tables provide an overview of the hardware specifications for AMD - 16 - 16 - 32 - - 256 + - 512 - 20 * - Radeon Pro VII @@ -414,8 +432,8 @@ The following tables provide an overview of the hardware specifications for AMD - 32 - 16 - 32 - - 384 - - 20 + - 768 + - 16 * - Radeon RX 7900 XT - RDNA3 @@ -430,8 +448,8 @@ The following tables provide an overview of the hardware specifications for AMD - 32 - 16 - 32 - - 384 - - 20 + - 768 + - 16 * - Radeon RX 7900 GRE - RDNA3 @@ -446,8 +464,8 @@ The following tables provide an overview of the hardware specifications for AMD - 32 - 16 - 32 - - 384 - - 20 + - 768 + - 16 * - Radeon RX 7800 XT - RDNA3 @@ -462,8 +480,8 @@ The following tables provide an overview of the hardware specifications for AMD - 32 - 16 - 32 - - 384 - - 20 + - 768 + - 16 * - Radeon RX 7700 XT - RDNA3 @@ -478,8 +496,8 @@ The following tables provide an overview of the hardware specifications for AMD - 32 - 16 - 32 - - 384 - - 20 + - 768 + - 16 * - Radeon RX 7600 - RDNA3 @@ -494,8 +512,8 @@ The following tables provide an overview of the hardware specifications for AMD - 32 - 16 - 32 - - 256 - - 20 + - 512 + - 16 * - Radeon RX 6950 XT - RDNA2 @@ -510,8 +528,8 @@ The following tables provide an overview of the hardware specifications for AMD - 16 - 16 - 32 - - 256 - - 20 + - 512 + - 16 * - Radeon RX 6900 XT - RDNA2 @@ -526,8 +544,8 @@ The following tables provide an overview of the hardware specifications for AMD - 16 - 16 - 32 - - 256 - - 20 + - 512 + - 16 * - Radeon RX 6800 XT - RDNA2 @@ -542,8 +560,8 @@ The following tables provide an overview of the hardware specifications for AMD - 16 - 16 - 32 - - 256 - - 20 + - 512 + - 16 * - Radeon RX 6800 - RDNA2 @@ -558,8 +576,8 @@ The following tables 
provide an overview of the hardware specifications for AMD - 16 - 16 - 32 - - 256 - - 20 + - 512 + - 16 * - Radeon RX 6750 XT - RDNA2 @@ -574,8 +592,8 @@ The following tables provide an overview of the hardware specifications for AMD - 16 - 16 - 32 - - 256 - - 20 + - 512 + - 16 * - Radeon RX 6700 XT - RDNA2 @@ -590,8 +608,8 @@ The following tables provide an overview of the hardware specifications for AMD - 16 - 16 - 32 - - 256 - - 20 + - 512 + - 16 * - Radeon RX 6700 - RDNA2 @@ -606,8 +624,8 @@ The following tables provide an overview of the hardware specifications for AMD - 16 - 16 - 32 - - 256 - - 20 + - 512 + - 16 * - Radeon RX 6650 XT - RDNA2 @@ -622,8 +640,8 @@ The following tables provide an overview of the hardware specifications for AMD - 16 - 16 - 32 - - 256 - - 20 + - 512 + - 16 * - Radeon RX 6600 XT - RDNA2 @@ -638,8 +656,8 @@ The following tables provide an overview of the hardware specifications for AMD - 16 - 16 - 32 - - 256 - - 20 + - 512 + - 16 * - Radeon RX 6600 - RDNA2 @@ -654,8 +672,8 @@ The following tables provide an overview of the hardware specifications for AMD - 16 - 16 - 32 - - 256 - - 20 + - 512 + - 16 * - Radeon VII - GCN5.1 @@ -682,7 +700,7 @@ For more information about the terms used, see the **LLVM target name** -Argument to pass to clang in `--offload-arch` to compile code for the given +Argument to pass to clang in ``--offload-arch`` to compile code for the given architecture. **VRAM** @@ -717,13 +735,13 @@ data and instructions. Similar to the L3 Cache on CDNA/GCN architectures. **L2 Cache** -Size of the level 3 cache. Shared by all compute units on the same GCD. Caches +Size of the level 2 cache. Shared by all compute units on the same GCD. Caches data and instructions. **Graphics L1 Cache (RDNA only)** An additional cache level that only exists in RDNA architectures. Local to a -work group processor. +shader array. **L1 Vector Cache (CDNA/GCN only)** @@ -770,3 +788,7 @@ scalar instructions. **GCD** Graphics Compute Die. 
+ +**XCD** + +Accelerator Complex Die. diff --git a/docs/reference/precision-support.rst b/docs/reference/precision-support.rst index 6155cc837..fadd4b7d1 100644 --- a/docs/reference/precision-support.rst +++ b/docs/reference/precision-support.rst @@ -41,6 +41,8 @@ together with their corresponding HIP type and a short description. - ``int64_t``, ``uint64_t`` - A signed or unsigned 64-bit integer +.. _precision_support_floating_point_types: + Floating-point types ========================================== diff --git a/docs/release/versions.md b/docs/release/versions.md index 6d1e5b5fd..fbc8170a6 100644 --- a/docs/release/versions.md +++ b/docs/release/versions.md @@ -8,6 +8,8 @@ | Version | Release date | | ------- | ------------ | +| [6.2.4](https://rocm.docs.amd.com/en/docs-6.2.4/) | November 6, 2024 | + | [6.2.2](https://rocm.docs.amd.com/en/docs-6.2.2/) | September 27, 2024 | | [6.2.1](https://rocm.docs.amd.com/en/docs-6.2.1/) | September 20, 2024 | | [6.2.0](https://rocm.docs.amd.com/en/docs-6.2.0/) | August 2, 2024 | diff --git a/docs/sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in index 9dd8af346..117ea4872 100644 --- a/docs/sphinx/_toc.yml.in +++ b/docs/sphinx/_toc.yml.in @@ -70,9 +70,11 @@ subtrees: - file: how-to/system-optimization/w6000-v620.md title: AMD RDNA 2 - file: how-to/tuning-guides/mi300x/index.rst - title: AMD MI300X tuning guides + title: AMD MI300X performance validation and tuning subtrees: - entries: + - file: how-to/performance-validation/mi300x/vllm-benchmark.rst + title: Performance validation - file: how-to/tuning-guides/mi300x/system.rst title: System tuning - file: how-to/tuning-guides/mi300x/workload.rst diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in index 10d473385..ed4e04035 100644 --- a/docs/sphinx/requirements.in +++ b/docs/sphinx/requirements.in @@ -1,2 +1,2 @@ -rocm-docs-core==1.8.1 +rocm-docs-core==1.8.3 sphinx-reredirects diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt index 
39b60b7db..94f42069b 100644 --- a/docs/sphinx/requirements.txt +++ b/docs/sphinx/requirements.txt @@ -92,7 +92,7 @@ requests==2.32.3 # via # pygithub # sphinx -rocm-docs-core==1.8.1 +rocm-docs-core==1.8.3 # via -r requirements.in smmap==5.0.1 # via gitdb diff --git a/tools/autotag/templates/extra_components/6.2.4.md b/tools/autotag/templates/extra_components/6.2.4.md new file mode 100644 index 000000000..37f6d2962 --- /dev/null +++ b/tools/autotag/templates/extra_components/6.2.4.md @@ -0,0 +1,35 @@ +### Hardware architecture support updates + +Updated the following math and primitives libraries to pre-enable support for +an upcoming hardware architecture. + +* hipCUB (3.2.1) + +* hipFFT (1.0.16) + +* hipRAND (2.11.1) + +* rocALUTION (3.2.1) + +* rocBLAS (4.2.4) + +* rocFFT (1.0.30) + +* rocPRIM (3.2.2) + +* rocRAND (3.1.1) + +* rocSOLVER (3.26.2) + +* rocSPARSE (3.2.1) + +* rocThrust (3.1.1) + +### **AMD SMI** (24.6.3) + +#### Resolved issues + +* Fixed support for the API calls `amdsmi_get_gpu_process_isolation` and + `amdsmi_clean_gpu_local_data`, along with the + `amd-smi set --process-isolation <0 or 1>` command. See issue + [#3500](https://github.com/ROCm/ROCm/issues/3500) on GitHub. diff --git a/tools/autotag/templates/highlights/6.2.4.md b/tools/autotag/templates/highlights/6.2.4.md new file mode 100644 index 000000000..4d8af7762 --- /dev/null +++ b/tools/autotag/templates/highlights/6.2.4.md @@ -0,0 +1,55 @@ +# ROCm 6.2.4 release notes + +The release notes provide a summary of notable changes since the previous ROCm release. 
+ +- [Release highlights](#release-highlights) + +- [Operating system and hardware support changes](#operating-system-and-hardware-support-changes) + +- [ROCm components versioning](#rocm-components) + +- [Detailed component changes](#detailed-component-changes) + +- [ROCm known issues](#rocm-known-issues) + +- [ROCm upcoming changes](#rocm-upcoming-changes) + +```{note} +If you’re using Radeon™ PRO or Radeon GPUs in a workstation setting with a +display connected, continue to use ROCm 6.2.3. See the [Use ROCm on Radeon +GPUs](https://rocm.docs.amd.com/projects/radeon/en/latest/index.html) +documentation to verify compatibility and system requirements. +``` + +## Release highlights + +The following are notable new features and improvements in ROCm 6.2.4. For changes to individual components, see +[Detailed component changes](#detailed-component-changes). + +#### ROCm documentation updates + +ROCm documentation continues to be updated to provide clearer and more comprehensive guidance for +a wider variety of user needs and use cases. + +* Added a new GPU cluster networking guide. See + [Cluster network performance validation for AMD Instinct accelerators](https://rocm.docs.amd.com/projects/gpu-cluster-networking/en/latest/index.html). + + This documentation provides guidelines on validating network configurations + in single-node and multi-node environments to attain optimal speed and bandwidth + in AMD Instinct-powered clusters. + +* Updated the HIP runtime documentation. + + * Added a new section on how to use [HIP graphs](https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/hipgraph.html). + + * Added a new section about the [Stream ordered memory allocator (SOMA)](https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/stream_ordered_allocator.html). + + * Updated the [Porting CUDA driver API](https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/hip_porting_driver_api.html) section. 
+ +* Updated the [Post-installation instructions](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/install/post-install.html) + with guidance on using the `update-alternatives` utility and environment modules to help you manage multiple ROCm + versions and streamline PATH configuration. + +* Updated [LLM inference performance validation on AMD Instinct + MI300X](https://rocm.docs.amd.com/en/docs-6.2.4/how-to/performance-validation/mi300x/vllm-benchmark.html) + documentation with more detailed guidance, new models, and the `float8` data type. diff --git a/tools/autotag/templates/support/6.2.4.md b/tools/autotag/templates/support/6.2.4.md new file mode 100644 index 000000000..cb5b1f3a2 --- /dev/null +++ b/tools/autotag/templates/support/6.2.4.md @@ -0,0 +1,5 @@ +## Operating system and hardware support changes + +ROCm 6.2.4 adds support for the AMD Radeon PRO V710 GPU. See +[Supported GPUs](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html#supported-gpus) +for more information. diff --git a/tools/autotag/templates/upcoming_changes/6.2.4.md b/tools/autotag/templates/upcoming_changes/6.2.4.md new file mode 100644 index 000000000..24e7e6696 --- /dev/null +++ b/tools/autotag/templates/upcoming_changes/6.2.4.md @@ -0,0 +1,9 @@ +The following changes to the ROCm software stack are anticipated for future releases. + +### rocm-llvm-alt + +The `rocm-llvm-alt` package will be removed in an upcoming release. Users relying on the functionality provided by the closed-source compiler should transition to the open-source compiler. Once the `rocm-llvm-alt` package is removed, any compilation requesting functionality provided by the closed-source compiler will result in a Clang warning: "*[AMD] proprietary optimization compiler has been removed*". + +### rccl-rdma-sharp-plugins + +The RCCL plugin package, `rccl-rdma-sharp-plugins`, will be removed in an upcoming ROCm release.