Compare commits

...

130 Commits

Author SHA1 Message Date
Istvan Kiss
848159f6c3 WIP 2024-07-01 13:36:11 +02:00
Istvan Kiss
ddf810a781 WIP 2024-07-01 13:20:21 +02:00
Istvan Kiss
a7cc71df62 WIP 2024-07-01 13:14:17 +02:00
Istvan Kiss
5c4674027b Update docs/reference/env-variables.rst
Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>
2024-07-01 13:09:34 +02:00
Istvan Kiss
f111d5654c Update docs/reference/env-variables.rst
Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>
2024-07-01 13:09:23 +02:00
Istvan Kiss
7f43dbbbb7 Update docs/reference/env-variables.rst
Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>
2024-07-01 13:09:13 +02:00
Istvan Kiss
526db1c474 Update docs/reference/env-variables.rst
Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>
2024-07-01 12:48:13 +02:00
Istvan Kiss
d612ae390c Update docs/reference/env-variables.rst
Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>
2024-07-01 12:40:50 +02:00
Istvan Kiss
cca2dc23c0 Update docs/reference/env-variables.rst
Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>
2024-07-01 12:39:37 +02:00
Istvan Kiss
0a88853ca3 Update docs/reference/env-variables.rst
Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>
2024-07-01 12:39:17 +02:00
Istvan Kiss
4164cdc606 Update docs/reference/env-variables.rst
Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>
2024-07-01 12:26:41 +02:00
Istvan Kiss
712e63d0ad WIP 2024-06-29 12:52:00 +02:00
Istvan Kiss
f2adaebbcd Change usage to value 2024-06-29 12:51:59 +02:00
Istvan Kiss
3f74a73220 Update docs/reference/env-variables.rst
Co-authored-by: srawat <120587655+SwRaw@users.noreply.github.com>
2024-06-29 12:51:59 +02:00
Istvan Kiss
84e3063e0a Remove type column leftover 2024-06-29 12:51:58 +02:00
Istvan Kiss
06e8d93bf9 Update environment variables 2024-06-29 12:51:58 +02:00
Istvan Kiss
55ee1d1b95 Fix 2024-06-29 12:51:57 +02:00
Istvan Kiss
97619286df Fix meta 2024-06-29 12:51:57 +02:00
Istvan Kiss
f0a0d4e738 Minor fixes. 2024-06-29 12:51:56 +02:00
Istvan Kiss
517b8645b4 Removed the unknow variables. 2024-06-29 12:51:56 +02:00
Istvan Kiss
367d6cdf5e Fixes.
Fixes
2024-06-29 12:51:55 +02:00
Istvan Kiss
4db7ffeb69 Update rocPRIM, hipCUB and rocThrust env variables 2024-06-29 12:51:55 +02:00
Istvan Kiss
afb343fce6 WIP on env_variables 2024-06-29 12:51:54 +02:00
Istvan Kiss
fcc99a324a Update env variables 2024-06-29 12:51:54 +02:00
Istvan Kiss
6367a53775 Minor changes 2024-06-29 12:51:53 +02:00
Istvan Kiss
e19b947c26 Stying refactor 2024-06-29 12:51:53 +02:00
Bence Parajdi
f181f84b97 fix review commenets 2024-06-29 12:51:52 +02:00
Bence Parajdi
1f2d583372 remove non-public debug option 2024-06-29 12:51:52 +02:00
Bence Parajdi
f8d46afdd2 fix more review comments 2024-06-29 12:51:51 +02:00
Bence Parajdi
9068df3bb7 fix review comments 2024-06-29 12:51:51 +02:00
Bence Parajdi
9adeb56ebd fix typos and add missing words to wordlist 2024-06-29 12:51:50 +02:00
Bence Parajdi
37775f2ff4 add missing env variables 2024-06-29 12:51:50 +02:00
Mátyás Aradi
9e7a8a93cd Update based on review comments 2024-06-29 12:51:49 +02:00
Istvan Kiss
8e90fdbc4a Initial version 2024-06-29 12:51:49 +02:00
alexxu-amd
325a2fd54c External CI: Fix a typo from composable_kernel pipeline (#3373)
* add libdrm-dev lib to CK dependency list

* change INSTANCE_ONLY to INSTANCES_ONLY
2024-06-28 15:39:08 -04:00
Peter Park
a552f9f6b8 Add fixes to vLLM install and triton kernel optimization (#3366)
* Add fixes to vLLM install and triton kernel optimization

* Update TGI how-to

remove extra step in TGI
2024-06-27 14:28:20 -04:00
Joseph Macaranas
accb1347ea External CI: Add initial support for rocAL (#3365) 2024-06-27 13:58:10 -04:00
alexxu-amd
699b604f00 Add INSTANCE_ONLY cmake flag; change pool to ultra; increase time limit to 3.5hr (#3275) 2024-06-27 10:01:43 -04:00
Sam Wu
ce08245f4c Merge pull request #3362 from peterjunpark/fix/index-styling
Fix card text color in index
2024-06-26 15:43:50 -06:00
Peter Jun Park
5c9d071e85 remove card text styling 2024-06-26 14:12:25 -04:00
randyh62
356ad4ab47 remove Magma (#3361)
* remove Magma

* missed one
2024-06-26 10:00:39 -07:00
Sam Wu
57d59bfcc6 Merge pull request #3358 from samjwu/articleinfo
Remove article info for moved or deleted pages
2024-06-26 09:44:49 -06:00
Sam Wu
791285772d Remove article info for moved or deleted pages 2024-06-25 16:45:42 -06:00
abhimeda
217830fe25 added matrices artifact uploading code from rocSPARSE (#3356) 2024-06-25 15:04:52 -04:00
randyh62
f07608bc92 added ROCm Core and AMD SMI (#3348)
* added ROCm Core and AMD SMI

* fix URLs
2024-06-21 16:36:39 -07:00
Peter Park
1435634f5c reorder toc (#3346) 2024-06-21 18:53:55 -04:00
Sam Wu
ee384ba0e0 Merge pull request #3345 from ROCm/dependabot/pip/docs/sphinx/sphinx-reredirects-0.1.4
Bump sphinx-reredirects from 0.1.3 to 0.1.4 in /docs/sphinx
2024-06-21 16:46:24 -06:00
dependabot[bot]
bb0090882c Bump sphinx-reredirects from 0.1.3 to 0.1.4 in /docs/sphinx
Bumps [sphinx-reredirects](https://github.com/documatt/sphinx-reredirects) from 0.1.3 to 0.1.4.
- [Commits](https://github.com/documatt/sphinx-reredirects/compare/v0.1.3...v0.1.4)

---
updated-dependencies:
- dependency-name: sphinx-reredirects
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-06-21 22:37:37 +00:00
Peter Park
22e9f6f373 Add "Using ROCm for HPC" guide (#3302)
* Add ROCm for HPC

* Update index and toc

* Add TMs in other tutorials

* Add hpc apps table

Spellcheck

add stack image and fix links

Add descriptions

update copy

Update copy

add ref

Finish adding app descriptions

tweak descs

fix line lengths

* Revert "Add TMs in other tutorials"

This reverts commit 08a1a80e57.

* Add links to install and compat matrix

* Update HPC stack graphic and add some links

Add hpc and td to wordlist

fix links

* Apply suggestions from Leo's review

Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>

Update docs/how-to/rocm-for-hpc/index.rst

Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>

Update docs/how-to/rocm-for-hpc/index.rst

Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>

Update docs/how-to/rocm-for-hpc/index.rst

Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>

Update docs/how-to/rocm-for-hpc/index.rst

Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>

Update docs/how-to/rocm-for-hpc/index.rst

Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>

fix formatting

Update words

* update wordlist

* Update hpc app descriptions with content from InfinityHub catalog
2024-06-21 16:15:18 -04:00
randyh62
d994302df7 license information updated (#3339)
* license information updated

* Young's comments

* Sam's comment
2024-06-21 09:22:57 -07:00
Peter Park
9d4eb5eff2 Add RHEL 9.4 to compat matrix (#3332)
* Add RHEL 9.4 to compat matrix

* add rhel 9.4 footnote in compat matrix
2024-06-19 15:03:29 -04:00
danielsu-amd
8b95ab0a02 External CI: remove redundant rocm-examples build flags (#3331) 2024-06-19 13:08:31 -04:00
danielsu-amd
e74245fbe4 External CI: Latest source pipeline for rocm-examples (#3317) 2024-06-19 09:59:02 -04:00
Peter Park
778c8e2c05 Add Oracle Linux 8.9 to 6.1.1 changelog (#3327) 2024-06-18 18:29:09 -04:00
Peter Park
361983fa48 Add OL support note to compat matrix (#3325)
Fix footnote

Footnote order

Satisfy spellcheck
2024-06-18 17:32:07 -04:00
Sam Wu
3dff636d40 Merge pull request #3314 from ROCm/dependabot/pip/docs/sphinx/urllib3-2.2.2
Bump urllib3 from 2.2.1 to 2.2.2 in /docs/sphinx
2024-06-18 14:52:26 -06:00
Peter Park
1d976a1871 Add Radeon PRO dual slot to hw specs (#3318) 2024-06-18 15:22:43 -04:00
randyh62
ebfec1b7c1 remove nvcc (#3313)
* remove nvcc

* Update CHANGELOG to match 6.0.0 template

---------

Co-authored-by: Sam Wu <22262939+samjwu@users.noreply.github.com>
2024-06-18 12:11:40 -07:00
dependabot[bot]
66b71ba3c8 Bump urllib3 from 2.2.1 to 2.2.2 in /docs/sphinx
Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.2.1 to 2.2.2.
- [Release notes](https://github.com/urllib3/urllib3/releases)
- [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst)
- [Commits](https://github.com/urllib3/urllib3/compare/2.2.1...2.2.2)

---
updated-dependencies:
- dependency-name: urllib3
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-06-17 23:41:54 +00:00
Joseph Macaranas
e903ffa952 External CI: Update aqlprofile binary used for rocprofiler (#3304) 2024-06-17 14:23:36 -04:00
Peter Park
fe1c2e9529 Update link to ROCr Debug Agent to docs portal (#3303)
* Fix link to debug agent in what-is-rocm

* ROCm --> ROCR

add index

* ROCR --> ROCr

* Change ROCm Debug Agent to ROCr Debug Agent in docs
2024-06-14 17:52:49 -04:00
Joseph Macaranas
923141f300 External CI: Fixes for two repos to work with latest source (#3293)
With MIOpen now building with latest source on External CI, this unblocked AMDMIGraphX from building with latest source.

Determined rocMLIR also needed to be built with latest source as a dependency.
2024-06-13 11:55:40 -04:00
David Galiffi
c91e15a580 Moving rocm-build to the tools folder (#3285)
[Why]
To maintain the "pitchfork layout" convention used by the repository.

[How]
- Update README.md
- Update INFRA_REPO in ROCm.mk
   - Updated to new path: ROCm/tools/rocm-build

---------

Signed-off-by: David Galiffi <David.Galiffi@amd.com>
2024-06-12 17:12:06 -04:00
Peter Park
d24b3fab61 Fix ExLlama-v2 code snippet (#3281) 2024-06-12 17:03:04 -04:00
Jeffrey Novotny
e864aa50ac Remove AOMP from compatibility matrix (#3289) 2024-06-12 14:17:32 -04:00
srawat
2531f0aa03 Update link to command-line argument reference (#3270)
* Added deleted sections to openmp.md and other improvements

* Update openmp.md
2024-06-12 11:53:22 -04:00
Joseph Macaranas
13e14363cc External CI: updated MIOpen dependencies (#3278) 2024-06-12 11:23:21 -04:00
Joseph Macaranas
664c047311 External CI: Package rocSPARSE matrices for testers to consume (#3276) 2024-06-12 11:22:46 -04:00
Istvan Kiss
78fdcdf48d Update docs/conceptual/setting-cus.rst
Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>
2024-06-12 16:17:42 +02:00
Peter Park
c4181b9245 Remove aomp from What is ROCm? page (#3282) 2024-06-11 11:37:11 -04:00
alexxu-amd
7a13a6ee86 Merge pull request #3274 from ROCm/amd/alexxu12/fixStagingCI
Fix hipTensor build error on develop branch
2024-06-11 11:02:26 -04:00
Joseph Macaranas
ace708935d External CI: updated rocr_debug_agent dependencies (#3277) 2024-06-11 10:59:13 -04:00
alexxu-amd
cff1b2b021 revert changes for manual test 2024-06-11 10:39:28 -04:00
alexxu-amd
d7eacf56e3 adjust variables for manual test 2024-06-11 10:20:54 -04:00
alexxu-amd
bddbc6b444 revert changes to see if the build still fails 2024-06-11 10:07:20 -04:00
alexxu-amd
67f04977fb Move double dash to parameter for generic use case 2024-06-11 09:53:14 -04:00
randyh62
f500c32989 add quarantine_size_mb (#3264)
* add quarantine_size_mb

* Update docs/conceptual/using-gpu-sanitizer.md

Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>

* Update docs/conceptual/using-gpu-sanitizer.md

Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>

* format fix

* format fix again

* ASAN capitalization

* remove particular

* indent bullets

* Leo comments

---------

Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>
2024-06-10 11:59:47 -07:00
alexxu-amd
3c1d39f251 revert changes to rdc 2024-06-10 14:02:57 -04:00
alexxu-amd
93f524586b revert changes made for manual tests 2024-06-10 14:02:04 -04:00
alexxu-amd
b36de1d3d4 delete space 2024-06-10 13:59:33 -04:00
alexxu-amd
627d38412a Revert changes to CK 2024-06-10 13:58:44 -04:00
alexxu-amd
1be99075e2 Change thread number to 32 2024-06-10 13:53:23 -04:00
alexxu-amd
05d7992361 change multithread flag 2024-06-10 13:03:53 -04:00
alexxu-amd
98f2e183a2 change pool back to MEDIUM before merge 2024-06-10 11:56:25 -04:00
alexxu-amd
ab1c62464a change pool to high 2024-06-10 11:38:32 -04:00
alexxu-amd
2e73c56275 Update hipTensor.yml 2024-06-10 11:37:22 -04:00
Joseph Macaranas
f8151b6cb5 rocprofiler-register: Add unit testing (#3272)
Since this component uses the base pool, does not need GPU for testing and is very quick to run, unit testing can be done within the same job.
2024-06-10 11:29:47 -04:00
alexxu-amd
52bccc1819 add variable declaration 2024-06-10 10:51:38 -04:00
alexxu-amd
2b492056ec add multithread Flag to build-cmake to allow hipTensor pass -j16 2024-06-10 10:46:33 -04:00
alexxu-amd
b12e5c32ca Restore hipTensor's original flag, remove GNinja 2024-06-10 10:15:05 -04:00
Joseph Macaranas
8db9220935 External CI: non-interactive apt upgrades (#3271) 2024-06-08 22:20:11 -04:00
alexxu-amd
30851e9c85 Merge pull request #3266 from ROCm/amd/alexxu12/aptScriptTypo
Fix a typo from .azuredevops/templates/steps/dependencies-other.yml
2024-06-07 13:36:37 -04:00
alexxu-amd
fdd0ed080b fix a typo 2024-06-07 13:29:14 -04:00
Joseph Macaranas
d3f634ea33 Remove branch filter for aomp pipeline trigger (#3258)
Previous filter was not triggering this CI pipeline when ROCm-Runtime build was triggered from a pipeline completion trigger of llvm-project.
2024-06-07 11:14:32 -04:00
Sam Wu
6c73abbaea Merge pull request #3262 from ROCm/bb-develop-6.1.2-pr
Add the manifest file for ROCm6.1.2
2024-06-06 17:07:14 -06:00
Sam Wu
c49877adc9 Merge branch 'roc-6.1.x' into develop 2024-06-06 17:06:13 -06:00
Sam Wu
49404d69f8 Merge pull request #3263 from ROCm/dependabot/pip/docs/sphinx/rocm-docs-core-1.4.0
Bump rocm-docs-core from 1.2.0 to 1.4.0 in /docs/sphinx
2024-06-06 14:18:31 -06:00
dependabot[bot]
d17e602769 Bump rocm-docs-core from 1.2.0 to 1.4.0 in /docs/sphinx
Bumps [rocm-docs-core](https://github.com/ROCm/rocm-docs-core) from 1.2.0 to 1.4.0.
- [Release notes](https://github.com/ROCm/rocm-docs-core/releases)
- [Changelog](https://github.com/ROCm/rocm-docs-core/blob/develop/CHANGELOG.md)
- [Commits](https://github.com/ROCm/rocm-docs-core/compare/v1.2.0...v1.4.0)

---
updated-dependencies:
- dependency-name: rocm-docs-core
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-06-06 20:04:21 +00:00
Wang, Yanyao
2fdbc8b475 Add the manifest file for ROCm6.1.2 2024-06-06 12:44:08 -07:00
Peter Park
7d3fb25725 Update links in compat matrix and what-is-rocm (#3253)
* Update links in compat matrix and what-is-rocm

* Tensorflow -> TensorFlow

* Remove extra lines

* Revert "Remove extra lines"

This reverts commit 607c4323ac.

ROCm Debug Agent
2024-06-06 13:27:00 -04:00
alexxu-amd
8c3eaa1fda Update hipTensor.yml 2024-06-06 11:56:08 -04:00
alexxu-amd
acca214a29 Update hipTensor.yml 2024-06-06 11:43:07 -04:00
Wang, Yanyao
b7c6671e06 Fix Markdown formate for the linter check 2024-06-05 13:44:50 -07:00
Wang, Yanyao
27bd772bbe Update the branch of ROCm repo after testing 2024-06-05 13:44:50 -07:00
Wang, Yanyao
68c45d30b5 Build ROCm from source 2024-06-05 13:44:50 -07:00
Sam Wu
35835c4289 Fix first link in compatibility matrix table (#3239)
* Fix first link in compatibility matrix table

* Revert "Fix first link in compatibility matrix table"

This reverts commit 069c5c116a.

* Remove sticky header and unused css

* Remove container from hardware specs matrix

---------

Co-authored-by: Peter Jun Park <peter.park@amd.com>
2024-06-05 15:48:27 -04:00
Wang, Yanyao
73b7b02c4f Fix Markdown formate for the linter check 2024-06-05 12:15:12 -07:00
Wang, Yanyao
ba7afa9808 Update the branch of ROCm repo after testing 2024-06-05 12:15:12 -07:00
Wang, Yanyao
ae6eac2823 Build ROCm from source 2024-06-05 12:15:12 -07:00
alexxu-amd
6eb6a5bd90 change compiler from hipcc to amdclang++ 2024-06-05 14:14:24 -04:00
Young Hui - AMD
55bb127e9a fix links for MIVisionX (#3240) 2024-06-05 11:55:11 -04:00
Sam Wu
e65e9307f5 Add 6.1.2 to version list (#3238) 2024-06-05 11:25:35 -04:00
Peter Park
6494885359 Rename fine-tuning and optimization guide directory and fix index.md (#3242)
* Mv fine-tuning and optimization files

* Reorder index.md

* Rename images directory

* Fix internal links
2024-06-05 11:11:00 -04:00
Sam Wu
266f502010 Update manifest to 6.1.2 2024-06-05 11:06:24 -04:00
abhimeda
bf08674992 Built rccl using latest source code (#3230) 2024-06-04 17:50:36 -04:00
alexxu-amd
8826b10b92 Updates cmake flag to run CK with instance_only on all gpu targets 2024-06-04 17:40:48 -04:00
alexxu-amd
a96ec80cb0 Increase timeout limites to a day for CK 2024-06-04 13:05:41 -04:00
alexxu-amd
57506ba947 upgrade pool to HIGH for CK 2024-06-04 11:59:16 -04:00
alexxu-amd
4b67c8725b change compiler to clang++ and build for instance only 2024-06-04 11:57:18 -04:00
alexxu-amd
258e504595 change pool to medium 2024-06-04 09:52:36 -04:00
alexxu-amd
156215efcc Upgrade pool to HIGH 2024-06-04 09:38:50 -04:00
alexxu-amd
7c448eec8f add MI250 target to CK 2024-06-04 09:38:05 -04:00
alexxu-amd
29f9b4ab23 chang gpu target to gfx90a 2024-06-03 15:39:41 -04:00
alexxu-amd
6e99bef8f4 change pool to BASE 2024-06-03 14:42:24 -04:00
alexxu-amd
5025a03f79 change hipTensor compiler to hipcc 2024-06-03 10:39:36 -04:00
alexxu-amd
527840e502 Merge branch 'develop' of https://github.com/ROCm/ROCm into amd/alexxu12/fixStagingCI 2024-05-31 15:30:32 -04:00
amd-jmacaran
a65db6b47d temp change for testing experimental 2024-05-31 15:25:42 -04:00
alexxu-amd
b69b997d69 Change pool to LOW 2024-05-31 14:12:57 -04:00
alexxu-amd
52f8a0ad36 change default branch to develop 2024-05-31 13:46:19 -04:00
alexxu-amd
ad9cdaa2a9 Switch to staging branch 2024-05-31 11:02:01 -04:00
154 changed files with 12813 additions and 1010 deletions

View File

@@ -17,11 +17,7 @@ resources:
pipelines:
- pipeline: rocr-runtime_pipeline
source: \ROCR-Runtime
trigger:
branches:
include:
- master
trigger: true
# this job will only be triggered after successful build sequence of llvm-project and ROCR-Runtime
trigger: none

View File

@@ -84,8 +84,8 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
extraBuildFlags: >-
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
-DCMAKE_BUILD_TYPE=Release
-DAMDGPU_TARGETS=gfx1030;gfx1100
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm

View File

@@ -16,6 +16,7 @@ parameters:
- libbz2-dev
- nlohmann-json3-dev
- libgtest-dev
- libdrm-dev
- name: rocmDependencies
type: object
default:
@@ -30,6 +31,7 @@ parameters:
- rocprofiler-register
- clr
- rocminfo
- roctracer
jobs:
- job: MIOpen

View File

@@ -12,6 +12,7 @@ parameters:
- ninja-build
- git
- python3-pip
- libdrm-dev
- name: rocmDependencies
type: object
default:
@@ -24,10 +25,11 @@ parameters:
jobs:
- job: composable_kernel
timeoutInMinutes: 210
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ variables.MEDIUM_BUILD_POOL }}
pool: ${{ variables.ULTRA_BUILD_POOL }}
workspace:
clean: all
steps:
@@ -57,6 +59,6 @@ jobs:
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DCMAKE_BUILD_TYPE=Release
-DGPU_TARGETS=gfx1030;gfx1100
-DINSTANCES_ONLY=ON
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml

View File

@@ -65,3 +65,13 @@ jobs:
-DBUILD_CLIENTS_SAMPLES=OFF
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
artifactName: hipSPARSE
publish: false
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
parameters:
sourceDir: $(Build.SourcesDirectory)/build/clients
contentsString: matrices/**
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
artifactName: testMatrices

View File

@@ -55,9 +55,9 @@ jobs:
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/rocm/llvm
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DROCM_PATH="$(Agent.BuildDirectory)/rocm"
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
-DCMAKE_BUILD_TYPE=Release
-DHIPTENSOR_BUILD_TESTS=ON
-DAMDGPU_TARGETS=gfx1030;gfx1100
-GNinja
-DAMDGPU_TARGETS=gfx90a
multithreadFlag: -- -j32
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml

View File

@@ -61,6 +61,7 @@ jobs:
parameters:
dependencyList: ${{ parameters.rocmDependencies }}
dependencySource: tag-builds
- script: chmod +x $(Agent.BuildDirectory)/rocm/bin/hipify-perl
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
extraBuildFlags: >-

View File

@@ -0,0 +1,138 @@
parameters:
- name: checkoutRepo
type: string
default: 'self'
- name: checkoutRef
type: string
default: ''
- name: aptPackages
type: object
default:
- python3-pip
- python3-protobuf
- cmake
- ninja-build
- libprotobuf-dev
- libprotoc-dev
- protobuf-compiler
- liblmdb-dev
- pkg-config
- ffmpeg
- libavcodec-dev
- libavformat-dev
- libavutil-dev
- libswscale-dev
- libturbojpeg-dev
- libjpeg-turbo-official=3.0.2-20240124
- libopencv-dev
- name: pipModules
type: object
default:
- numpy
- opencv-python
- torch
- pillow
- name: rocmDependencies
type: object
default:
- rocm-cmake
- llvm-project
- ROCR-Runtime
- clr
- rocDecode
- half
- rpp
- MIVisionX
- aomp
jobs:
- job: rocAL
variables:
- group: common
- template: /.azuredevops/variables-global.yml
pool:
vmImage: ${{ variables.BASE_BUILD_POOL }}
workspace:
clean: all
steps:
- task: Bash@3
displayName: 'Register libjpeg-turbo packages'
inputs:
targetType: inline
script: |
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
wget -q -O- https://packagecloud.io/dcommander/libjpeg-turbo/gpgkey | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/libjpeg-turbo.gpg > /dev/null
echo "deb [signed-by=/etc/apt/trusted.gpg.d/libjpeg-turbo.gpg] https://packagecloud.io/dcommander/libjpeg-turbo/any/ any main" | sudo tee /etc/apt/sources.list.d/libjpeg-turbo.list
sudo apt update
apt-cache show libjpeg-turbo-official | grep Version
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
pipModules: ${{ parameters.pipModules }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
- task: Bash@3
displayName: 'Clone PyBind11'
inputs:
targetType: inline
script: git clone --depth 1 -b v2.11.1 https://github.com/pybind/pybind11
workingDirectory: '$(Build.SourcesDirectory)'
- task: Bash@3
displayName: 'Clone RapidJSON'
inputs:
targetType: inline
script: git clone --depth 1 https://github.com/Tencent/rapidjson.git
workingDirectory: '$(Build.SourcesDirectory)'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
componentName: PyBind11
cmakeBuildDir: '$(Build.SourcesDirectory)/pybind11/build'
customInstallPath: false
installEnabled: false
extraBuildFlags: >-
-DDOWNLOAD_CATCH=ON
-DDOWNLOAD_EIGEN=ON
-GNinja
- task: Bash@3
displayName: 'Install PyBind11'
inputs:
targetType: inline
script: sudo cmake --build . --target install
workingDirectory: '$(Build.SourcesDirectory)/pybind11/build'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
componentName: RapidJSON
cmakeBuildDir: '$(Build.SourcesDirectory)/rapidjson/build'
customInstallPath: false
installEnabled: false
extraBuildFlags: >-
-GNinja
- task: Bash@3
displayName: 'Install RapidJSON'
inputs:
targetType: inline
script: sudo cmake --build . --target install
workingDirectory: '$(Build.SourcesDirectory)/rapidjson/build'
# CI case: download latest default branch build
- ${{ if eq(parameters.checkoutRef, '') }}:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
dependencyList: ${{ parameters.rocmDependencies }}
dependencySource: staging
# manual build case: triggered by ROCm/ROCm repo
- ${{ if ne(parameters.checkoutRef, '') }}:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
dependencyList: ${{ parameters.rocmDependencies }}
dependencySource: tag-builds
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
extraBuildFlags: >-
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;/opt/libjpeg-turbo
-DCMAKE_INSTALL_PREFIX_PYTHON=$Python3_STDARCH
-DCMAKE_BUILD_TYPE=Release
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml

View File

@@ -10,6 +10,13 @@ parameters:
default:
- cmake
- ninja-build
- git
- python3-pip
- name: rocmDependencies
type: object
default:
- llvm-project
- rocm-cmake
jobs:
- job: rocMLIR
@@ -17,8 +24,6 @@ jobs:
- group: common
- template: /.azuredevops/variables-global.yml
pool: ${{ variables.MEDIUM_BUILD_POOL }}
container:
image: ${{ variables.DOCKER_IMAGE_NAME }}:${{ variables.LATEST_DOCKER_VERSION }}
workspace:
clean: all
steps:
@@ -29,13 +34,25 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
# CI case: download latest default branch build
- ${{ if eq(parameters.checkoutRef, '') }}:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
dependencyList: ${{ parameters.rocmDependencies }}
dependencySource: staging
# manual build case: triggered by ROCm/ROCm repo
- ${{ if ne(parameters.checkoutRef, '') }}:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
dependencyList: ${{ parameters.rocmDependencies }}
dependencySource: tag-builds
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
extraBuildFlags: >-
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/amdclang++
-DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/amdclang
-DCMAKE_PREFIX_PATH=/opt/rocm
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang++
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/clang
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DBUILD_FAT_LIBROCKCOMPILER=1
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml

View File

@@ -75,3 +75,13 @@ jobs:
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip;$(Agent.BuildDirectory)/rocm/hip/cmake
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
artifactName: rocSPARSE
publish: false
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-prepare-package.yml
parameters:
sourceDir: $(Build.SourcesDirectory)/build/clients
contentsString: matrices/**
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
parameters:
artifactName: testMatrices

View File

@@ -5,6 +5,30 @@ parameters:
- name: checkoutRef
type: string
default: ''
- name: aptPackages
type: object
default:
- libglfw3-dev
- name: rocmDependencies
type: object
default:
- AMDMIGraphX
- clr
- hipBLAS
- hipCUB
- HIPIFY
- hipRAND
- hipSOLVER
- hipSPARSE
- llvm-project
- rocBLAS
- rocPRIM
- rocprofiler-register
- ROCR-Runtime
- rocRAND
- rocSOLVER
- rocSPARSE
- rocThrust
jobs:
- job: rocm_examples
@@ -20,5 +44,28 @@ jobs:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
parameters:
aptPackages: ${{ parameters.aptPackages }}
# CI case: download latest default branch build
- ${{ if eq(parameters.checkoutRef, '') }}:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
dependencyList: ${{ parameters.rocmDependencies }}
dependencySource: staging
# manual build case: triggered by ROCm/ROCm repo
- ${{ if ne(parameters.checkoutRef, '') }}:
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
dependencyList: ${{ parameters.rocmDependencies }}
dependencySource: tag-builds
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
# https://github.com/ROCm/HIP/issues/2203
extraBuildFlags: >-
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DROCM_ROOT=$(Agent.BuildDirectory)/rocm
-DCMAKE_HIP_ARCHITECTURES=gfx1030;gfx1100
-DCMAKE_EXE_LINKER_FLAGS=-fgpu-rdc
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml

View File

@@ -21,4 +21,17 @@ jobs:
parameters:
checkoutRepo: ${{ parameters.checkoutRepo }}
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
componentName: rocprofiler-register
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
parameters:
componentName: rocprofiler-register-tests
extraBuildFlags: >-
-DCMAKE_PREFIX_PATH=$(Build.BinariesDirectory)
cmakeBuildDir: 'tests/build'
installEnabled: false
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
parameters:
componentName: rocprofiler-register
testDir: 'tests/build'
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml

View File

@@ -47,7 +47,7 @@ jobs:
variables:
- group: common
- template: /.azuredevops/variables-global.yml
- name: HIP_ROCCLR_HOME
- name: HIP_ROCCLR_HOME
value: $(Agent.BuildDirectory)/rocm
- name: ROCM_PATH
value: $(Agent.BuildDirectory)/rocm
@@ -68,7 +68,7 @@ jobs:
displayName: 'Download aqlprofile'
inputs:
targetType: inline
script: wget -nv https://repo.radeon.com/rocm/apt/6.1/pool/main/h/hsa-amd-aqlprofile/hsa-amd-aqlprofile_1.0.0.60100.60100-82~22.04_amd64.deb
script: wget -nv https://repo.radeon.com/rocm/misc/aqlprofile/ubuntu-22.04/hsa-amd-aqlprofile_1.0.0.60200.60200-crdnnh.14213~22.04_amd64.deb
workingDirectory: '$(Pipeline.Workspace)'
- task: Bash@3
displayName: 'Extract aqlprofile'
@@ -76,7 +76,7 @@ jobs:
targetType: inline
script: |
mkdir hsa-amd-aqlprofile
dpkg-deb -R hsa-amd-aqlprofile_1.0.0.60100.60100-82~22.04_amd64.deb hsa-amd-aqlprofile
dpkg-deb -R hsa-amd-aqlprofile_1.0.0.60200.60200-crdnnh.14213~22.04_amd64.deb hsa-amd-aqlprofile
workingDirectory: '$(Pipeline.Workspace)'
- task: Bash@3
displayName: 'Move aqlprofile'
@@ -84,7 +84,7 @@ jobs:
targetType: inline
script: |
mkdir -p $(Agent.BuildDirectory)/rocm
cp -R hsa-amd-aqlprofile/opt/rocm-6.1.0/* $(Agent.BuildDirectory)/rocm
cp -R hsa-amd-aqlprofile/opt/rocm-6.2.0-14213/* $(Agent.BuildDirectory)/rocm
workingDirectory: '$(Pipeline.Workspace)'
# CI case: download latest default branch build
- ${{ if eq(parameters.checkoutRef, '') }}:

View File

@@ -15,11 +15,13 @@ parameters:
- name: rocmDependencies
type: object
default:
- rocm-cmake
- clr
- llvm-project
- ROCdbgapi
- rocminfo
- ROCR-Runtime
- rocprofiler-register
jobs:
- job: rocr_debug_agent
@@ -56,5 +58,6 @@ jobs:
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake;$(Agent.BuildDirectory)/rocm/lib/cmake/hip
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
-GNinja
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml

View File

@@ -0,0 +1,29 @@
variables:
- group: common
- template: /.azuredevops/variables-global.yml
parameters:
- name: checkoutRef
type: string
default: refs/tags/$(LATEST_RELEASE_TAG)
resources:
repositories:
- repository: pipelines_repo
type: github
endpoint: ROCm
name: ROCm/ROCm
- repository: release_repo
type: github
endpoint: ROCm
name: ROCm/rocAL
ref: ${{ parameters.checkoutRef }}
trigger: none
pr: none
jobs:
- template: ${{ variables.CI_COMPONENT_PATH }}/rocAL.yml
parameters:
checkoutRepo: release_repo
checkoutRef: ${{ parameters.checkoutRef }}

View File

@@ -0,0 +1,29 @@
variables:
- group: common
- template: /.azuredevops/variables-global.yml
parameters:
- name: checkoutRef
type: string
default: refs/tags/$(LATEST_RELEASE_TAG)
resources:
repositories:
- repository: pipelines_repo
type: github
endpoint: ROCm
name: ROCm/ROCm
- repository: release_repo
type: github
endpoint: ROCm
name: ROCm/rocm-examples
ref: ${{ parameters.checkoutRef }}
trigger: none
pr: none
jobs:
- template: ${{ variables.CI_COMPONENT_PATH }}/rocm-examples.yml
parameters:
checkoutRepo: release_repo
checkoutRef: ${{ parameters.checkoutRef }}

View File

@@ -21,10 +21,14 @@ parameters:
half: master
HIP: develop
hipBLAS: develop
hipCUB: develop
hipRAND: develop
hipSOLVER: develop
hipSPARSE: develop
llvm-project: amd-staging
MIOpen: develop
MIVisionX: develop
rdc: develop
rocBLAS: develop
ROCdbgapi : amd-master
rocDecode: develop
@@ -40,6 +44,7 @@ parameters:
rocSOLVER: develop
rocSPARSE: develop
ROCT-Thunk-Interface: master
rocThrust: develop
roctracer: amd-master
rpp: master
- name: componentsFailureOkay

View File

@@ -5,6 +5,9 @@ parameters:
- name: extraBuildFlags
type: string
default: ''
- name: multithreadFlag
type: string
default: ''
- name: cmakeBuildDir
type: string
default: 'build'
@@ -17,6 +20,12 @@ parameters:
- name: installDir
type: string
default: '$(Build.BinariesDirectory)'
- name: customInstallPath
type: boolean
default: true
- name: installEnabled
type: boolean
default: true
steps:
# create workingDirectory if it does not exist and change into it
@@ -25,19 +34,23 @@ steps:
displayName: '${{parameters.componentName }} CMake Flags'
inputs:
workingDirectory: ${{ parameters.cmakeBuildDir }}
cmakeArgs: -DCMAKE_INSTALL_PREFIX=${{ parameters.installDir }} ${{ parameters.extraBuildFlags }} ..
${{ if eq(parameters.customInstallPath, true) }}:
cmakeArgs: -DCMAKE_INSTALL_PREFIX=${{ parameters.installDir }} ${{ parameters.extraBuildFlags }} ..
${{ else }}:
cmakeArgs: ${{ parameters.extraBuildFlags }} ..
# equivalent to running make $cmakeTargetDir from $cmakeBuildDir
# i.e., cd $cmakeBuildDir; make $cmakeTargetDir
- task: CMake@1
displayName: '${{parameters.componentName }} Build'
inputs:
workingDirectory: ${{ parameters.cmakeBuildDir }}
cmakeArgs: '--build ${{ parameters.cmakeTargetDir }}'
cmakeArgs: '--build ${{ parameters.cmakeTargetDir }} ${{ parameters.multithreadFlag }}'
retryCountOnTaskFailure: 10
# equivalent to running make $cmakeTarget from $cmakeBuildDir
# e.g., make install
- task: CMake@1
displayName: '${{parameters.componentName }} ${{ parameters.cmakeTarget }}'
inputs:
workingDirectory: ${{ parameters.cmakeBuildDir }}
cmakeArgs: '--build ${{ parameters.cmakeTargetDir }} --target ${{ parameters.cmakeTarget }}'
- ${{ if eq(parameters.installEnabled, true) }}:
- task: CMake@1
displayName: '${{parameters.componentName }} ${{ parameters.cmakeTarget }}'
inputs:
workingDirectory: ${{ parameters.cmakeBuildDir }}
cmakeArgs: '--build ${{ parameters.cmakeTargetDir }} --target ${{ parameters.cmakeTarget }}'

View File

@@ -12,23 +12,31 @@ steps:
displayName: 'sudo apt-get update'
inputs:
targetType: inline
script: sudo apt-get update
script: sudo apt-get --yes update
env:
DEBIAN_FRONTEND: noninteractive
- task: Bash@3
displayName: 'sudo apt-get upgrade'
inputs:
targetType: inline
script: sudo apt-get update
script: sudo apt-get --yes upgrade
env:
DEBIAN_FRONTEND: noninteractive
- task: Bash@3
displayName: 'sudo apt-get fix'
inputs:
targetType: inline
script: sudo apt --yes --fix-broken install
env:
DEBIAN_FRONTEND: noninteractive
- ${{ if gt(length(parameters.aptPackages), 0) }}:
- task: Bash@3
displayName: 'sudo apt-get install ...'
inputs:
targetType: inline
script: sudo apt-get --yes install ${{ join(' ', parameters.aptPackages) }}
env:
DEBIAN_FRONTEND: noninteractive
- ${{ if gt(length(parameters.pipModules), 0) }}:
- task: Bash@3
displayName: 'pip install ...'

View File

@@ -31,11 +31,15 @@ parameters:
composable_kernel: $(composable-kernel-pipeline-id)
half: $(half-pipeline-id)
hipBLAS: $(hipblas-pipeline-id)
hipCUB: $(hipcub-pipeline-id)
HIPIFY: $(hipify-pipeline-id)
hipRAND: $(hiprand-pipeline-id)
hipSOLVER: $(hipsolver-pipeline-id)
hipSPARSE: $(hipsparse-pipeline-id)
llvm-project: $(llvm-project-pipeline-id)
MIOpen: $(miopen-pipeline-id)
MIVisionX: $(mivisionx-pipeline-id)
rdc: $(rdc-pipeline-id)
rocBLAS: $(rocblas-pipeline-id)
ROCdbgapi : $(rocdbgapi-pipeline-id)
rocDecode: $(rocdecode-pipeline-id)
@@ -52,6 +56,7 @@ parameters:
rocSOLVER: $(rocsolver-pipeline-id)
rocSPARSE: $(rocsparse-pipeline-id)
ROCT-Thunk-Interface: $(roct-thunk-interface-pipeline-id)
rocThrust: $(rocthrust-pipeline-id)
roctracer: $(roctracer-pipeline-id)
rpp: $(rpp-pipeline-id)
- name: taggedPipelineIdentifiers
@@ -65,11 +70,15 @@ parameters:
composable_kernel: $(composable-kernel-tagged-pipeline-id)
half: $(half-tagged-pipeline-id)
hipBLAS: $(hipblas-tagged-pipeline-id)
hipCUB: $(hipcub-tagged-pipeline-id)
HIPIFY: $(hipify-tagged-pipeline-id)
hipRAND: $(hiprand-tagged-pipeline-id)
hipSOLVER: $(hipsolver-tagged-pipeline-id)
hipSPARSE: $(hipsparse-tagged-pipeline-id)
llvm-project: $(llvm-project-tagged-pipeline-id)
MIOpen: $(miopen-tagged-pipeline-id)
MIVisionX: $(mivisionx-tagged-pipeline-id)
rdc: $(rdc-tagged-pipeline-id)
rocBLAS: $(rocblas-tagged-pipeline-id)
ROCdbgapi : $(rocdbgapi-tagged-pipeline-id)
rocDecode: $(rocdecode-tagged-pipeline-id)
@@ -86,6 +95,7 @@ parameters:
rocSOLVER: $(rocsolver-tagged-pipeline-id)
rocSPARSE: $(rocsparse-tagged-pipeline-id)
ROCT-Thunk-Interface: $(roct-thunk-interface-tagged-pipeline-id)
rocThrust: $(rocthrust-tagged-pipeline-id)
roctracer: $(roctracer-tagged-pipeline-id)
rpp: $(rpp-tagged-pipeline-id)
# set to true if you're calling this template file multiple files in same pipeline

View File

@@ -15,6 +15,7 @@ AOMP
APIC
APIs
APU
AQL
ASIC
ASICs
ASan
@@ -62,6 +63,7 @@ CommonMark
Concretized
Conda
ConnectX
DENORM
DGEMM
DKMS
DL
@@ -70,6 +72,7 @@ DNN
DNNL
DPM
DRI
DRM
DW
DWORD
Dask
@@ -85,6 +88,7 @@ ELMo
ENDPGM
EPYC
ESXi
FFFFFFF
FFT
FFTs
FFmpeg
@@ -122,6 +126,7 @@ GenAI
GenZ
GitHub
Gitpod
HBCC
HBM
HCA
HIPCC
@@ -132,6 +137,7 @@ HPCG
HPE
HPL
HSA
HW
HWE
Haswell
Higgs
@@ -157,11 +163,16 @@ Intra
Ioffe
JSON
Jupyter
KBytes
KERNARG
KFD
KiB
KMD
KVM
Keras
Kernarg
Khronos
Ki
LAPACK
LCLK
LDS
@@ -180,6 +191,7 @@ MiB
MIGraphX
MIOpen
MIOpenGEMM
MIPMAP
MIVisionX
MLM
MMA
@@ -222,6 +234,7 @@ NousResearch's
NumPy
OAM
OAMs
OBJFILE
OCP
OEM
OFED
@@ -237,6 +250,7 @@ OpenCV
OpenFabrics
OpenGL
OpenMP
OpenMPI
OpenSSL
OpenVX
PCI
@@ -261,6 +275,7 @@ RCCL
RDC
RDMA
RDNA
RGP
RHEL
ROC
ROCProfiler
@@ -274,6 +289,7 @@ ROCmCC
ROCmSoftwarePlatform
ROCmValidationSuite
ROCr
RPATH
RST
RW
Radeon
@@ -302,10 +318,12 @@ SMEM
SMI
SMT
SPI
SQTT
SQs
SRAM
SRAMECC
SVD
SVM
SWE
SerDes
Shlens
@@ -343,6 +361,8 @@ UIF
USM
UTCL
UTIL
UNBUNDLER
USWC
Uncached
Unhandled
VALU
@@ -357,6 +377,8 @@ VSIX
VSkipped
Vanhoucke
Vulkan
WERROR
WG
WGP
WGPs
WX
@@ -388,6 +410,8 @@ allocator
allocators
amdgpu
api
arg
args
atmi
atomics
autogenerated
@@ -400,6 +424,7 @@ bfloat
bilinear
bitsandbytes
blit
bool
boson
bosons
buildable
@@ -411,6 +436,10 @@ centos
centric
changelog
chiplet
clBuildProgram
clCompileProgram
clLinkProgram
clr
cmake
cmd
coalescable
@@ -426,6 +455,7 @@ convolutional
convolves
cpp
csn
cstring
cuBLAS
cuFFT
cuLIB
@@ -443,6 +473,7 @@ deallocation
denoise
denoised
denoises
denorm
denormalize
deserializers
detections
@@ -457,6 +488,7 @@ embeddings
enablement
endpgm
encodings
enqueue
env
epilog
etcetera
@@ -480,7 +512,9 @@ heterogenous
hipBLAS
hipBLASLt
hipCUB
hipConfig
hipFFT
hipHostMalloc
hipLIB
hipRAND
hipSOLVER
@@ -489,12 +523,15 @@ hipSPARSELt
hipTensor
hipamd
hipblas
hipcc
hipcub
hipfft
hipfort
hipify
hiprtc
hipsolver
hipsparse
hpc
hpp
hsa
hsakmt
@@ -509,6 +546,7 @@ initializer
inlining
installable
interprocedural
interprocess
intra
invariants
invocating
@@ -526,9 +564,12 @@ localscratch
logits
lossy
macOS
malloc
matchers
mem
microarchitecture
migraphx
mipmap
miopen
miopengemm
mivisionx
@@ -539,6 +580,8 @@ mvffr
namespace
namespaces
numref
nvcc
nvidia
ocl
opencl
opencv
@@ -559,6 +602,7 @@ prebuilt
precompiled
prefetch
prefetchable
prepinned
preprocess
preprocessed
preprocessing
@@ -589,6 +633,7 @@ rocFFT
rocLIB
rocMLIR
rocPRIM
rocProfiler
rocRAND
rocSOLVER
rocSPARSE
@@ -609,11 +654,13 @@ rocsolver
rocsparse
rocthrust
roctracer
rpath
runtime
runtimes
sL
scalability
scalable
sdma
sendmsg
serializers
shader
@@ -624,13 +671,17 @@ smi
softmax
spack
src
stderr
stochastically
strided
stubing
suballocaitons
subdirectory
subexpression
subfolder
subfolders
supercomputing
td
tensorfloat
th
tokenization
@@ -647,6 +698,8 @@ tqdm
tracebacks
txt
uarch
uint
unbundler
uncached
uncorrectable
uninstallation
@@ -677,12 +730,14 @@ wavefronts
whitespaces
workgroup
workgroups
workitems
writeback
writebacks
wrreq
wzo
xargs
xf
xz
yaml
ysvmadyb
zypper
zypper

View File

@@ -164,7 +164,9 @@ ROCm™ 6.1.1 introduces minor fixes and improvements to some tools and librarie
### OS support
ROCm 6.1.1 has been tested against a pre-release version of Ubuntu 22.04.5 (kernel: 5.15 [GA], 6.8 [HWE]).
* ROCm 6.1.1 now supports Oracle Linux. It has been tested against version 8.9 (kernel 5.15.0-205) with AMD Instinct MI300X accelerators.
* ROCm 6.1.1 has been tested against a pre-release version of Ubuntu 22.04.5 (kernel: 5.15 [GA], 6.8 [HWE]).
### AMD SMI
@@ -1455,7 +1457,7 @@ Note: These complex operations are equivalent to corresponding types/functions o
* `HIP_ROCclr`
* NVIDIA platform
* `HIP_PLATFORM_NVCC`
* The [hcc_detail](https://github.com/ROCm/clr/tree/1949b1621a802ffb1492616adbae6154bfbe64ef/hipamd/include/hip/hcc_detail) and [nvcc_detail](https://github.com/ROCm/clr/tree/1949b1621a802ffb1492616adbae6154bfbe64ef/hipamd/include/hips/nvcc_detail) directories in the clr repository are removed.
* The `hcc_detail` and `nvcc_detail` directories in the clr repository are removed.
* Deprecated gcnArch is removed from hip device struct `hipDeviceProp_t`.
* Deprecated `enum hipMemoryType memoryType;` is removed from HIP struct `hipPointerAttribute_t` union.

View File

@@ -1,6 +1,6 @@
MIT License
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@@ -56,12 +56,103 @@ cd ~/ROCm/
**Note:** Using this sample code will cause the repo tool to download the open source code associated with the specified ROCm release. Ensure that you have ssh-keys configured on your machine for your GitHub ID prior to the download as explained at [Connecting to GitHub with SSH](https://docs.github.com/en/authentication/connecting-to-github-with-ssh).
### Building the ROCm source code
## Building the ROCm source code
Each ROCm component repository contains directions for building that component, such as the rocSPARSE documentation [Installation and Building for Linux](https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/install/Linux_Install_Guide.html). Refer to the specific component documentation for instructions on building the repository.
Each release of the ROCm software supports specific hardware and software configurations. Refer to [System requirements (Linux)](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) for the current supported hardware and OS.
## Build ROCm from source
The Build will use as many processors as it can find to build in parallel. Some of the compiles can consume as much as 10GB of RAM, so make sure you have plenty of Swap Space !
By default the ROCm build will compile for all supported GPU architectures and will take approximately 500 CPU hours.
The Build time will reduce significantly if we limit the GPU Architecture/s against which we need to build by using the environment variable GPU_ARCHS as mentioned below.
```bash
# --------------------------------------
# Step1: clone source code
# --------------------------------------
mkdir -p ~/WORKSPACE/ # Or any folder name other than WORKSPACE
cd ~/WORKSPACE/
export ROCM_VERSION=6.1.0 # or 6.1.1 6.1.2
~/bin/repo init -u http://github.com/ROCm/ROCm.git -b roc-6.1.x -m tools/rocm-build/rocm-${ROCM_VERSION}.xml
~/bin/repo sync
# --------------------------------------
# Step 2: Prepare build environment
# --------------------------------------
# Option 1: Start a docker container
# Pulling required base docker images:
# Ubuntu20.04 built from ROCm/tools/rocm-build/docker/ubuntu20/Dockerfile
docker pull rocm/rocm-build-ubuntu-20.04:6.1
# Ubuntu22.04 built from ROCm/tools/rocm-build/docker/ubuntu22/Dockerfile
docker pull rocm/rocm-build-ubuntu-22.04:6.1
# Start docker container and mount the source code folder:
docker run -ti \
-e ROCM_VERSION=${ROCM_VERSION} \
-e CCACHE_DIR=$HOME/.ccache \
-e CCACHE_ENABLED=true \
-e DOCK_WORK_FOLD=/src \
-w /src \
-v $PWD:/src \
-v /etc/passwd:/etc/passwd \
-v /etc/shadow:/etc/shadow \
-v ${HOME}/.ccache:${HOME}/.ccache \
-u $(id -u):$(id -g) \
<replace_with_required_ubuntu_base_docker_image> bash
# Option 2: Install required packages into the host machine
# For ubuntu20.04 system
cd ROCm/tools/rocm-build/docker/ubuntu20
bash install-prerequisites.sh
# For ubuntu22.04 system
cd ROCm/tools/rocm-build/docker/ubuntu22
bash install-prerequisities.sh
# --------------------------------------
# Step 3: Run build command line
# --------------------------------------
# Select GPU targets before building:
# When GPU_ARCHS is not set, default GPU targets supported by ROCm6.1 will be used.
# To build against a subset of GFX architectures you can use the below env variable.
# Support MI300 (gfx940, gfx941, gfx942).
export GPU_ARCHS="gfx942" # Example
export GPU_ARCHS="gfx940;gfx941;gfx942" # Example
# Pick and run build commands in the docker container:
# Build rocm-dev packages
make -f ROCm/tools/rocm-build/ROCm.mk -j ${NPROC:-$(nproc)} rocm-dev
# Build all ROCm packages
make -f ROCm/tools/rocm-build/ROCm.mk -j ${NPROC:-$(nproc)} all
# list all ROCm components to find required components
make -f ROCm/tools/rocm-build/ROCm.mk list_components
# Build a single ROCm packages
make -f ROCm/tools/rocm-build/ROCm.mk T_rocblas
# Find built packages in ubuntu20.04:
out/ubuntu-20.04/20.04/deb/
# Find built packages in ubuntu22.04:
out/ubuntu-22.04/22.04/deb/
# Find built logs in ubuntu20.04:
out/ubuntu-20.04/20.04/logs/
# Find built logs in ubuntu22.04:
out/ubuntu-22.04/22.04/logs/
# All logs pertaining to failed components, end with .errrors extension.
out/ubuntu-22.04/22.04/logs/rocblas.errors # Example
# All logs pertaining to building components, end with .inprogress extension.
out/ubuntu-22.04/22.04/logs/rocblas.inprogress # Example
# All logs pertaining to passed components, use the component names.
out/ubuntu-22.04/22.04/logs/rocblas # Example
```
Note: [Overview for ROCm.mk](tools/rocm-build/README.md)
## ROCm documentation
This repository contains the [manifest file](https://gerrit.googlesource.com/git-repo/+/HEAD/docs/manifest-format.md)

View File

@@ -77,8 +77,7 @@ Obtain the value of `gpu-arch` by running the following command:
[//]: # (dated link below, needs updating)
See the complete list of compiler command-line references
[here](https://github.com/ROCm/llvm-project/blob/amd-stg-open/clang/docs/CommandGuide/clang.rst).
See the complete list of [compiler command-line references](https://github.com/ROCm/llvm-project/blob/amd-staging/openmp/docs/CommandLineArgumentReference.rst).
### Using `rocprof` with OpenMP

View File

@@ -17,7 +17,7 @@ following section.
## ROCm component licenses
ROCm is released by Advanced Micro Devices, Inc. and is licensed per component separately.
ROCm is released by Advanced Micro Devices, Inc. (AMD) and is licensed per component separately.
The following table is a list of ROCm components with links to their respective license
terms. These components may include third party components subject to
additional licenses. Please review individual repositories for more information.
@@ -25,66 +25,71 @@ additional licenses. Please review individual repositories for more information.
<!-- spellcheck-disable -->
| Component | License |
|:---------------------|:-------------------------|
| [AMDMIGraphX](https://github.com/ROCm/AMDMIGraphX/) | [MIT](https://github.com/ROCm/AMDMIGraphX/blob/develop/LICENSE) |
| [HIPCC](https://github.com/ROCm/HIPCC/blob/develop/LICENSE.txt) | [MIT](https://github.com/ROCm/HIPCC/blob/develop/LICENSE.txt) |
| [HIPIFY](https://github.com/ROCm/HIPIFY/) | [MIT](https://github.com/ROCm/HIPIFY/blob/amd-staging/LICENSE.txt) |
| [HIP](https://github.com/ROCm/HIP/) | [MIT](https://github.com/ROCm/HIP/blob/develop/LICENSE.txt) |
| [MIOpenGEMM](https://github.com/ROCm/MIOpenGEMM/) | [MIT](https://github.com/ROCm/MIOpenGEMM/blob/master/LICENSE.txt) |
| [MIOpen](https://github.com/ROCm/MIOpen/) | [MIT](https://github.com/ROCm/MIOpen/blob/master/LICENSE.txt) |
| [MIVisionX](https://github.com/ROCm/MIVisionX/) | [MIT](https://github.com/ROCm/MIVisionX/blob/master/LICENSE.txt) |
| [RCP](https://github.com/GPUOpen-Tools/radeon_compute_profiler/) | [MIT](https://github.com/GPUOpen-Tools/radeon_compute_profiler/blob/master/LICENSE) |
| [ROCK-Kernel-Driver](https://github.com/ROCm/ROCK-Kernel-Driver/) | [GPL 2.0 WITH Linux-syscall-note](https://github.com/ROCm/ROCK-Kernel-Driver/blob/master/COPYING) |
| [ROCR-Runtime](https://github.com/ROCm/ROCR-Runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCR-Runtime/blob/master/LICENSE.txt) |
| [ROCT-Thunk-Interface](https://github.com/ROCm/ROCT-Thunk-Interface/) | [MIT](https://github.com/ROCm/ROCT-Thunk-Interface/blob/master/LICENSE.md) |
| [ROCclr](https://github.com/ROCm/ROCclr/) | [MIT](https://github.com/ROCm/ROCclr/blob/develop/LICENSE.txt) |
| [ROCdbgapi](https://github.com/ROCm/ROCdbgapi/) | [MIT](https://github.com/ROCm/ROCdbgapi/blob/amd-master/LICENSE.txt) |
| [ROCgdb](https://github.com/ROCm/ROCgdb/) | [GNU General Public License v2.0](https://github.com/ROCm/ROCgdb/blob/amd-master/COPYING) |
| [ROCm-CompilerSupport](https://github.com/ROCm/ROCm-CompilerSupport/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCm-CompilerSupport/blob/amd-stg-open/LICENSE.txt) |
| [ROCm-Device-Libs](https://github.com/ROCm/ROCm-Device-Libs/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCm-Device-Libs/blob/amd-stg-open/LICENSE.TXT) |
| [ROCm-OpenCL-Runtime/api/opencl/khronos/icd](https://github.com/KhronosGroup/OpenCL-ICD-Loader/) | [Apache 2.0](https://github.com/KhronosGroup/OpenCL-ICD-Loader/blob/main/LICENSE) |
| [ROCm-OpenCL-Runtime](https://github.com/ROCm/ROCm-OpenCL-Runtime/) | [MIT](https://github.com/ROCm/ROCm-OpenCL-Runtime/blob/develop/LICENSE.txt) |
| [ROCmValidationSuite](https://github.com/ROCm/ROCmValidationSuite/) | [MIT](https://github.com/ROCm/ROCmValidationSuite/blob/master/LICENSE) |
| [HIPCC](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/hipcc) | [MIT](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/hipcc/LICENSE.txt) |
| [HIPIFY](https://github.com/ROCm/HIPIFY/) | [MIT](https://github.com/ROCm/HIPIFY/blob/amd-staging/LICENSE.txt) |
| [AMDMIGraphX](https://github.com/ROCm/AMDMIGraphX/) | [MIT](https://github.com/ROCm/AMDMIGraphX/blob/develop/LICENSE) |
| [MIOpen](https://github.com/ROCm/MIOpen/) | [MIT](https://github.com/ROCm/MIOpen/blob/develop/LICENSE.txt) |
| [MIVisionX](https://github.com/ROCm/MIVisionX/) | [MIT](https://github.com/ROCm/MIVisionX/blob/develop/LICENSE.txt) |
| [AMD Common Language Runtime (CLR)](https://github.com/ROCm/clr) | [MIT](https://github.com/ROCm/clr/blob/develop/LICENCE) |
| [ROCm-Core](https://github.com/ROCm/rocm-core) | [MIT](https://github.com/ROCm/rocm-core/blob/master/copyright) |
| [hipamd](https://github.com/ROCm/clr/tree/develop/hipamd) | [MIT](https://github.com/ROCm/clr/blob/develop/hipamd/LICENSE.txt) |
| [ROCm-OpenCL-Runtime](https://github.com/ROCm/clr/tree/develop/opencl) | [MIT](https://github.com/ROCm/clr/blob/develop/opencl/LICENSE.txt) |
| [Tensile](https://github.com/ROCm/Tensile/) | [MIT](https://github.com/ROCm/Tensile/blob/develop/LICENSE.md) |
| [aomp-extras](https://github.com/ROCm/aomp-extras/) | [MIT](https://github.com/ROCm/aomp-extras/blob/aomp-dev/LICENSE) |
| [aomp](https://github.com/ROCm/aomp/) | [Apache 2.0](https://github.com/ROCm/aomp/blob/aomp-dev/LICENSE) |
| [atmi](https://github.com/ROCm/atmi/) | [MIT](https://github.com/ROCm/atmi/blob/master/LICENSE.txt) |
| [aomp-extras](https://github.com/ROCm/aomp-extras/) | [MIT](https://github.com/ROCm/aomp-extras/blob/aomp-dev/LICENSE) |
| [llvm-project](https://github.com/ROCm/llvm-project/) | [Apache](https://github.com/ROCm/llvm-project/blob/amd-staging/LICENSE.TXT) |
| [llvm-project/flang](https://github.com/ROCm/llvm-project/tree/amd-staging/flang) | [Apache 2.0](https://github.com/ROCm/llvm-project/blob/amd-staging/flang/LICENSE.TXT) |
| [Code Object Manager (Comgr)](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/comgr) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/comgr/LICENSE.txt) |
| [ROCm-Device-Libs](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/device-libs) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/device-libs/LICENSE.TXT) |
| [clang-ocl](https://github.com/ROCm/clang-ocl/) | [MIT](https://github.com/ROCm/clang-ocl/blob/master/LICENSE) |
| [flang](https://github.com/ROCm/flang/) | [Apache 2.0](https://github.com/ROCm/flang/blob/master/LICENSE.txt) |
| [half](https://github.com/ROCm/half/) | [MIT](https://github.com/ROCm/half/blob/master/LICENSE.txt) |
| [ROCK-Kernel-Driver](https://github.com/ROCm/ROCK-Kernel-Driver/) | [GPL 2.0 WITH Linux-syscall-note](https://github.com/ROCm/ROCK-Kernel-Driver/blob/master/COPYING) |
| [ROCT-Thunk-Interface](https://github.com/ROCm/ROCT-Thunk-Interface/) | [MIT](https://github.com/ROCm/ROCT-Thunk-Interface/blob/master/LICENSE.md) |
| [ROCR-Runtime](https://github.com/ROCm/ROCR-Runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCR-Runtime/blob/master/LICENSE.txt) |
| [ROCR Debug Agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/amd-staging/LICENSE.txt) |
| [Composable Kernel](https://github.com/ROCm/composable_kernel) | [MIT](https://github.com/ROCm/composable_kernel/blob/develop/LICENSE) |
| [half](https://github.com/ROCm/half/) | [MIT](https://github.com/ROCm/half/blob/rocm/LICENSE.txt) |
| [hipBLAS](https://github.com/ROCm/hipBLAS/) | [MIT](https://github.com/ROCm/hipBLAS/blob/develop/LICENSE.md) |
| [hipBLASLt](https://github.com/ROCm/hipBLASLt/) | [MIT](https://github.com/ROCm/hipBLASLt/blob/develop/LICENSE.md) |
| [hipCUB](https://github.com/ROCm/hipCUB/) | [Custom](https://github.com/ROCm/hipCUB/blob/develop/LICENSE.txt) |
| [hipFFT](https://github.com/ROCm/hipFFT/) | [MIT](https://github.com/ROCm/hipFFT/blob/develop/LICENSE.md) |
| [hipFORT](https://github.com/ROCm/hipfort/) | [MIT](https://github.com/ROCm/hipfort/blob/develop/LICENSE) |
| [hipRAND](https://github.com/ROCm/hipRAND/) | [MIT](https://github.com/ROCm/hipRAND/blob/develop/LICENSE.txt) |
| [hipSOLVER](https://github.com/ROCm/hipSOLVER/) | [MIT](https://github.com/ROCm/hipSOLVER/blob/develop/LICENSE.md) |
| [hipSPARSELt](https://github.com/ROCm/hipSPARSELt/) | [MIT](https://github.com/ROCm/hipSPARSELt/blob/develop/LICENSE.md) |
| [hipSPARSE](https://github.com/ROCm/hipSPARSE/) | [MIT](https://github.com/ROCm/hipSPARSE/blob/develop/LICENSE.md) |
| [hipSPARSELt](https://github.com/ROCm/hipSPARSELt/) | [MIT](https://github.com/ROCm/hipSPARSELt/blob/develop/LICENSE.md) |
| [hipTensor](https://github.com/ROCm/hipTensor) | [MIT](https://github.com/ROCm/hipTensor/blob/develop/LICENSE) |
| [hipamd](https://github.com/ROCm/hipamd/) | [MIT](https://github.com/ROCm/hipamd/blob/develop/LICENSE.txt) |
| [hipfort](https://github.com/ROCm/hipfort/) | [MIT](https://github.com/ROCm/hipfort/blob/master/LICENSE) |
| [llvm-project](https://github.com/ROCm/llvm-project/) | [Apache](https://github.com/ROCm/llvm-project/blob/main/LICENSE.TXT) |
| [rccl](https://github.com/ROCm/rccl/) | [Custom](https://github.com/ROCm/rccl/blob/develop/LICENSE.txt) |
| [rdc](https://github.com/ROCm/rdc/) | [MIT](https://github.com/ROCm/rdc/blob/master/LICENSE) |
| [rocAL](https://github.com/ROCm/rocAL) | [MIT](https://github.com/ROCm/rocAL/blob/develop/LICENSE.txt) |
| [rocALUTION](https://github.com/ROCm/rocALUTION/) | [MIT](https://github.com/ROCm/rocALUTION/blob/develop/LICENSE.md) |
| [rocBLAS](https://github.com/ROCm/rocBLAS/) | [MIT](https://github.com/ROCm/rocBLAS/blob/develop/LICENSE.md) |
| [rocDecode](https://github.com/ROCm/rocDecode) | [MIT](https://github.com/ROCm/rocDecode/blob/develop/LICENSE) |
| [rocFFT](https://github.com/ROCm/rocFFT/) | [MIT](https://github.com/ROCm/rocFFT/blob/develop/LICENSE.md) |
| [rocPRIM](https://github.com/ROCm/rocPRIM/) | [MIT](https://github.com/ROCm/rocPRIM/blob/develop/LICENSE.txt) |
| [ROCm Performance Primitives (RPP)](https://github.com/ROCm/rpp) | [MIT](https://github.com/ROCm/rpp/blob/develop/LICENSE) |
| [rocRAND](https://github.com/ROCm/rocRAND/) | [MIT](https://github.com/ROCm/rocRAND/blob/develop/LICENSE.txt) |
| [rocSOLVER](https://github.com/ROCm/rocSOLVER/) | [BSD-2-Clause](https://github.com/ROCm/rocSOLVER/blob/develop/LICENSE.md) |
| [rocSPARSE](https://github.com/ROCm/rocSPARSE/) | [MIT](https://github.com/ROCm/rocSPARSE/blob/develop/LICENSE.md) |
| [rocThrust](https://github.com/ROCm/rocThrust/) | [Apache 2.0](https://github.com/ROCm/rocThrust/blob/develop/LICENSE) |
| [rocWMMA](https://github.com/ROCm/rocWMMA/) | [MIT](https://github.com/ROCm/rocWMMA/blob/develop/LICENSE.md) |
| [rocm-cmake](https://github.com/ROCm/rocm-cmake/) | [MIT](https://github.com/ROCm/rocm-cmake/blob/develop/LICENSE) |
| [rocm_bandwidth_test](https://github.com/ROCm/rocm_bandwidth_test/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocm_bandwidth_test/blob/master/LICENSE.txt) |
| [rocm_smi_lib](https://github.com/ROCm/rocm_smi_lib/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocm_smi_lib/blob/master/License.txt) |
| [rocminfo](https://github.com/ROCm/rocminfo/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocminfo/blob/master/License.txt) |
| [rocprofiler](https://github.com/ROCm/rocprofiler/) | [MIT](https://github.com/ROCm/rocprofiler/blob/amd-master/LICENSE) |
| [rocr_debug_agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/master/LICENSE.txt) |
| [roctracer](https://github.com/ROCm/roctracer/) | [MIT](https://github.com/ROCm/roctracer/blob/amd-master/LICENSE) |
| rocm-llvm-alt | [AMD Proprietary License](https://www.amd.com/en/support/amd-software-eula)
| [ROCm Communication Collectives Library (RCCL)](https://github.com/ROCm/rccl/) | [Custom](https://github.com/ROCm/rccl/blob/develop/LICENSE.txt) |
| [ROCm Data Center (RDC)](https://github.com/ROCm/rdc/) | [MIT](https://github.com/ROCm/rdc/blob/develop/LICENSE) |
| [ROCm CMake](https://github.com/ROCm/rocm-cmake/) | [MIT](https://github.com/ROCm/rocm-cmake/blob/develop/LICENSE) |
| [ROCdbgapi](https://github.com/ROCm/ROCdbgapi/) | [MIT](https://github.com/ROCm/ROCdbgapi/blob/amd-staging/LICENSE.txt) |
| [ROCgdb](https://github.com/ROCm/ROCgdb/) | [GNU General Public License v2.0](https://github.com/ROCm/ROCgdb/blob/amd-master/COPYING) |
| [ROCm SMI Lib](https://github.com/ROCm/rocm_smi_lib/) | [MIT](https://github.com/ROCm/rocm_smi_lib/blob/develop/License.txt) |
| [AMD SMI](https://github.com/ROCm/amdsmi) | [MIT](https://github.com/ROCm/amdsmi/blob/develop/LICENSE) |
| [rocminfo](https://github.com/ROCm/rocminfo/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocminfo/blob/amd-staging/License.txt) |
| [ROCProfiler](https://github.com/ROCm/rocprofiler/) | [MIT](https://github.com/ROCm/rocprofiler/blob/amd-master/LICENSE) |
| [ROCTracer](https://github.com/ROCm/roctracer/) | [MIT](https://github.com/ROCm/roctracer/blob/amd-master/LICENSE) |
| [ROCm Bandwidth Test](https://github.com/ROCm/rocm_bandwidth_test/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocm_bandwidth_test/blob/master/LICENSE.txt) |
| [TransferBench](https://github.com/ROCm/TransferBench) | [MIT](https://github.com/ROCm/TransferBench/blob/develop/LICENSE.md) |
| [ROCmValidationSuite](https://github.com/ROCm/ROCmValidationSuite/) | [MIT](https://github.com/ROCm/ROCmValidationSuite/blob/master/LICENSE) |
| hsa-amd-aqlprofile | [AMD Software EULA](https://www.amd.com/en/legal/eula/amd-software-eula.html)
Open sourced ROCm components are released via public GitHub
repositories, packages on https://repo.radeon.com and other distribution channels.
Proprietary products are only available on https://repo.radeon.com. Currently, only
one component of ROCm, rocm-llvm-alt is governed by a proprietary license.
repositories, packages on [https://repo.radeon.com](https://repo.radeon.com) and other distribution channels.
Proprietary products are only available on [https://repo.radeon.com](https://repo.radeon.com). Currently, only
one component of ROCm, `rocm-llvm-alt` is governed by a proprietary license.
Proprietary components are organized in a proprietary subdirectory in the package
repositories to distinguish from open sourced packages.
@@ -92,7 +97,7 @@ repositories to distinguish from open sourced packages.
The following additional terms and conditions apply to your use of ROCm technical documentation.
```
©2023 Advanced Micro Devices, Inc. All rights reserved.
©2023 - 2024 Advanced Micro Devices, Inc. All rights reserved.
The information presented in this document is for informational purposes only
and may contain technical inaccuracies, omissions, and typographical errors. The
@@ -125,8 +130,8 @@ companies.
:::{attention}
AQL Profiler and AOCC CPU optimization are both provided in binary form, each
subject to the license agreement enclosed in the directory for the binary and is
available here: `/opt/rocm/share/doc/rocm-llvm-alt/EULA`. By using, installing,
subject to the license agreement enclosed in the directory for the binary available
in `/opt/rocm/share/doc/hsa-amd-aqlprofile/EULA`. By using, installing,
copying or distributing AQL Profiler and/or AOCC CPU Optimizations, you agree to
the terms and conditions of this license agreement. If you do not agree to the
terms of this agreement, do not install, copy or use the AQL Profiler and/or the
@@ -134,9 +139,8 @@ AOCC CPU Optimizations.
:::
For the rest of the ROCm packages, you can find the licensing information at the
following location: `/opt/rocm/share/doc/<component-name>/`
following location: `/opt/rocm/share/doc/<component-name>/` or in the locations
specified in the preceding table.
For example, you can fetch the licensing information of the `_amd_comgr_`
component (Code Object Manager) from the `amd_comgr` folder. A file named
`LICENSE.txt` contains the license details at:
`/opt/rocm-5.4.3/share/doc/amd_comgr/LICENSE.txt`
For example, you can fetch the licensing information of the `amd_comgr`
component (Code Object Manager) from the `/opt/rocm/share/doc/amd_comgr/LICENSE.txt` file.

View File

@@ -17,10 +17,11 @@ Use this matrix to view the ROCm compatibility across successive major and minor
:doc:`Operating Systems <rocm-install-on-linux:reference/system-requirements>`, "Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3"
,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
,"RHEL 9.3, 9.2","RHEL 9.3, 9.2"
,"RHEL 9.4 [#red-hat94]_, 9.3, 9.2","RHEL 9.3, 9.2"
,"RHEL 8.9, 8.8","RHEL 8.9, 8.8"
,"SLES 15 SP5, SP4","SLES 15 SP5, SP4"
,CentOS 7.9,CentOS 7.9
,"Oracle Linux 8.9 [#oracle89]_"
,,
:doc:`GFX Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3
,CDNA2,CDNA2
@@ -36,9 +37,9 @@ Use this matrix to view the ROCm compatibility across successive major and minor
,,
ECOSYSTEM SUPPORT:,,
:doc:`PyTorch <rocm-install-on-linux:how-to/3rd-party/pytorch-install>`,"2.1, 2.0, 1.13","2.1, 2.0, 1.13"
:doc:`Tensorflow <rocm-install-on-linux:how-to/3rd-party/tensorflow-install>`,"2.15, 2.14, 2.13","2.14, 2.13, 2.12"
:doc:`TensorFlow <rocm-install-on-linux:how-to/3rd-party/tensorflow-install>`,"2.15, 2.14, 2.13","2.14, 2.13, 2.12"
:doc:`JAX <rocm-install-on-linux:how-to/3rd-party/jax-install>`,0.4.26,0.4.26
`ONNX-RT <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.17.3,1.14.1
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.17.3,1.14.1
,,
3RD PARTY COMMUNICATION LIBS:,,
`UCC <https://github.com/ROCm/ucc>`_,>=1.2.0,>=1.2.0
@@ -52,12 +53,12 @@ Use this matrix to view the ROCm compatibility across successive major and minor
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0
:doc:`MIGraphX <amdmigraphx:index>`,2.9.0,2.8.0
:doc:`MIOpen <miopen:index>`,3.1.0,3.0.0
:doc:`MIVisionX <mivisionx:doxygen/html/index>`,2.5.0,2.5.0
:doc:`MIVisionX <mivisionx:index>`,2.5.0,2.5.0
:doc:`rocDecode <rocdecode:index>`,0.5.0,N/A
:doc:`RPP <rpp:index>`,1.5.0,1.4.0
:doc:`ROCm Performance Primitives (RPP) <rpp:index>`,1.5.0,1.4.0
,,
COMMUNICATION:,,
:doc:`rccl <rccl:index>`,2.18.6,2.18.3
:doc:`RCCL <rccl:index>`,2.18.6,2.18.3
,,
MATH LIBS:,,
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0
@@ -86,7 +87,7 @@ Use this matrix to view the ROCm compatibility across successive major and minor
,,
SUPPORT LIBS:,,
`hipother <https://github.com/ROCm/hipother>`_,6.1.40091,6.0.32830
`rocm-cmake <https://github.com/ROCm/rocm-cmake>`_,0.12.0,0.11.0
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.12.0,0.11.0
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.1.0,6.0.0
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,20240125.3.30,20231016.2.245
,,
@@ -94,20 +95,19 @@ Use this matrix to view the ROCm compatibility across successive major and minor
:doc:`AMD SMI <amdsmi:index>`,24.4.1,23.4.2
:doc:`HIPIFY <hipify:index>`,17.0.0,17.0.0
:doc:`ROCdbgapi <rocdbgapi:index>`,0.71.0,0.71.0
`ROCdebug-Agent <https://github.com/ROCm/rocr_debug_agent>`_,2.0.3,2.0.3
:doc:`rocGDB <rocgdb:index>`,14.1.0,13.2.0
:doc:`rocProfiler <rocprofiler:profiler_home_page>`,2.0.60100,2.0.0
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60100,2.0.0
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.3.0,N/A
:doc:`rocTracer <roctracer:index>`,4.1.60100,4.1.0
`rocm_bandwidth_test <https://github.com/ROCm/rocm_bandwidth_test>`_,1.4.0,1.4.0
:doc:`ROCTracer <roctracer:index>`,4.1.60100,4.1.0
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0
`rocminfo <https://github.com/ROCm/rocminfo>`_,1.0.0,1.0.0
:doc:`ROCm SMI Lib <rocm_smi_lib:index>`,7.0.0,6.0.0
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,14.1.0,13.2.0
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.0.0,6.0.0
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,rocm-6.1.0,rocm-6.0.0
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.3,2.0.3
:doc:`TransferBench <transferbench:index>`,1.48,1.46
,,
COMPILERS:,,
`AOMP <https://github.com/ROCm/aomp>`_,17.60.0,17.60.0
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,0.5.0,0.5.0
`Flang <https://github.com/ROCm/flang>`_,17.0.0.24103,17.0.0.23483
`llvm-project <https://github.com/ROCm/llvm-project>`_,17.0.0.24103,17.0.0.23483
@@ -116,11 +116,13 @@ Use this matrix to view the ROCm compatibility across successive major and minor
RUNTIMES:,,
:doc:`HIP <hip:index>`,6.1.40091,6.0.32830
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0
`ROCR Runtime <https://github.com/ROCm/ROCR-Runtime>`_,1.13.0,1.12.0
:doc:`ROCR-Runtime <rocr-runtime:index>`,1.13.0,1.12.0
.. rubric:: Footnotes
.. [#] **For ROCm 6.1** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.3 & 8.9 and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
.. [#red-hat94] **For ROCm 6.1** - RHEL 9.4 is supported only on AMD Instinct MI300A.
.. [#oracle89] **For ROCm 6.1.1** - Oracle Linux is supported only on AMD Instinct MI300X.
.. [#] **For ROCm 6.1** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
.. [#] **For ROCm 6.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9 and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.

View File

@@ -1,47 +0,0 @@
.. meta::
:description: Setting the number of CUs
:keywords: AMD, ROCm, cu, number of cus
.. _env-variables-reference:
*************************************************************
Setting the number of CUs
*************************************************************
When using GPUs to accelerate compute workloads, it sometimes becomes necessary
to configure the hardware's usage of Compute Units (CU). This is a more advanced
option, so please read this page before experimentation.
The GPU driver provides two environment variables to set the number of CUs used. The
first one is ``HSA_CU_MASK`` and the second one is ``ROC_GLOBAL_CU_MASK``. The main
difference is that ``ROC_GLOBAL_CU_MASK`` sets the CU mask on queues created by the HIP
or the OpenCL runtimes. While ``HSA_CU_MASK`` sets the mask on a lower level of queue
creation in the driver, this mask will also be set for queues being profiled.
The environment variables have the following syntax:
::
ID = [0-9][0-9]* ex. base 10 numbers
ID_list = (ID | ID-ID)[, (ID | ID-ID)]* ex. 0,2-4,7
GPU_list = ID_list ex. 0,2-4,7
CU_list = 0x[0-F]* | ID_list ex. 0x337F OR 0,2-4,7
CU_Set = GPU_list : CU_list ex. 0,2-4,7:0-15,32-47 OR 0,2-4,7:0x337F
HSA_CU_MASK = CU_Set [; CU_Set]* ex. 0,2-4,7:0-15,32-47; 3-9:0x337F
The GPU indices are taken post ``ROCR_VISIBLE_DEVICES`` reordering. For GPUs listed,
the listed or masked CUs will be enabled, the rest disabled. Unlisted GPUs will not
be affected, their CUs will all be enabled.
The parsing of the variable is stopped when a syntax error occurs. The erroneous set
and the ones following will be ignored. Repeating GPU or CU IDs are a syntax error.
Specifying a mask with no usable CUs (CU_list is 0x0) is a syntax error. For excluding
GPU devices use ``ROCR_VISIBLE_DEVICES``.
These environment variables only affect ROCm software, not graphics applications.
It's important to know that not all CU configurations are valid on all devices. For
instance, on devices where two CUs can be combined into a WGP (for kernels running in
WGP mode), it is not valid to disable only a single CU in a WGP. `This paper
<https://www.cs.unc.edu/~otternes/papers/rtsj2022.pdf>`_ can provide more information
about what to expect, when disabling CUs.

View File

@@ -13,7 +13,9 @@ This document provides documentation on using ROCm ASan.
For information about LLVM ASan, see the [LLVM documentation](https://clang.llvm.org/docs/AddressSanitizer.html).
**Note:** The beta release of LLVM ASan for ROCm is currently tested and validated on Ubuntu 20.04.
:::{note}
The beta release of LLVM ASan for ROCm is currently tested and validated on Ubuntu 20.04.
:::
## Compiling for ASan
@@ -34,9 +36,13 @@ Recommendations for doing this are:
Other architectures are allowed, but their device code will not be instrumented and a warning will be emitted.
**Note:** It is not an error to compile some files without ASan instrumentation, but doing so reduces the ability of the process to detect addressing errors. However, if the main program "`a.out`" does not directly depend on the ASan runtime (`libclang_rt.asan-x86_64.so`) after the build completes (check by running `ldd` (List Dynamic Dependencies) or `readelf`), the application will immediately report an error at runtime as described in the next section.
:::{tip}
It is not an error to compile some files without ASan instrumentation, but doing so reduces the ability of the process to detect addressing errors. However, if the main program "`a.out`" does not directly depend on the ASan runtime (`libclang_rt.asan-x86_64.so`) after the build completes (check by running `ldd` (List Dynamic Dependencies) or `readelf`), the application will immediately report an error at runtime as described in the next section.
:::
**Note:** When compiling OpenMP programs with ASan instrumentation, it is currently necessary to set the environment variable `LIBRARY_PATH` to `/opt/rocm-<version>/lib/llvm/lib/asan:/opt/rocm-<version>/lib/asan`. At runtime, it may be necessary to add `/opt/rocm-<version>/lib/llvm/lib/asan` to `LD_LIBRARY_PATH`.
:::{note}
When compiling OpenMP programs with ASan instrumentation, it is currently necessary to set the environment variable `LIBRARY_PATH` to `/opt/rocm-<version>/lib/llvm/lib/asan:/opt/rocm-<version>/lib/asan`. At runtime, it may be necessary to add `/opt/rocm-<version>/lib/llvm/lib/asan` to `LD_LIBRARY_PATH`.
:::
### About compilation time
@@ -92,15 +98,23 @@ If it does not appear, when executed the application will quickly output an ASan
There is an environment variable, `ASAN_OPTIONS`, that can be used to adjust the runtime behavior of the ASan runtime itself. There are more than a hundred "flags" that can be adjusted (see an old list at [flags](https://github.com/google/sanitizers/wiki/AddressSanitizerFlags)) but the default settings are correct and should be used in most cases. It must be noted that these options only affect the host ASan runtime. The device runtime only currently supports the default settings for the few relevant options.
There are two `ASAN_OPTION` flags of particular note.
There are three `ASAN_OPTION` flags of note.
* `halt_on_error=0/1 default 1`.
This tells the ASan runtime to halt the application immediately after detecting and reporting an addressing error. The default makes sense because the application has entered the realm of undefined behavior. If the developer wishes to have the application continue anyway, this option can be set to zero. However, the application and libraries should then be compiled with the additional option `-fsanitize-recover=address`. Note that the ROCm optional ASan instrumented libraries are not compiled with this option and if an error is detected within one of them, but halt_on_error is set to 0, more undefined behavior will occur.
This tells the ASan runtime to halt the application immediately after detecting and reporting an addressing error. The default makes sense because the application has entered the realm of undefined behavior. If the developer wishes to have the application continue anyway, this option can be set to zero. However, the application and libraries should then be compiled with the additional option `-fsanitize-recover=address`. Note that the ROCm optional ASan instrumented libraries are not compiled with this option and if an error is detected within one of them, but halt_on_error is set to 0, more undefined behavior will occur.
* `detect_leaks=0/1 default 1`.
This option directs the ASan runtime to enable the [Leak Sanitizer](https://clang.llvm.org/docs/LeakSanitizer.html) (LSan). Unfortunately, for heterogeneous applications, this default will result in significant output from the leak sanitizer when the application exits due to allocations made by the language runtime which are not considered to be leaks. This output can be avoided by adding `detect_leaks=0` to the `ASAN_OPTIONS`, or alternatively by producing an LSan suppression file (syntax described [here](https://github.com/google/sanitizers/wiki/AddressSanitizerLeakSanitizer)) and activating it with environment variable `LSAN_OPTIONS=suppressions=/path/to/suppression/file`. When using a suppression file, a suppression report is printed by default. The suppression report can be disabled by using the `LSAN_OPTIONS` flag `print_suppressions=0`.
This option directs the ASan runtime to enable the [Leak Sanitizer](https://clang.llvm.org/docs/LeakSanitizer.html) (LSan). For heterogeneous applications, this default results in significant output from the leak sanitizer when the application exits due to allocations made by the language runtime which are not considered to be leaks. This output can be avoided by adding `detect_leaks=0` to the `ASAN_OPTIONS`, or alternatively by producing an LSan suppression file (syntax described [here](https://github.com/google/sanitizers/wiki/AddressSanitizerLeakSanitizer)) and activating it with environment variable `LSAN_OPTIONS=suppressions=/path/to/suppression/file`. When using a suppression file, a suppression report is printed by default. The suppression report can be disabled by using the `LSAN_OPTIONS` flag `print_suppressions=0`.
* `quarantine_size_mb=N default 256`
This option defines the number of megabytes (MB) `N` of memory that the ASan runtime will hold after it is `freed` to detect use-after-free situations. This memory is unavailable for other purposes. The default of 256 MB may be too small to detect some use-after-free situations, especially given that the large size of many GPU memory allocations may push `freed` allocations out of quarantine before the attempted use.
:::{note}
Setting the value of `quarantine_size_mb` larger may enable more problematic uses to be detected, but at the cost of reducing memory available for other purposes.
:::
## Runtime overhead
@@ -186,7 +200,7 @@ or
currently may include one or two surprising CPU side tracebacks mentioning :`hostcall`". This is due to how `malloc` and `free` are implemented for GPU code and these call stacks can be ignored.
### Running with `rocgdb`
## Running ASan with `rocgdb`
`rocgdb` can be used to further investigate ASan detected errors, with some preparation.
@@ -238,7 +252,7 @@ $ rocgdb <path to application>
(gdb) c
```
### Using ASan with a short HIP application
## Using ASan with a short HIP application
Consider the following simple and short demo of using the Address Sanitizer with a HIP application:
@@ -402,7 +416,7 @@ Shadow byte legend (one shadow byte represents 8 application bytes):
==2817==ABORTING
```
### Known issues with using GPU sanitizer
## Known issues with using GPU sanitizer
* Red zones must have limited size. It is possible for an invalid access to completely miss a red zone and not be detected.

View File

@@ -57,34 +57,10 @@ article_pages = [
"date":"2024-06-04"
},
{"file":"install/windows/install-quick", "os":["windows"]},
{"file":"install/linux/install-quick", "os":["linux"]},
{"file":"install/linux/install", "os":["linux"]},
{"file":"install/linux/install-options", "os":["linux"]},
{"file":"install/linux/prerequisites", "os":["linux"]},
{"file":"install/docker", "os":["linux"]},
{"file":"install/magma-install", "os":["linux"]},
{"file":"install/pytorch-install", "os":["linux"]},
{"file":"install/tensorflow-install", "os":["linux"]},
{"file":"install/windows/install", "os":["windows"]},
{"file":"install/windows/prerequisites", "os":["windows"]},
{"file":"install/windows/cli/index", "os":["windows"]},
{"file":"install/windows/gui/index", "os":["windows"]},
{"file":"about/compatibility/docker-image-support-matrix", "os":["linux"]},
{"file":"about/compatibility/user-kernel-space-compat-matrix", "os":["linux"]},
{"file":"reference/library-index", "os":["linux"]},
{"file":"how-to/deep-learning-rocm", "os":["linux"]},
{"file":"how-to/gpu-enabled-mpi", "os":["linux"]},
{"file":"how-to/system-debugging", "os":["linux"]},
{"file":"how-to/tuning-guides", "os":["linux", "windows"]},
{"file":"rocm-a-z", "os":["linux", "windows"]},
]
exclude_patterns = ['temp']
@@ -108,5 +84,5 @@ html_theme_options = {
}
redirects = {
"reference/openmp/openmp": "../../about/compatibility/openmp.html"
"reference/openmp/openmp": "../../about/compatibility/openmp.html"
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 108 KiB

After

Width:  |  Height:  |  Size: 95 KiB

View File

Before

Width:  |  Height:  |  Size: 44 KiB

After

Width:  |  Height:  |  Size: 44 KiB

View File

Before

Width:  |  Height:  |  Size: 112 KiB

After

Width:  |  Height:  |  Size: 112 KiB

View File

Before

Width:  |  Height:  |  Size: 188 KiB

After

Width:  |  Height:  |  Size: 188 KiB

View File

Before

Width:  |  Height:  |  Size: 138 KiB

After

Width:  |  Height:  |  Size: 138 KiB

View File

Before

Width:  |  Height:  |  Size: 62 KiB

After

Width:  |  Height:  |  Size: 62 KiB

View File

Before

Width:  |  Height:  |  Size: 27 KiB

After

Width:  |  Height:  |  Size: 27 KiB

View File

Before

Width:  |  Height:  |  Size: 86 KiB

After

Width:  |  Height:  |  Size: 86 KiB

View File

Before

Width:  |  Height:  |  Size: 49 KiB

After

Width:  |  Height:  |  Size: 49 KiB

View File

Before

Width:  |  Height:  |  Size: 45 KiB

After

Width:  |  Height:  |  Size: 45 KiB

View File

Before

Width:  |  Height:  |  Size: 288 KiB

After

Width:  |  Height:  |  Size: 288 KiB

View File

Before

Width:  |  Height:  |  Size: 153 KiB

After

Width:  |  Height:  |  Size: 153 KiB

View File

Before

Width:  |  Height:  |  Size: 219 KiB

After

Width:  |  Height:  |  Size: 219 KiB

View File

Before

Width:  |  Height:  |  Size: 80 KiB

After

Width:  |  Height:  |  Size: 80 KiB

View File

Before

Width:  |  Height:  |  Size: 73 KiB

After

Width:  |  Height:  |  Size: 73 KiB

View File

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 28 KiB

View File

Before

Width:  |  Height:  |  Size: 43 KiB

After

Width:  |  Height:  |  Size: 43 KiB

View File

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 187 KiB

View File

@@ -8,44 +8,14 @@ Installing deep learning frameworks for ROCm
ROCm provides a comprehensive ecosystem for deep learning development, including
:ref:`libraries <artificial-intelligence-apis>` for optimized deep learning operations and ROCm-aware versions of popular
deep learning frameworks and libraries such as PyTorch, TensorFlow, JAX, and MAGMA. ROCm works closely with these
deep learning frameworks and libraries such as PyTorch, TensorFlow, and JAX. ROCm works closely with these
frameworks to ensure that framework-specific optimizations take advantage of AMD accelerator and GPU architectures.
The following guides cover installation processes for ROCm-aware deep learning frameworks.
.. grid::
.. grid-item::
:columns: 3
:doc:`PyTorch for ROCm <rocm-install-on-linux:how-to/3rd-party/pytorch-install>`
.. grid-item::
:columns: 3
:doc:`TensorFlow for ROCm <rocm-install-on-linux:how-to/3rd-party/tensorflow-install>`
.. grid-item::
:columns: 3
.. grid-item::
:columns: 3
.. grid-item::
:columns: 3
:doc:`JAX for ROCm <rocm-install-on-linux:how-to/3rd-party/jax-install>`
.. grid-item::
:columns: 3
:doc:`MAGMA for ROCm <rocm-install-on-linux:how-to/3rd-party/magma-install>`
.. grid-item::
:columns: 3
.. grid-item::
:columns: 3
* :doc:`PyTorch for ROCm <rocm-install-on-linux:how-to/3rd-party/pytorch-install>`
* :doc:`TensorFlow for ROCm <rocm-install-on-linux:how-to/3rd-party/tensorflow-install>`
* :doc:`JAX for ROCm <rocm-install-on-linux:how-to/3rd-party/jax-install>`
The following chart steps through typical installation workflows for installing deep learning frameworks for ROCm.
@@ -65,4 +35,4 @@ through the following guides.
* :doc:`rocm-for-ai/index`
* :doc:`fine-tuning-llms/index`
* :doc:`llm-fine-tuning-optimization/index`

View File

@@ -28,18 +28,9 @@ graphs, tensor parallel multi-GPU, GPTQ, AWQ, and token speculation.
Installing vLLM
---------------
1. To install vLLM, run the following commands.
.. code-block:: shell
# Install from source
git clone https://github.com/ROCm/vllm.git
cd vllm
PYTORCH_ROCM_ARCH=gfx942 python setup.py install #MI300 series
.. _fine-tuning-llms-vllm-rocm-docker-image:
2. Run the following commands to build a Docker image ``vllm-rocm``.
1. Run the following commands to build a Docker image ``vllm-rocm``.
.. code-block:: shell
@@ -52,7 +43,7 @@ Installing vLLM
.. tab-item:: vLLM on a single-accelerator system
:sync: single
3. To use vLLM as an API server to serve reference requests, first start a container using the :ref:`vllm-rocm
2. To use vLLM as an API server to serve reference requests, first start a container using the :ref:`vllm-rocm
Docker image <fine-tuning-llms-vllm-rocm-docker-image>`.
.. code-block:: shell
@@ -69,7 +60,7 @@ Installing vLLM
vllm-rocm \
bash
4. Inside the container, start the API server to run on a single accelerator on port 8000 using the following command.
3. Inside the container, start the API server to run on a single accelerator on port 8000 using the following command.
.. code-block:: shell
@@ -77,11 +68,11 @@ Installing vLLM
The following log message is displayed in your command line indicates that the server is listening for requests.
.. image:: ../../data/how-to/fine-tuning-llms/vllm-single-gpu-log.png
.. image:: ../../data/how-to/llm-fine-tuning-optimization/vllm-single-gpu-log.png
:alt: vLLM API server log message
:align: center
5. To test, send it a curl request containing a prompt.
4. To test, send it a curl request containing a prompt.
.. code-block:: shell
@@ -92,11 +83,11 @@ Installing vLLM
.. code-block:: text
{"text":["What is AMD Instinct?\nAmd Instinct is a brand new line of high-performance computing (HPC) processors from Advanced Micro Devices (AMD). These processors are designed to deliver unparalleled performance for HPC workloads, including scientific simulations, data analytics, and machine learning.\nThe Instinct lineup includes a range of processors, from the entry-level Inst"]}
.. tab-item:: vLLM on a multi-accelerator system
:sync: multi
3. To use vLLM as an API server to serve reference requests, first start a container using the :ref:`vllm-rocm
2. To use vLLM as an API server to serve reference requests, first start a container using the :ref:`vllm-rocm
Docker image <fine-tuning-llms-vllm-rocm-docker-image>`.
.. code-block:: shell
@@ -114,14 +105,14 @@ Installing vLLM
bash
4. To run API server on multiple GPUs, use the ``-tp`` or ``--tensor-parallel-size`` parameter. For example, to use two
3. To run API server on multiple GPUs, use the ``-tp`` or ``--tensor-parallel-size`` parameter. For example, to use two
GPUs, start the API server using the following command.
.. code-block:: shell
python -m vllm.entrypoints.api_server --model /app/model --dtype float16 -tp 2 --port 8000 &
5. To run multiple instances of API Servers, specify different ports for each server, and use ``ROCR_VISIBLE_DEVICES`` to
4. To run multiple instances of API Servers, specify different ports for each server, and use ``ROCR_VISIBLE_DEVICES`` to
isolate each instance to a different accelerator.
For example, to run two API servers, one on port 8000 using GPU 0 and 1, one on port 8001 using GPU 2 and 3, use a
@@ -132,7 +123,7 @@ Installing vLLM
ROCR_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.api_server --model /data/llama-2-7b-chat-hf --dtype float16 tp 2 --port 8000 &
ROCR_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.api_server --model /data/llama-2-7b-chat-hf --dtype float16 tp 2--port 8001 &
6. To test, send it a curl request containing a prompt.
5. To test, send it a curl request containing a prompt.
.. code-block:: shell
@@ -163,27 +154,29 @@ speculation.
Install TGI
-----------
1. To install the TGI Docker image, run the following commands.
1. Launch the TGI Docker container in the host machine.
.. code-block:: shell
# Install from Dockerfile
git clone https://github.com/huggingface/text-generation-inference.git -b mi300-compat
cd text-generation-inference
docker build . -f Dockerfile.rocm
docker run --name tgi --rm -it --cap-add=SYS_PTRACE --security-opt seccomp=unconfined
--device=/dev/kfd --device=/dev/dri --group-add video --ipc=host --shm-size 256g
--net host -v $PWD:/data
--entrypoint "/bin/bash"
--env HUGGINGFACE_HUB_CACHE=/data
ghcr.io/huggingface/text-generation-inference:latest-rocm
.. tab-set::
.. tab-item:: TGI on a single-accelerator system
:sync: single
2. Launch a model using TGI server on a single accelerator.
2. Inside the container, launch a model using TGI server on a single accelerator.
.. code-block:: shell
export ROCM_USE_FLASH_ATTN_V2_TRITON=True
text-generation-launcher --model-id NousResearch/Meta-Llama-3-70B --dtype float16 --port 8000 &
3. To test, send it a curl request containing a prompt.
.. code-block:: shell
@@ -191,26 +184,26 @@ Install TGI
curl http://localhost:8000/generate_stream -X POST -d '{"inputs":"What is AMD Instinct?","parameters":{"max_new_tokens":20}}' -H 'Content-Type: application/json'
You should receive a response like the following.
.. code-block:: shell
data:{"index":20,"token":{"id":304,"text":" in","logprob":-1.2822266,"special":false},"generated_text":" AMD Instinct is a new family of data center GPUs designed to accelerate the most demanding workloads in","details":null}
.. tab-item:: TGI on a multi-accelerator system
2. Launch a model using TGI server on multiple accelerators (4 in this case).
2. Inside the container, launch a model using TGI server on multiple accelerators (4 in this case).
.. code-block:: shell
export ROCM_USE_FLASH_ATTN_V2_TRITON=True
text-generation-launcher --model-id NousResearch/Meta-Llama-3-8B --dtype float16 --port 8000 --num-shard 4 &
3. To test, send it a curl request containing a prompt.
.. code-block:: shell
curl http://localhost:8000/generate_stream -X POST -d '{"inputs":"What is AMD Instinct?","parameters":{"max_new_tokens":20}}' -H 'Content-Type: application/json'
You should receive a response like the following.
.. code-block:: shell

View File

@@ -18,7 +18,7 @@ Attention (GQA), and Multi-Query Attention (MQA). This reduction in memory movem
time-to-first-token (TTFT) latency for large batch sizes and long prompt sequences, thereby enhancing overall
performance.
.. image:: ../../data/how-to/fine-tuning-llms/attention-module.png
.. image:: ../../data/how-to/llm-fine-tuning-optimization/attention-module.png
:alt: Attention module of a large language module utilizing tiling
:align: center
@@ -243,7 +243,7 @@ page describes the options.
Validator,ROCBLAS_VERSION,4.1.0-cefa4a9b-dirty
GemmTunableOp_float_TN,tn_200_100_20,Gemm_Rocblas_32323,0.00669595
.. image:: ../../data/how-to/fine-tuning-llms/tunableop.png
.. image:: ../../data/how-to/llm-fine-tuning-optimization/tunableop.png
:alt: GEMM and TunableOp
:align: center

View File

@@ -154,13 +154,13 @@ kernels by configuring the ``exllama_config`` parameter as the following.
.. code-block:: python
from transformers import AutoModelForCausalLM, GPTQConfig
pretrained_model_dir = "meta-llama/Llama-2-7b"
gptq_config = GPTQConfig(bits=4, exllama_config={"version":2})
#pretrained_model_dir = "meta-llama/Llama-2-7b"
base_model_name = "NousResearch/Llama-2-7b-hf"
gptq_config = GPTQConfig(bits=4, dataset="c4", exllama_config={"version":2})
quantized_model = AutoModelForCausalLM.from_pretrained(
base_model_name,
device_map="auto",
base_model_name,
device_map="auto",
quantization_config=gptq_config)
bitsandbytes
============

View File

@@ -31,7 +31,7 @@ Each accelerator or GPU has multiple Compute Units (CUs) and various CUs do comp
can a compute kernel can allocate its task to? For the :doc:`AMD MI300X accelerator <../../reference/gpu-arch-specs>`, the
grid should have at least 1024 thread blocks or workgroups.
.. figure:: ../../data/how-to/fine-tuning-llms/compute-unit.png
.. figure:: ../../data/how-to/llm-fine-tuning-optimization/compute-unit.png
Schematic representation of a CU in the CDNA2 or CDNA3 architecture.
@@ -187,7 +187,7 @@ Kernel occupancy
.. _fine-tuning-llms-occupancy-vgpr-table:
.. figure:: ../../data/how-to/fine-tuning-llms/occupancy-vgpr.png
.. figure:: ../../data/how-to/llm-fine-tuning-optimization/occupancy-vgpr.png
:alt: Occupancy related to VGPR usage in an Instinct MI300X accelerator.
:align: center
@@ -343,11 +343,6 @@ or :doc:`rocBLAS <rocblas:index>` is faster for a specific operation.
then required to strip out the kernel and create kernel
compilation and launch via Triton.
* For advanced ``matmul`` or ``conv`` configuration tuning, the ``inductor-gemm-tuner`` can
help. This implements the Triton ``conv``/``mm`` implementations used upstream
and allows specification of inputs and configuration tuning search space if new
tunings are found that can be added to the auto-tune list.
Other guidelines
================

View File

@@ -32,7 +32,7 @@ The template parameters of the instance are grouped into four parameter types:
================
### Figure 2
================ -->
```{figure} ../../data/how-to/fine-tuning-llms/ck-template_parameters.jpg
```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-template_parameters.jpg
The template parameters of the selected GEMM kernel are classified into four groups. These template parameter groups should be defined properly before running the instance.
```
@@ -126,7 +126,7 @@ The row and column, and stride information of input matrices are also passed to
================
### Figure 3
================ -->
```{figure} ../../data/how-to/fine-tuning-llms/ck-kernel_launch.jpg
```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-kernel_launch.jpg
Templated kernel launching consists of kernel instantiation, making arguments by passing in actual application parameters, creating an invoker, and running the instance through the invoker.
```
@@ -155,7 +155,7 @@ The first operation in the process is to perform the multiplication of input mat
================
### Figure 4
================ -->
```{figure} ../../data/how-to/fine-tuning-llms/ck-operation_flow.jpg
```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-operation_flow.jpg
Operation flow.
```
@@ -171,7 +171,7 @@ Here, we use [DeviceBatchedGemmMultiD_Xdl](https://github.com/ROCm/composable_ke
================
### Figure 5
================ -->
```{figure} ../../data/how-to/fine-tuning-llms/ck-root_instance.jpg
```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-root_instance.jpg
Use the DeviceBatchedGemmMultiD_Xdl instance as a root.
```
@@ -421,7 +421,7 @@ Run `python setup.py install` to build and install the extension. It should look
================
### Figure 6
================ -->
```{figure} ../../data/how-to/fine-tuning-llms/ck-compilation.jpg
```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-compilation.jpg
Compilation and installation of the INT8 kernels.
```
@@ -433,7 +433,7 @@ The implementation architecture of running SmoothQuant models on MI300X GPUs is
================
### Figure 7
================ -->
```{figure} ../../data/how-to/fine-tuning-llms/ck-inference_flow.jpg
```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-inference_flow.jpg
The implementation architecture of running SmoothQuant models on AMD MI300X accelerators.
```
@@ -459,7 +459,7 @@ Figure 8 shows the performance comparisons between the original FP16 and the Smo
================
### Figure 8
================ -->
```{figure} ../../data/how-to/fine-tuning-llms/ck-comparisons.jpg
```{figure} ../../data/how-to/llm-fine-tuning-optimization/ck-comparisons.jpg
Performance comparisons between the original FP16 and the SmoothQuant-quantized INT8 models on a single MI300X accelerator.
```

View File

@@ -41,7 +41,7 @@ The weight update is as follows: :math:`W_{updated} = W + ΔW`.
If the weight matrix :math:`W` contains 7B parameters, then the weight update matrix :math:`ΔW` should also
contain 7B parameters. Therefore, the :math:`ΔW` calculation is computationally and memory intensive.
.. figure:: ../../data/how-to/fine-tuning-llms/weight-update.png
.. figure:: ../../data/how-to/llm-fine-tuning-optimization/weight-update.png
:alt: Weight update diagram
(a) Weight update in regular fine-tuning. (b) Weight update in LoRA where the product of matrix A (:math:`M\times K`)

View File

@@ -38,7 +38,7 @@ You can then visualize and view these metrics using an open-source profile visua
shows transactions denoting the CPU activities that launch GPU kernels while the lower section shows the actual GPU
activities where it processes the ``resnet18`` inferences layer by layer.
.. figure:: ../../data/how-to/fine-tuning-llms/perfetto-trace.svg
.. figure:: ../../data/how-to/llm-fine-tuning-optimization/perfetto-trace.svg
Perfetto trace visualization example.
@@ -100,7 +100,7 @@ analyze bottlenecks and stressors for their computational workloads on AMD Insti
Omniperf collects hardware counters in multiple passes, and will therefore re-run the application during each pass
to collect different sets of metrics.
.. figure:: ../../data/how-to/fine-tuning-llms/omniperf-analysis.png
.. figure:: ../../data/how-to/llm-fine-tuning-optimization/omniperf-analysis.png
Omniperf memory chat analysis panel.
@@ -130,7 +130,7 @@ hardware counters are also included.
have the greatest impact on the end-to-end execution of the application and to discover what else is happening on the
system during a performance bottleneck.
.. figure:: ../../data/how-to/fine-tuning-llms/omnitrace-timeline.png
.. figure:: ../../data/how-to/llm-fine-tuning-optimization/omnitrace-timeline.png
Omnitrace timeline trace example.
@@ -138,10 +138,10 @@ For details usage and examples of using these tools, refer to the
`Introduction to profiling tools for AMD hardware <https://rocm.blogs.amd.com/software-tools-optimization/profilers/README.html>`_
developer blog.
Debugging with ROCm Debug Agent
Debugging with ROCr Debug Agent
===============================
ROCm Debug Agent (:doc:`ROCdebug-agent <rocr_debug_agent:index>`) is a library that can be loaded by the ROCm platform
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`) is a library that can be loaded by the ROCm platform
runtime (:doc:`ROCr <rocr-runtime:index>`) to provide the following functionalities for all AMD accelerators and GPUs
supported by the ROCm Debugger API (:doc:`ROCdbgapi <rocdbgapi:index>`).
@@ -155,9 +155,9 @@ Debugging memory access faults
------------------------------
Identifying a faulting kernel is often enough to triage a memory access fault. To that end, the
`ROCm Debug Agent <https://github.com/ROCm/rocr_debug_agent/>`_ can trap a memory access fault and provide a dump of all
`ROCr Debug Agent <https://github.com/ROCm/rocr_debug_agent/>`_ can trap a memory access fault and provide a dump of all
active wavefronts that caused the error as well as the name of the kernel. The
`AMD ROCm Debug Agent Library README <https://github.com/ROCm/rocr_debug_agent/blob/master/README.md>`_ provides full
`ROCr Debug Agent Library README <https://github.com/ROCm/rocr_debug_agent/blob/master/README.md>`_ provides full
instructions, but in brief:
* Compiling with ``-ggdb -O0`` is recommended but not required.

View File

@@ -21,3 +21,6 @@ In this guide, you'll learn about:
- :doc:`Running models from Hugging Face <hugging-face-models>`
- :doc:`Deploying your model <deploy-your-model>`
To learn about ROCm for HPC applications and scientific computing, see
:doc:`../rocm-for-hpc/index`.

View File

@@ -110,7 +110,7 @@ Fine-tuning your model
ROCm supports multiple techniques for :ref:`optimizing fine-tuning <fine-tuning-llms-concept-optimizations>`, for
example, LoRA, QLoRA, PEFT, and FSDP.
Learn more about challenges and solutions for model fine-tuning in :doc:`../fine-tuning-llms/index`.
Learn more about challenges and solutions for model fine-tuning in :doc:`../llm-fine-tuning-optimization/index`.
The following developer blogs showcase examples of how to fine-tune a model on an AMD accelerator or GPU.

View File

@@ -0,0 +1,231 @@
.. meta::
:description: How to use ROCm for HPC
:keywords: ROCm, AI, high performance computing, HPC
******************
Using ROCm for HPC
******************
The ROCm open-source software stack is optimized to extract high-performance
computing (HPC) workload performance from AMD Instinct™ accelerators
while maintaining compatibility with industry software frameworks.
ROCm enhances support and access for developers by providing streamlined and
improved tools that significantly increase productivity. Being open-source, ROCm
fosters innovation, differentiation, and collaboration within the developer
community, making it a powerful and accessible solution for leveraging the full
potential of AMD accelerators' capabilities in diverse computational
applications.
* For more information, see :doc:`What is ROCm? <../../what-is-rocm>`.
* For guidance on installing ROCm, see :doc:`rocm-install-on-linux:index`. See
the :doc:`../../compatibility/compatibility-matrix` for details on hardware
and operating system support.
Some of the most popular HPC frameworks are part of the ROCm platform, including
those to help parallelize operations across multiple accelerators and servers,
handle memory hierarchies, and solve linear systems.
.. image:: ../../data/how-to/rocm-for-hpc/hpc-stack-2024_6_20.png
:align: center
:alt: Software and hardware ecosystem surrounding ROCm and AMD Instinct for HPC
The following catalog of GPU-accelerated solutions includes a vast set of
platform-compatible HPC applications, including those for astrophysics, climate
and weather, computational chemistry, computational fluid dynamics, earth
science, genomics, geophysics, molecular dynamics, and physics computing.
Refer to the resources in the following table for instructions on building,
running, and deploying these applications on ROCm-capable systems with AMD
Instinct accelerators. Each build container provides parameters to specify
different source code branches, release versions of ROCm, OpenMPI, UCX, and
Ubuntu versions.
.. _hpc-apps:
..
Reduce font size of HPC app descriptions slightly.
.. raw:: html
<style>
#hpc-apps-table tr td:last-child {
font-size: 0.9rem;
}
</style>
.. container::
:name: hpc-apps-table
.. list-table::
:header-rows: 1
:stub-columns: 1
:widths: 2 2 5
* - Application domain
- HPC application
- Description
* - Physics
- `Chroma <https://github.com/amd/InfinityHub-CI/tree/main/chroma/>`_
- The Chroma package supports data-parallel programming constructs for lattice
field theory and in particular lattice QCD. It uses the SciDAC QDP++ data-parallel
programming (in C++) that presents a single high-level code image to the user,
but can generate highly optimized code for many architectural systems including
single node workstations, multi and many-core nodes, clusters of nodes via
QMP, and classic vector computers.
* -
- `Grid <https://github.com/amd/InfinityHub-CI/tree/main/grid/>`_
- Grid is a library for lattice QCD calculations that employs a high-level data parallel
approach while using a number of techniques to target multiple types of parallelism.
The library currently supports MPI, OpenMP and short vector parallelism. The SIMD
instructions sets covered include SSE, AVX, AVX2, FMA4, IMCI and AVX512. Recent
releases expanded this support to include GPU offloading.
* -
- `MILC <https://github.com/amd/InfinityHub-CI/tree/main/milc/>`_
- The MILC Code is a set of research codes developed by MIMD Lattice Computation
(MILC) collaboration for doing simulations of four dimensional SU(3) lattice gauge
theory on MIMD parallel machines scaling from single-processor workstations
to HPC systems. The MILC Code is publicly available for research purposes.
Publications of work done using this code or derivatives of this code should
acknowledge this use.
* -
- `PIConGPU <https://github.com/amd/InfinityHub-CI/tree/main/picongpu>`_
- PIConGPU (Particle-in-cell on Graphics Processing Units) is an Open Source
simulations framework for plasma and laser-plasma physics used to develop
advanced particle accelerators for radiation therapy of cancer, high energy
physics and photon science.
* - Astrophysics
- `Cholla <https://github.com/amd/InfinityHub-CI/tree/main/cholla/>`_
- An astrophysical simulation code developed for the extreme environments
encountered in astrophysical systems.
* - Geophysics
- `SPECFEM3D Cartesian <https://github.com/amd/InfinityHub-CI/tree/main/specfem3d>`_
- SPECFEM3D Cartesian simulates acoustic (fluid), elastic (solid), coupled
acoustic/elastic, poroelastic or seismic wave propagation in any type of
conforming mesh of hexahedra (structured or not.) It can, for instance,
model seismic waves propagating in sedimentary basins or any other
regional geological model following earthquakes. It can also be used
for non-destructive testing or for ocean acoustics.
* - Molecular dynamics
- `GROMACS with HIP (AMD implementation) <https://github.com/amd/InfinityHub-CI/tree/main/gromacs>`_
- GROMACS is a versatile package to perform molecular dynamics, i.e.
simulate the Newtonian equations of motion for systems with hundreds
to millions of particles. This AMD container is based on a released
version of GROMACS modified by AMD. This container only supports up
to a 8 GPU configuration
* -
- `LAMMPS <https://github.com/amd/InfinityHub-CI/tree/main/lammps>`_
- LAMMPS is a classical molecular dynamics code with a focus on materials
modeling. It's an acronym for Large-scale Atomic/Molecular Massively
Parallel Simulator.
* - Computational fluid dynamics
- `NEKO <https://github.com/amd/InfinityHub-CI/tree/main/neko>`_
- Neko is a portable framework for high-order spectral element flow simulations.
Written in modern Fortran, Neko adopts an object-oriented approach, allowing
multi-tier abstractions of the solver stack and facilitating various hardware
backends ranging from general-purpose processors, CUDA and HIP enabled
accelerators to SX-Aurora vector processors.
* -
- `nekRS <https://github.com/amd/InfinityHub-CI/tree/main/nekrs>`_
- nekRS is an open-source Navier Stokes solver based on the spectral element
method targeting classical processors and accelerators like GPUs.
* - Computational chemistry
- `QUDA <https://github.com/amd/InfinityHub-CI/tree/main/quda>`_
- Library designed for efficient lattice QCD computations on
accelerators. It includes optimized Dirac operators and a variety of
fermion solvers and conjugate gradient (CG) implementations, enhancing
performance and accuracy in lattice QCD simulations.
* - Electronic structure
- `CP2K <https://github.com/amd/InfinityHub-CI/tree/main/cp2k>`_
- CP2K is a quantum chemistry and solid state physics software package that can
perform atomistic simulations of solid state, liquid, molecular, periodic, material,
crystal, and biological systems. This AMD container, based on a released version
of CP2K, is an AMD beta version with ongoing optimizations.
* - Quantum Monte Carlo Simulation
- `QMCPACK <https://github.com/amd/InfinityHub-CI/tree/main/qmcpack>`_
- QMCPACK is an open-source production-level many-body ab initio Quantum
Monte Carlo code for computing the electronic structure of atoms, molecules, 2D
nanomaterials and solids. The solid-state capabilities include metallic systems
as well as insulators. QMCPACK is expected to run well on workstations through
to the latest generation supercomputers. Besides high performance, particular
emphasis is placed on code quality and reproducibility.
* - Climate and weather
- `MPAS <https://github.com/amd/InfinityHub-CI/tree/main/mpas>`_
- The Model for Prediction Across Scales (MPAS) is a collaborative project for
developing atmosphere, ocean, and other earth-system simulation components
for use in climate, regional climate, and weather studies.
* - Benchmark
- `rocHPL <https://github.com/amd/InfinityHub-CI/tree/main/rochpl>`_
- HPL, or High-Performance Linpack, is a benchmark which solves a uniformly
random system of linear equations and reports floating-point execution rate.
This documentation supports the implementation of the HPL benchmark on
top of AMD's ROCm platform.
* -
- `rocHPL-MxP <https://github.com/amd/InfinityHub-CI/tree/main/hpl-mxp>`_
- Benchmark that highlights the convergence of HPC and AI workloads by
solving a system of linear equations using novel, mixed-precision
algorithms.
* -
- `HPCG <https://github.com/amd/InfinityHub-CI/tree/main/hpcg>`_
- HPCG, or the High Performance Conjugate Gradient Benchmark complements
the High Performance LINPACK (HPL) benchmark. The computational and data
access patterns of HPCG are designed to closely match a broad set of important
applications not represented by HPL, and to incentivize computer system
designers to invest in capabilities that will benefit the collective performance
of these applications.
* - Tools and libraries
- `ROCm with GPU-aware MPI container <https://github.com/amd/InfinityHub-CI/tree/main/base-gpu-mpi-rocm-docker>`_
- Base container for GPU-aware MPI with ROCm for HPC applications. This
project provides a boilerplate for building and running a Docker
container with ROCm supporting GPU-aware MPI implementations using
OpenMPI or UCX.
* -
- `Kokkos <https://github.com/amd/InfinityHub-CI/tree/main/kokkos>`_
- Kokkos is a programming model in C++ for writing performance portable
applications for use across HPC platforms. It provides abstractions for both
parallel execution of code and data management. Kokkos is designed to target
complex node architectures with N-level memory hierarchies and multiple types
of execution resources.
* -
- `PyFR <https://github.com/amd/InfinityHub-CI/tree/main/pyfr>`_
- PyFR is an open-source Python based framework for solving advection-diffusion
type problems on streaming architectures using the Flux Reconstruction approach of
Huynh. The framework is designed to solve a range of governing systems on mixed
unstructured grids containing various element types. It is also designed to target a
range of hardware platforms via use of an in-built domain specific language derived
from the Mako templating engine.
* -
- `RAJA <https://github.com/amd/InfinityHub-CI/tree/main/raja>`_
- RAJA is a library of C++ software abstractions, primarily developed at Lawrence
Livermore National Laboratory (LLNL), that enables architecture and programming
model portability for HPC applications.
* -
- `Trilinos <https://github.com/amd/InfinityHub-CI/tree/main/trilinos>`_
- The Trilinos Project is an effort to develop algorithms and enabling technologies
within an object-oriented software framework for the solution of large-scale,
complex multi-physics engineering and scientific problems.
To learn about ROCm for AI applications, see :doc:`../rocm-for-ai/index`.

View File

@@ -0,0 +1,42 @@
.. meta::
:description: Setting the number of CUs
:keywords: CU, CUs, number of CUs, compute units
.. _settings-cus-reference:
*************************************************************
Setting the number of compute units
*************************************************************
The GPU driver provides two environment variables to set the number of CUs used:
- ``HSA_CU_MASK``
- ``ROC_GLOBAL_CU_MASK``
The ``ROC_GLOBAL_CU_MASK`` variable sets the CU mask on queues created by HIP or OpenCL runtimes. The ``HSA_CU_MASK`` variable sets the mask on a lower level of queue creation in the driver. It also sets the mask on the queues being profiled.
.. tip::
When using GPUs to accelerate compute workloads, it sometimes becomes necessary to configure the hardware's usage of compute units (CU). This is a more advanced option, so please read this page before experimentation.
The environment variables have the following syntax:
::
ID = [0-9][0-9]* ex. base 10 numbers
ID_list = (ID | ID-ID)[, (ID | ID-ID)]* ex. 0,2-4,7
GPU_list = ID_list ex. 0,2-4,7
CU_list = 0x[0-F]* | ID_list ex. 0x337F OR 0,2-4,7
CU_Set = GPU_list : CU_list ex. 0,2-4,7:0-15,32-47 OR 0,2-4,7:0x337F
HSA_CU_MASK = CU_Set [; CU_Set]* ex. 0,2-4,7:0-15,32-47; 3-9:0x337F
The GPU indices are taken post ``ROCR_VISIBLE_DEVICES`` reordering. The listed or masked CUs are enabled for listed GPUs, and the others are disabled. Unlisted GPUs are not be affected, and their CUs are enabled.
The variable parsing stops when a syntax error occurs. The erroneous set and the following are ignored. Repeating GPU or CU IDs results in a syntax error. Specifying a mask with no usable CUs (CU_list is 0x0) results in a syntax error. To exclude GPU devices, use ``ROCR_VISIBLE_DEVICES``.
.. note::
These environment variables only affect ROCm software, not graphics applications.
Not all CU configurations are valid on all devices. For example, on devices where two CUs can be combined into a WGP (for kernels running in WGP mode), its not valid to disable only a single CU in a WGP. For more information about what to expect when disabling CUs, see the `Exploring AMD GPU Scheduling Details by Experimenting With “Worst Practices” <https://www.cs.unc.edu/~otternes/papers/rtsj2022.pdf>`_ paper.

View File

@@ -25,7 +25,6 @@ Our documentation is organized into the following categories:
:class-container: rocm-doc-grid
:::{grid-item-card}
:class-card: sd-text-black
:img-top: ./data/banner-installation.jpg
:img-alt: Install documentation
:padding: 2
@@ -34,20 +33,18 @@ Our documentation is organized into the following categories:
* {doc}`Quick start guide<rocm-install-on-linux:tutorial/quick-start>`
* {doc}`Linux install guide<rocm-install-on-linux:how-to/native-install/index>`
* {doc}`Package manager integration<rocm-install-on-linux:how-to/native-install/package-manager-integration>`
* {doc}`Install Docker containers<rocm-install-on-linux:how-to/docker>`
* {doc}`ROCm & Spack<rocm-install-on-linux:how-to/spack>`
* Windows
* {doc}`Windows install guide<rocm-install-on-windows:how-to/install>`
* {doc}`Application deployment guidelines<rocm-install-on-windows:conceptual/deployment-guidelines>`
* [Deep learning frameworks](./how-to/deep-learning-rocm.rst)
* {doc}`Install Docker containers<rocm-install-on-linux:how-to/docker>`
* {doc}`PyTorch for ROCm<rocm-install-on-linux:how-to/3rd-party/pytorch-install>`
* {doc}`TensorFlow for ROCm<rocm-install-on-linux:how-to/3rd-party/tensorflow-install>`
* {doc}`JAX for ROCm<rocm-install-on-linux:how-to/3rd-party/jax-install>`
* {doc}`MAGMA for ROCm<rocm-install-on-linux:how-to/3rd-party/magma-install>`
* {doc}`ROCm & Spack<rocm-install-on-linux:how-to/spack>`
:::
:::{grid-item-card}
:class-card: sd-text-black
:img-top: ./data/banner-compatibility.jpg
:img-alt: Compatibility information
:padding: 2
@@ -65,7 +62,6 @@ Our documentation is organized into the following categories:
<!-- markdownlint-disable MD051 -->
:::{grid-item-card}
:class-card: sd-text-black
:img-top: ./data/banner-reference.jpg
:img-alt: Reference documentation
:padding: 2
@@ -81,18 +77,19 @@ Our documentation is organized into the following categories:
* [Development](#development-tools)
* [Performance analysis](#performance-analysis)
* [System](#system-tools)
* [Environment Variables](./reference/env-variables.rst)
* [Hardware specifications](./reference/gpu-arch-specs.rst)
:::
<!-- markdownlint-enable MD051 -->
:::{grid-item-card}
:class-card: sd-text-black
:img-top: ./data/banner-howto.jpg
:img-alt: How-to documentation
:padding: 2
* [Using ROCm for AI](./how-to/rocm-for-ai/index.rst)
* [Fine-tuning LLMs and inference optimization](./how-to/fine-tuning-llms/index.rst)
* [Using ROCm for HPC](./how-to/rocm-for-hpc/index.rst)
* [Fine-tuning LLMs and inference optimization](./how-to/llm-fine-tuning-optimization/index.rst)
* [System tuning for various architectures](./how-to/tuning-guides.md)
* [MI100](./how-to/tuning-guides/mi100.md)
* [MI200](./how-to/tuning-guides/mi200.md)
@@ -102,12 +99,12 @@ Our documentation is organized into the following categories:
* [Using AddressSanitizer](./conceptual/using-gpu-sanitizer.md)
* [Compiler disambiguation](./conceptual/compiler-disambiguation.md)
* [OpenMP support in ROCm](./about/compatibility/openmp.md)
* [Setting the number of CUs](./how-to/setting-cus)
* [System level debugging](./how-to/system-debugging.md)
* [GitHub examples](https://github.com/amd/rocm-examples)
:::
:::{grid-item-card}
:class-card: sd-text-black
:img-top: ./data/banner-conceptual.jpg
:img-alt: Conceptual documentation
:padding: 2
@@ -117,7 +114,6 @@ Our documentation is organized into the following categories:
* [MI250](./conceptual/gpu-arch/mi250.md)
* [MI300](./conceptual/gpu-arch/mi300.md)
* [GPU memory](./conceptual/gpu-memory.md)
* [Setting the number of CUs](./conceptual/setting-cus)
* [File structure (Linux FHS)](./conceptual/file-reorg.md)
* [GPU isolation techniques](./conceptual/gpu-isolation.md)
* [Using CMake](./conceptual/cmake-packages.rst)

View File

@@ -22,7 +22,7 @@
* {doc}`Composable Kernel <composable_kernel:index>`
* {doc}`MIGraphX <amdmigraphx:index>`
* {doc}`MIOpen <miopen:index>`
* {doc}`MIVisionX <mivisionx:doxygen/html/index>`
* {doc}`MIVisionX <mivisionx:index>`
* {doc}`rocAL <rocal:index>`
* {doc}`rocDecode <rocdecode:index>`
* {doc}`ROCm Performance Primitives (RPP) <rpp:index>`

View File

@@ -0,0 +1,920 @@
.. meta::
:description: Environment variables reference
:keywords: AMD, ROCm, environment variables, environment, reference
.. role:: cpp(code)
:language: cpp
.. _env-variables-reference:
*************************************************************
ROCm environment variables
*************************************************************
The following table lists the most commonly used environment variables in the ROCm software stack. These variables help to perform simple tasks such as building a ROCm library or running applications on AMDGPUs.
.. list-table::
:header-rows: 1
:widths: 70,30
* - **Environment variable**
- **Value**
* - | ``HIP_PATH``
| The path of the HIP SDK on Microsoft Windows.
- Default: ``C:/hip``
* - | ``HIP_DIR``
| The path of the HIP SDK on Microsoft Windows. This variable is ignored, if ``HIP_PATH`` is set.
- Default: ``C:/hip``
* - | ``ROCM_PATH``
| The path of the installed ROCm software stack on Linux.
- Default: ``/opt/rocm``
* - | ``HIP_PLATFORM``
| The platform targeted by HIP. If ``HIP_PLATFORM`` is not set, then HIPCC attempts to auto-detect the platform, if it can find NVCC.
- ``amd``, ``nvidia``
CLR environment variables
=========================
AMD Common Language Runtime (:doc:`CLR <hip:understand/amd_clr>`) library contains source codes for AMD's compute languages runtimes:
* ``hipamd``: Contains implementation of HIP language on the AMD platform.
* ``opencl``: Contains implementation of `OpenCL™ <https://www.khronos.org/opencl/>`_ on AMD platform. It is hosted at `clr/opencl <https://github.com/ROCm/clr/tree/develop/opencl>`_.
* ``rocclr``: Contains common runtime used in HIP and OpenCL. This is hosted at `clr/rocclr <https://github.com/ROCm/clr/tree/develop/rocclr>`_.
The environment variables affecting the CLR library might affect HIP and OpenCL libraries or applications.
The following table lists the environment variables that affect ``opencl`` and ``hipamd``:
.. list-table::
:header-rows: 1
:widths: 70,30
* - **Environment variable**
- **Value**
* - | ``ROCM_LIBPATCH_VERSION``
| The ROCm version in the integer format. The format is
| :cpp:`MAJOR * 10000 + MINOR * 100 + PATCH`
- 50000, 60020...
* - | ``CPACK_DEBIAN_PACKAGE_RELEASE``
| This is the numbering of the Debian package itself, i.e., the version of the packaging and not the version of the content.
- 1, 2, 3...
* - | ``CPACK_RPM_PACKAGE_RELEASE``
| This is the numbering of the RPM package itself, i.e., the version of the packaging and not the version of the content.
- 1, 2, 3...
The following table lists the environment variables that affect ``hipamd``:
.. list-table::
:header-rows: 1
:widths: 70,30
* - **Environment variable**
- **Value**
* - | ``HIP_FORCE_QUEUE_PROFILING``
| Simulates the application to run in rocprof by forcing command queue profiling to ``on`` by default.
- | 0: Disable
| 1: Enable
* - | ``HSA_OVERRIDE_GFX_VERSION``
| Overrides the target version; used to enable HIP usage on unsupported hardware.
- 11.0.0, 10.3.0
* - | ``HSA_DISABLE_CACHE``
| Disables the L2 cache.
- | 0: Disable
| 1: Enable
* - | ``HSAKMT_DEBUG_LEVEL``
| When set to the highest level, the system prints memory allocation information.
- 1, 2, ... 7
The following table lists the environment variables that affect ``rocclr``:
.. https://github.com/ROCm/clr/blob/develop/rocclr/utils/flags.hpp
.. list-table::
:header-rows: 1
:widths: 35,14,51
* - **Environment variable**
- **Default value**
- **Value**
* - | ``AMD_CPU_AFFINITY``
| Resets CPU affinity of any runtime threads
- ``0``
- | 0: Disable
| 1: Enable
* - | ``AMD_DIRECT_DISPATCH``
| Enables direct kernel dispatch. Currently available on Linux; under development for Windows.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``AMD_GPU_FORCE_SINGLE_FP_DENORM``
| Forces denormalization for single precision.
- ``-1``
- | -1: Don't force
| 0: Disable
| 1: Enable
* - | ``AMD_LOG_LEVEL``
| Enables HIP log on various level.
- ``0``
- | 0: Disable log.
| 1: Enables log on error level.
| 2: Enables log on warning and lower levels.
| 3: Enables log on information and lower levels.
| 4: Enables log on debug and lower levels.
* - | ``AMD_LOG_LEVEL_FILE``
| Sets output file for ``AMD_LOG_LEVEL``.
- stderr output
-
* - | ``AMD_LOG_MASK``
| Specifies HIP log filters. Here is the ` complete list of log masks <https://github.com/ROCm/clr/blob/develop/rocclr/utils/debug.hpp#L40>`_.
- ``0x7FFFFFFF``
- | 0x1: Log API calls.
| 0x2: Kernel and copy commands and barriers.
| 0x4: Synchronization and waiting for commands to finish.
| 0x8: Decode and display AQL packets.
| 0x10: Queue commands and queue contents.
| 0x20: Signal creation, allocation, pool.
| 0x40: Locks and thread-safety code.
| 0x80: Kernel creations and arguments, etc.
| 0x100: Copy debug.
| 0x200: Detailed copy debug.
| 0x400: Resource allocation, performance-impacting events.
| 0x800: Initialization and shutdown.
| 0x1000: Misc debug, not yet classified.
| 0x2000: Show raw bytes of AQL packet.
| 0x4000: Show code creation debug.
| 0x8000: More detailed command info, including barrier commands.
| 0x10000: Log message location.
| 0x20000: Memory allocation.
| 0x40000: Memory pool allocation, including memory in graphs.
| 0x80000: Timestamp details.
| 0xFFFFFFFF: Log always even mask flag is zero.
* - | ``AMD_OCL_BUILD_OPTIONS``
| Sets the options for ``clBuildProgram`` and ``clCompileProgram``. This variable overrides the previously set options.
- None
-
* - | ``AMD_OCL_BUILD_OPTIONS_APPEND``
| Appends the options for ``clBuildProgram`` and ``clCompileProgram``.
- None
-
* - | ``AMD_OCL_LINK_OPTIONS``
| Sets the options for ``clLinkProgram``.
- None
-
* - | ``AMD_OCL_LINK_OPTIONS_APPEND``
| Appends the options for ``clLinkProgram``.
- None
-
* - | ``AMD_OCL_WAIT_COMMAND``
| Enforces a wait for every submitted command.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``OCL_SET_SVM_SIZE``
| Sets shared virtual memory (SVM) space size in bytes for discrete GPUs.
- ``65536``
-
* - | ``OCL_STUB_PROGRAMS``
| Enables OCL programs stubing.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``OPENCL_VERSION``
| Force GPU OpenCL version.
- ``200``
-
* - | ``AMD_OPT_FLUSH``
| Sets kernel flush option.
- ``0x1``
- | ``0x0`` = Uses system-scope fence operations.
| ``0x1`` = Uses device-scope fence operations when possible.
* - | ``AMD_SERIALIZE_COPY``
| Controls serialization of copies
- ``0``
- | 0: Disable
| 1: Waits for completion before enqueue.
| 2: Waits for completion after enqueue.
| 3: Both
* - | ``AMD_SERIALIZE_KERNEL``
| Serializes kernel enqueue.
- ``0``
- | 0: Disable
| 1: Waits for completion before enqueue.
| 2: Waits for completion after enqueue.
| 3: Both
* - | ``AMD_THREAD_TRACE_ENABLE``
| Enables thread trace extension.
- ``1``
- | 0: Disable
| 1: Enable
* - | ``CL_KHR_FP64``
| Controls support for double precision.
- ``1``
- | 0: Disable
| 1: Enable
* - | ``CQ_THREAD_STACK_SIZE``
| The default command queue thread stack size in Bytes.
- ``262144``: 256 KB
-
* - | ``CUDA_VISIBLE_DEVICES``
| The visible devices to HIP (whose indices are present in the sequence)
- None
- ``0,1,2``: List of the device indices. Depending on the number of devices in the system.
* - | ``DEBUG_CLR_GRAPH_PACKET_CAPTURE``
| Controls capturing of graph packets.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``DEBUG_CLR_LIMIT_BLIT_WG``
| Sets the limit for the number of workgroups in blit operations.
- ``16``
-
* - | ``DISABLE_DEFERRED_ALLOC``
| Controls deferred memory allocation on device.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``GPU_ADD_HBCC_SIZE``
| Adds HBCC size to the reported device memory.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``GPU_ANALYZE_HANG``
| Allows you to analyze GPU hang issue.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``GPU_BLIT_ENGINE_TYPE``
| Specifies blit engine type.
- ``0``
- | 0: Default
| 1: Host
| 2: CAL
| 3: Kernel
* - | ``GPU_CP_DMA_COPY_SIZE``
| Set maximum size of CP DMA copy in KB.
- ``1``
-
* - | ``GPU_DEBUG_ENABLE``
| Enables collection of extra information for debugger at the cost of performance.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``GPU_DEVICE_ORDINAL``
| Selects the device ordinal, which is a comma separated list of available devices.
- None
- A value of ``0,2`` exposes devices 1 and 3 in the system.
* - | ``GPU_DUMP_BLIT_KERNELS``
| Controls dumping of the kernels for blit manager.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``GPU_DUMP_CODE_OBJECT``
| Controls dumping of code object.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``GPU_ENABLE_COOP_GROUPS``
| Enables cooperative group launch.
- ``1``
- | 0: Disable
| 1: Enable
* - | ``GPU_ENABLE_HW_P2P``
| Enables hardware peer to peer (P2P) path.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``GPU_ENABLE_LC``
| Enables LC path.
- ``1``
- | 0: Disable
| 1: Enable
* - | ``GPU_ENABLE_PAL``
| Specifies platform abstraction library (PAL) backend.
- ``2``
- | 0: ROC
| 1: PAL
| 2: ROC or PAL
* - | ``GPU_ENABLE_WAVE32_MODE``
| Enables Wave32 compilation in hardware, if available.
- ``1``
- | 0: Disable
| 1: Enable
* - | ``GPU_ENABLE_WGP_MODE``
| Enables WGP Mode in hardware, if available. Workgroups of waves are
| dispatched in one of the two modes: CU or WGP.
- ``1``
- | 0: CU mode. The waves of a workgroup are distributed across just two SIMD32s.
| 1: WGP mode. The waves of a workgroup are distributed across all 4 SIMD32s within a workgroup.
* - | ``GPU_FORCE_BLIT_COPY_SIZE``
| Specifies the threshold size in KB, under which blit is forced instead of system direct memory access (SDMA).
- ``0``
-
* - | ``GPU_FORCE_QUEUE_PROFILING``
| Forces command queue profiling.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``GPU_FLUSH_ON_EXECUTION``
| Submits commands to hardware on every operation.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``GPU_IMAGE_BUFFER_WAR``
| Enables image buffer workaround.
- ``1``
- | 0: Disable
| 1: Enable
* - | ``GPU_IMAGE_DMA``
| Enables DRM DMA for image transfers.
- ``1``
- | 0: Disable
| 1: Enable
* - | ``GPU_MAX_COMMAND_BUFFERS``
| Sets the maximum number of command buffers allocated per queue.
- ``8``
-
* - | ``GPU_MAX_HEAP_SIZE``
| Sets the maximum size of the GPU heap (in percentage) on the board memory.
- ``100``
-
* - | ``GPU_MAX_HW_QUEUES``
| Sets the maximum number of hardware queues to be allocated per device.
- ``4``
- This variable controls how many independent hardware queues HIP runtime can create per process, per device. If an application allocates more HIP streams than the specified value, then HIP runtime reuses the same hardware queues for the new streams in a round-robin manner. Note that this value doesn't apply to hardware queues that are created for CU-masked HIP streams or cooperative queues for HIP cooperative groups (single queue per device).
* - | ``GPU_MAX_REMOTE_MEM_SIZE``
| Sets the maximum size in KB for device memory substitution with the system.
- ``2``
-
* - | ``GPU_MAX_SUBALLOC_SIZE``
| Sets the maximum size for sub-allocations in KB.
- ``4096``
-
* - | ``GPU_MAX_USWC_ALLOC_SIZE``
| Sets the maximum uncacheable speculative write combining (USWC) allocation size in MB.
- ``2048``
- -1: No limit
* - | ``GPU_MAX_WORKGROUP_SIZE``
| Sets the maximum number of workitems in a workgroup for GPU.
- ``0``: Sets no limit on workitems.
-
* - | ``GPU_MIPMAP``
| Enables GPU mipmap extension.
- ``1``
- | 0: Disable
| 1: Enable
* - | ``GPU_NUM_COMPUTE_RINGS``
| Sets the number of GPU compute rings.
- ``2``
- | 0: Disable
| Any other number corresponds to the number of compute rings.
* - | ``GPU_NUM_MEM_DEPENDENCY``
| Sets the number of memory objects for dependency tracking.
- ``256``
-
* - | ``GPU_PINNED_MIN_XFER_SIZE``
| Sets the minimum buffer size (in MB) for pinned read and write transfers.
- ``128``
-
* - | ``GPU_PINNED_XFER_SIZE``
| Sets the buffer size (in MB) for pinned read and write transfers.
- ``32``
-
* - | ``GPU_PRINT_CHILD_KERNEL``
| Specifies the number of child kernels to be printed.
- ``0``
-
* - | ``GPU_RESOURCE_CACHE_SIZE``
| Sets the resource cache size in MB.
- ``64``
-
* - | ``GPU_SINGLE_ALLOC_PERCENT``
| Sets the maximum size of a single allocation as a percentage of the total.
- ``85``
-
* - | ``GPU_STAGING_BUFFER_SIZE``
| Sets the GPU staging buffer size in MB.
- ``4``
-
* - | ``GPU_STREAMOPS_CP_WAIT``
| Forces the stream memory operation to wait on command processor (CP).
- ``0``
- | 0: Disable
| 1: Enable
* - | ``GPU_USE_DEVICE_QUEUE``
| Controls use of dedicated device queue for the actual submissions.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``GPU_WAVES_PER_SIMD``
| Forces the number of waves per SIMD.
- ``0``
- 1-10
* - | ``GPU_XFER_BUFFER_SIZE``
| Sets the transfer buffer size for image copy optimization in KB.
- ``0``
-
* - | ``HIP_FORCE_DEV_KERNARG``
| Forces device memory for kernel arguments.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``HIP_HIDDEN_FREE_MEM``
| Specifies the amount of memory to hide from the free memory reported by ``hipMemGetInfo``.
- ``0``: Disable
-
* - | ``HIP_HOST_COHERENT``
| Specifies if the memory is coherent between the host and GPU in ``hipHostMalloc``.
- ``0``
- | 0: Memory is not coherent.
| 1: Memory is coherent.
| Environment variable has effect, if the following conditions are statisfied:
| - One of the ``hipHostMallocDefault``, ``hipHostMallocPortable``, ``hipHostMallocWriteCombined`` or ``hipHostMallocNumaUser`` flag set to 1.
| - ``hipHostMallocCoherent``, ``hipHostMallocNonCoherent`` and ``hipHostMallocMapped`` flags set to 0.
* - | ``HIP_INITIAL_DM_SIZE``
| Sets the initial heap size for device malloc.
- ``8388608``: 8 MB
-
* - | ``HIP_LAUNCH_BLOCKING``
| Controls serialization of kernel execution.
- ``0``
- | 0: Disable. Kernel executes normally.
| 1: Enable. Serializes kernel execution; behaves similar to ``AMD_SERIALIZE_KERNEL``.
* - | ``HIP_MEM_POOL_SUPPORT``
| Enables memory pool support in HIP.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``HIP_MEM_POOL_USE_VM``
| Enables memory pool support in HIP.
- | ``0``: Default value on other OS.
| ``1``: Default value on Microsoft Windows.
- | 0: Disable
| 1: Enable
* - | ``HIP_USE_RUNTIME_UNBUNDLER``
| Controls use of runtime code object unbundler.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``HIP_VISIBLE_DEVICES``
| Specifies the indices of the devices allowed to be visible to HIP.
- None
- 0,1,2: Depending on the number of devices on the system.
* - | ``HIP_VMEM_MANAGE_SUPPORT``
| Enables virtual memory management support.
- ``1``
- | 0: Disable
| 1: Enable
* - | ``HIPCC_VERBOSE``
| Controls the extra information to be displayed during the build such as compiler commands with flags, paths and arguments.
- ``0``
- | 0x1: Print detailed compiler commands.
| 0x2: Print HIP, ROCm and CUDA paths (``HIP_PATH``, ``ROCM_PATH``, ``HIP_CLANG_PATH``, ...).
| 0x4: Print HIPCC arguments.
* - | ``HIPRTC_COMPILE_OPTIONS_APPEND``
| Sets compile options needed for ``hiprtc`` compilation.
- None
- ``--gpu-architecture=gfx906:sramecc+:xnack``, ``-fgpu-rdc``
* - | ``HIPRTC_LINK_OPTIONS_APPEND``
| Sets link options needed for ``hiprtc`` compilation.
- None
-
* - | ``HIPRTC_USE_RUNTIME_UNBUNDLER``
| Forces runtime unbundler in hiprtc.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``HSA_KERNARG_POOL_SIZE``
| Sets the pool size for kernel arguments.
- ``1048576``: 1 MB
-
* - | ``HSA_LOCAL_MEMORY_ENABLE``
| Enables use of local memory on HSA device.
- ``1``
- | 0: Disable
| 1: Enable
* - | ``PAL_DISABLE_SDMA``
| Disables SDMA for PAL.
- ``0``
- | 0: Enable SDMA for PAL.
| 1: Disable SDMA for PAL.
* - | ``PAL_MALL_POLICY``
| Controls the behaviour of allocations with respect to the MALL.
- ``0``
- | 0: MALL policy is decided by KMD.
| 1: Allocations are never put through the MALL.
| 2: Allocations will always be put through the MALL.
* - | ``PAL_ALWAYS_RESIDENT``
| Forces memory resources to become resident during allocation.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``PAL_EMBED_KERNEL_MD``
| Enables writing kernel metadata into command buffers.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``PAL_FORCE_ASIC_REVISION``
| Forces a specific ASIC revision on all devices.
- ``0``
-
* - | ``PAL_HIP_IPC_FLAG``
| Enables inter-process flag for device allocation in PAL HIP.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``PAL_PREPINNED_MEMORY_SIZE``
| Sets the size in KB of pre-pinned memory.
- ``64``
-
* - | ``PAL_RGP_DISP_COUNT``
| Sets the number of dispatches for RGP capture with SQTT.
- ``10000``
-
* - | ``REMOTE_ALLOC``
| Enables use of remote memory for the global heap allocation.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``ROC_ACTIVE_WAIT_TIMEOUT``
| Forces active wait of GPU interrupt for the timeout in us.
- ``0``
-
* - | ``ROC_AQL_QUEUE_SIZE``
| Sets the AQL queue size in bytes in the AQL packets.
- ``16384``: 16 KB
-
* - | ``ROC_CPU_WAIT_FOR_SIGNAL``
| Enable CPU wait for dependent HSA signals.
- ``1``
- | 0: Disable
| 1: Enable
* - | ``ROC_ENABLE_LARGE_BAR``
| Enable large bar if supported by the device.
- ``1``
- | 0: Disable
| 1: Enable
* - | ``ROC_GLOBAL_CU_MASK``
| Sets a global CU mask, entered as hex value for all queues. Each active bit represents one CU, e.g., ``0xf`` enables 4 CUs.
- None
-
* - | ``ROC_HMM_FLAGS``
| Sets ROCm HMM configuration flags.
- ``0``: Disabled
-
* - | ``ROC_P2P_SDMA_SIZE``
| Sets the minimum size in KB for peer to peer (P2P) transfer with SDMA.
- ``1024``: 1 MB
-
* - | ``ROC_SIGNAL_POOL_SIZE``
| Sets the initial size for HSA signal pool.
- ``32``
-
* - | ``ROC_SKIP_KERNEL_ARG_COPY``
| Allows the runtime to skip kernel argument copy.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``ROC_SYSTEM_SCOPE_SIGNAL``
| Enable system scope for signals, uses interrupts.
- ``1``
- | 0: Disable
| 1: Enable
* - | ``ROC_USE_FGS_KERNARG``
| Enables use of fine grain kernel arguments segment for supported ASICs.
- ``1``
- | 0: Disable
| 1: Enable
* - | ``ROCPROFILER_REGISTER_ROOT``
| Sets the path to ``rocProfiler``.
- None
-
The following table lists the debug environment variables that affect ``rocclr`` of the CLR project. These environment variables can only be set during DEBUG build.
.. list-table::
:header-rows: 1
:widths: 35,14,51
* - **Environment variable**
- **Default value**
- **Value**
* - | ``AMD_OCL_SUBST_OBJFILE``
| Specifies binary substitution config file for OpenCL.
- None
-
* - | ``CPU_MEMORY_ALIGNMENT_SIZE``
| Sets the size in bytes for the default alignment of guarded memory on CPU.
- ``256``
-
* - | ``CPU_MEMORY_GUARD_PAGE_SIZE``
| Size of the CPU memory guard page in KB.
- ``64``: 64 KB
-
* - | ``CPU_MEMORY_GUARD_PAGES``
| Enables using guard pages for CPU memory.
- ``0``
- | 0: Disable
| 1: Enable
* - | ``MEMOBJ_BASE_ADDR_ALIGN``
| Alignment of the base address of any allocate memory object.
- ``4096``: 4 KB
-
* - | ``PARAMETERS_MIN_ALIGNMENT``
| Specifies the minimum alignment required for the abstract parameters stack.
- 64 at ``__AVX512F__``, 32 at ``__AVX__`` and 16 in other cases
-
ROCR-Runtime environment variables
==================================
.. https://github.com/ROCm/ROCR-Runtime/blob/master/src/core/util/flag.h
.. We need to extend the following list.
The following table lists the ROCR-Runtime environment variables:
.. list-table::
:header-rows: 1
:widths: 35,14,51
* - **Environment variable**
- **Default value**
- **Value**
* - | ``ROCR_VISIBLE_DEVICES``
| Specifies a list of device indices or UUIDs to be exposed to the applications.
- None
- ``0,GPU-DEADBEEFDEADBEEF``
* - | ``HSA_SCRATCH_MEM``
| Specifies the maximum amount of scratch memory that can be used per process per GPU.
-
-
* - | ``HSA_XNACK``
| Enables XNACK.
- None
- 1: Enable
* - | ``HSA_CU_MASK``
| Sets the mask on a lower level of queue creation in the driver.
| This mask is also applied to the queues being profiled.
- None
- ``1:0-8``
* - | ``HSA_ENABLE_SDMA``
| Enables the use of direct memory access (DMA) engines in all copy directions (Host-to-Device, Device-to-Host, Device-to-Device), when using any of the following APIs:
| ``hsa_memory_copy``,
| ``hsa_amd_memory_fill``,
| ``hsa_amd_memory_async_copy``,
| ``hsa_amd_memory_async_copy_on_engine``.
- ``1``
- | 0: Disable
| 1: Enable
* - | ``HSA_ENABLE_PEER_SDMA``
| Enables the use of DMA engines for Device-to-Device copies, when using any of the following APIs:
| ``hsa_memory_copy``,
| ``hsa_amd_memory_async_copy``,
| ``hsa_amd_memory_async_copy_on_engine``.
- ``1``
- | 0: Disable
| 1: Enable
Note that this environment variable is ignored if ``HSA_ENABLE_SDMA`` is set to 0.
rocPRIM environment variables
=============================
The following table lists the environment variables used in the rocPRIM library.
.. list-table::
:header-rows: 1
:widths: 70,30
* - **Environment variable**
- **Default value**
* - | ``HIP_PATH``
| Specifies the path of the HIP SDK on Microsoft Windows.
- ``C:/hip``
* - | ``HIP_DIR``
| Specifies the path of the HIP SDK on Microsoft Windows. This variable is ignored, if ``HIP_PATH`` is set.
- ``C:/hip``
* - | ``VCPKG_PATH``
| Specifies the path of the ``vcpkg`` package manager on Microsoft Windows. This environment variable has no effect on Linux.
- ``C:/github/vcpkg``
* - | ``ROCM_PATH``
| Specifies the path of the installed ROCm software stack on Linux.
- ``/opt/rocm``
* - | ``ROCM_CMAKE_PATH``
| Specifies the path of the installed ROCm ``cmake`` file on Microsoft Windows.
- ``C:/hipSDK``
* - | ``HIPCC_COMPILE_FLAGS_APPEND``
| Enables extra ``amdclang++`` compiler flags on Linux. This environment variable is ignored if ``CXX`` environment variable is set.
- None
* - | ``ROCPRIM_USE_HMM``
| Enables the test suite to use unified memory, when set to 1 during the tests.
- None
* - | ``CTEST_RESOURCE_GROUP_0``
| Enables grouping of the tests for different CI steps. This environment variable is used by CI and is of little use to most users.
- None
hipCUB environment variables
============================
The following table lists the environment variables used in the hipCUB library.
.. list-table::
:header-rows: 1
:widths: 70,30
* - **Environment variable**
- **Default value**
* - | ``HIP_PATH``
| Specifies the path of the HIP SDK on Microsoft Windows.
- ``C:/hip``
* - | ``HIP_DIR``
| Specifies the path of the HIP SDK on Microsoft Windows. This variable is ignored, if ``HIP_PATH`` is set.
- ``C:/hip``
* - | ``VCPKG_PATH``
| Specifies the path of the ``vcpkg`` package manager on Microsoft Windows. This environment variable has no effect on Linux.
- ``C:/github/vcpkg``
* - | ``ROCM_PATH``
| Specifies the path of the installed ROCm software stack on Linux.
- ``/opt/rocm``
* - | ``HIPCC_COMPILE_FLAGS_APPEND``
| Enables extra ``amdclang`` or ``amdclang++`` compiler flags on Linux. This environment variable is ignored if ``CXX`` or ``CC`` environment variable is set.
- None
* - | ``HIPCUB_USE_HMM``
| Enables the test suite to use unified memory, when set to 1 during the tests.
- None
* - | ``CTEST_RESOURCE_GROUP_0``
| Enables grouping of the tests for different CI steps. This environment variable is used by CI and is of little use to most users.
- None
rocThrust environment variables
===============================
The following table lists the environment variables used in the rocThrust library.
.. list-table::
:header-rows: 1
:widths: 70,30
* - **Environment variable**
- **Default value**
* - | ``HIP_PATH``
| Specifies the path of the HIP SDK on Microsoft Windows.
- ``C:/hip``
* - | ``HIP_DIR``
| Specifies the path of the HIP SDK on Microsoft Windows. This variable is ignored, if ``HIP_PATH`` is set.
- ``C:/hip``
* - | ``VCPKG_PATH``
| Specifies the path of the ``vcpkg`` package manager on Microsoft Windows. This environment variable has no effect on Linux.
- ``C:/github/vcpkg``
* - | ``ROCM_PATH``
| Specifies the path of the installed ROCm software stack on Linux.
- ``/opt/rocm``
* - | ``ROCTHRUST_USE_HMM``
| Enables the test suite to use unified memory, when set to 1 during the tests.
- None
* - | ``CTEST_RESOURCE_GROUP_0``
| Enables grouping of the tests for different CI steps. This environment variable is used by CI and is of little use to most users.
- None

File diff suppressed because it is too large Load Diff

View File

@@ -22,8 +22,8 @@
* {doc}`HIPIFY <hipify:index>`
* {doc}`ROCdbgapi <rocdbgapi:index>`
* [ROCmCC](./rocmcc.md)
* [ROCm Debug Agent](https://github.com/ROCm/rocr_debug_agent)
* {doc}`ROCm debugger (ROCgdb) <rocgdb:index>`
* {doc}`ROCm Debugger (ROCgdb) <rocgdb:index>`
* {doc}`ROCr Debug Agent <rocr_debug_agent:index>`
:::
(performance-tools)=

View File

@@ -8,6 +8,7 @@
| Version | Release date |
| ------- | ------------ |
| [6.1.2](https://rocm.docs.amd.com/en/docs-6.1.2/) | June 4, 2024 |
| [6.1.1](https://rocm.docs.amd.com/en/docs-6.1.1/) | May 8, 2024 |
| [6.1.0](https://rocm.docs.amd.com/en/docs-6.1.0/) | Apr 16, 2024 |
| [6.0.2](https://rocm.docs.amd.com/en/docs-6.0.2/) | Jan 31, 2024 |

View File

@@ -44,6 +44,8 @@ subtrees:
title: API libraries
- file: reference/rocm-tools.md
title: Tools
- file: reference/env-variables
title: Environment variables
- file: reference/gpu-arch-specs.rst
title: Hardware specifications
@@ -58,27 +60,29 @@ subtrees:
- file: how-to/rocm-for-ai/train-a-model.rst
- file: how-to/rocm-for-ai/hugging-face-models.rst
- file: how-to/rocm-for-ai/deploy-your-model.rst
- file: how-to/fine-tuning-llms/index.rst
- file: how-to/rocm-for-hpc/index.rst
title: Using ROCm for HPC
- file: how-to/llm-fine-tuning-optimization/index.rst
title: Fine-tuning LLMs and inference optimization
subtrees:
- entries:
- file: how-to/fine-tuning-llms/overview.rst
- file: how-to/llm-fine-tuning-optimization/overview.rst
title: Conceptual overview
- file: how-to/fine-tuning-llms/fine-tuning-and-inference.rst
- file: how-to/llm-fine-tuning-optimization/fine-tuning-and-inference.rst
subtrees:
- entries:
- file: how-to/fine-tuning-llms/single-gpu-fine-tuning-and-inference.rst
- file: how-to/llm-fine-tuning-optimization/single-gpu-fine-tuning-and-inference.rst
title: Using a single accelerator
- file: how-to/fine-tuning-llms/multi-gpu-fine-tuning-and-inference.rst
- file: how-to/llm-fine-tuning-optimization/multi-gpu-fine-tuning-and-inference.rst
title: Using multiple accelerators
- file: how-to/fine-tuning-llms/model-quantization.rst
- file: how-to/fine-tuning-llms/model-acceleration-libraries.rst
- file: how-to/fine-tuning-llms/llm-inference-frameworks.rst
- file: how-to/fine-tuning-llms/optimizing-with-composable-kernel.md
- file: how-to/llm-fine-tuning-optimization/model-quantization.rst
- file: how-to/llm-fine-tuning-optimization/model-acceleration-libraries.rst
- file: how-to/llm-fine-tuning-optimization/llm-inference-frameworks.rst
- file: how-to/llm-fine-tuning-optimization/optimizing-with-composable-kernel.md
title: Optimizing with Composable Kernel
- file: how-to/fine-tuning-llms/optimizing-triton-kernel.rst
- file: how-to/llm-fine-tuning-optimization/optimizing-triton-kernel.rst
title: Optimizing Triton kernels
- file: how-to/fine-tuning-llms/profiling-and-debugging.rst
- file: how-to/llm-fine-tuning-optimization/profiling-and-debugging.rst
- file: how-to/tuning-guides.md
title: System optimization
subtrees:
@@ -101,6 +105,8 @@ subtrees:
title: Compiler disambiguation
- file: about/compatibility/openmp.md
title: OpenMP support
- file: how-to/setting-cus
title: Setting the number of CUs
- file: how-to/system-debugging.md
title: Debugging
- url: https://github.com/amd/rocm-examples
@@ -140,8 +146,6 @@ subtrees:
title: White paper
- file: conceptual/gpu-memory.md
title: GPU memory
- file: conceptual/setting-cus
title: Setting the number of CUs
- file: conceptual/file-reorg.md
title: File structure (Linux FHS)
- file: conceptual/gpu-isolation.md

View File

@@ -1,2 +1,2 @@
rocm-docs-core==1.2.0
sphinx-reredirects
rocm-docs-core==1.4.0
sphinx-reredirects

View File

@@ -92,7 +92,7 @@ requests==2.32.3
# via
# pygithub
# sphinx
rocm-docs-core==1.2.0
rocm-docs-core==1.4.0
# via -r requirements.in
smmap==5.0.1
# via gitdb
@@ -122,7 +122,7 @@ sphinx-external-toc==1.0.1
# via rocm-docs-core
sphinx-notfound-page==1.0.2
# via rocm-docs-core
sphinx-reredirects==0.1.3
sphinx-reredirects==0.1.4
# via -r requirements.in
sphinxcontrib-applehelp==1.0.8
# via sphinx
@@ -142,7 +142,7 @@ typing-extensions==4.12.0
# via
# pydata-sphinx-theme
# pygithub
urllib3==2.2.1
urllib3==2.2.2
# via
# pygithub
# requests

View File

@@ -1,53 +1,6 @@
/* Adds container for big tables, used for Compatibility Matrix */
/* Header row to have opaque background colour when sticky */
.format-big-table th {
background-color: var(--pst-color-background);
.format-big-table {
white-space: nowrap;
}
/* Turn on borders for whole table */
.format-big-table th,
.format-big-table td {
border-width: 1px;
}
/* .format-big-table th.head { */
/* background-color: var(--pst-color-on-surface); */
/* } */
/* Sticky header for table excluding the stub*/
.format-big-table th.head:not(.stub) {
position: sticky;
top: 3rem;
z-index: 1;
}
/* Sticky header for the head & stub: top left cell */
.format-big-table th.head.stub {
position: sticky;
top: 3rem;
z-index: 1;
background-color: var(--pst-color-background);
white-space: nowrap;
}
/* Sticky for the stub column */
/*.format-big-table tbody th:not(:empty) {
position: sticky;
top: 3rem;
z-index: 2;
}*/
/* Removes borders for stub column */
.format-big-table tbody th {
border-top: none;
border-bottom: none;
}
/* For horizontal scrolling only. Can't be combined with format-big-table container */
.horizontal-scrolling-container {
overflow-x: scroll;
}

View File

@@ -43,7 +43,7 @@ Machine Learning & Computer Vision
":doc:`Composable Kernel <composable_kernel:index>`", "Provides a programming model for writing performance critical kernels for machine learning workloads across multiple architectures"
":doc:`MIGraphX <amdmigraphx:index>`", "Graph inference engine that accelerates machine learning model inference"
":doc:`MIOpen <miopen:index>`", "An open source deep-learning library"
":doc:`MIVisionX <mivisionx:doxygen/html/index>`", "Set of comprehensive computer vision and machine learning libraries, utilities, and applications"
":doc:`MIVisionX <mivisionx:index>`", "Set of comprehensive computer vision and machine learning libraries, utilities, and applications"
":doc:`rocAL <rocal:index>`", "An augmentation library designed to decode and process images and videos"
":doc:`rocDecode <rocdecode:index>`", "High-performance SDK for access to video decoding features on AMD GPUs"
":doc:`ROCm Performance Primitives (RPP) <rpp:index>`", "Comprehensive high-performance computer vision library for AMD processors with HIP/OpenCL/CPU back-ends"
@@ -99,18 +99,18 @@ Tools
":doc:`AMD SMI <amdsmi:index>`", "C library for Linux that provides a user space interface for applications to monitor and control AMD devices"
":doc:`HIPIFY <hipify:index>`", "Translates CUDA source code into portable HIP C++"
":doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`", "Captures the performance characteristics of buffer copying and kernel read/write operations"
":doc:`ROCdbgapi <rocdbgapi:index>`", "ROCm debugger API library"
":doc:`ROCmCC <./reference/rocmcc>`", "Clang/LLVM-based compiler"
":doc:`rocminfo <rocminfo:index>`", "Reports system information"
":doc:`ROCProfiler <rocprofiler:index>`", "Profiling tool for HIP applications"
":doc:`ROCTracer <roctracer:index>`", "Intercepts runtime API calls and traces asynchronous activity"
":doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`", "Captures the performance characteristics of buffer copying and kernel read/write operations"
":doc:`ROCm CMake <rocmcmakebuildtools:index>`", "Collection of CMake modules for common build and development tasks"
":doc:`ROCm Data Center Tool <rdc:index>`", "Simplifies administration and addresses key infrastructure challenges in AMD GPUs in cluster and data-center environments"
"`ROCm Debug Agent (ROCdebug-agent) <https://github.com/ROCm/rocr_debug_agent/>`_ ", "Prints the state of all AMD GPU wavefronts that caused a queue error by sending a SIGQUIT signal to the process while the program is running"
":doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`", "Source-level debugger for Linux, based on the GNU Debugger (GDB)"
":doc:`ROCdbgapi <rocdbgapi:index>`", "ROCm debugger API library"
":doc:`rocminfo <rocminfo:index>`", "Reports system information"
":doc:`ROCm SMI <rocm_smi_lib:index>`", "C library for Linux that provides a user space interface for applications to monitor and control GPU applications"
":doc:`ROCm Validation Suite <rocmvalidationsuite:index>`", "Detects and troubleshoots common problems affecting AMD GPUs running in a high-performance computing environment"
":doc:`ROCProfiler <rocprofiler:profiler_home_page>`", "Profiling tool for HIP applications"
":doc:`ROCTracer <roctracer:index>`", "Intercepts runtime API calls and traces asynchronous activity"
":doc:`ROCr Debug Agent <rocr_debug_agent:index>`", "Prints the state of all AMD GPU wavefronts that caused a queue error by sending a SIGQUIT signal to the process while the program is running"
":doc:`TransferBench <transferbench:index>`", "Utility to benchmark simultaneous transfers between user-specified devices (CPUs/GPUs)"
Compilers
@@ -119,7 +119,6 @@ Compilers
.. csv-table::
:header: "Component", "Description"
"`AOMP <https://github.com/ROCm/aomp/>`_", "Scripted build of `LLVM <https://github.com/ROCm/llvm-project>`_ and supporting software"
"`FLANG <https://github.com/ROCm/flang/>`_", "An out-of-tree Fortran compiler targeting LLVM"
":doc:`hipCC <hipcc:index>`", "Compiler driver utility that calls Clang or NVCC and passes the appropriate include and library options for the target compiler and HIP infrastructure"
"`LLVM (amdclang) <https://github.com/ROCm/llvm-project>`_ ", "Toolkit for the construction of highly optimized compilers, optimizers, and runtime environments"

View File

@@ -281,7 +281,7 @@ Note: These complex operations are equivalent to corresponding types/functions o
* `HIP_ROCclr`
* NVIDIA platform
* `HIP_PLATFORM_NVCC`
* The [hcc_detail](https://github.com/ROCm/clr/tree/1949b1621a802ffb1492616adbae6154bfbe64ef/hipamd/include/hip/hcc_detail) and [nvcc_detail](https://github.com/ROCm/clr/tree/1949b1621a802ffb1492616adbae6154bfbe64ef/hipamd/include/hips/nvcc_detail) directories in the clr repository are removed.
* The `hcc_detail` and `nvcc_detail` directories in the clr repository are removed.
* Deprecated gcnArch is removed from hip device struct `hipDeviceProp_t`.
* Deprecated `enum hipMemoryType memoryType;` is removed from HIP struct `hipPointerAttribute_t` union.

View File

@@ -3,7 +3,9 @@ ROCm™ 6.1.1 introduces minor fixes and improvements to some tools and librarie
### OS support
ROCm 6.1.1 has been tested against a pre-release version of Ubuntu 22.04.5 (kernel: 5.15 [GA], 6.8 [HWE]).
* ROCm 6.1.1 now supports Oracle Linux. It has been tested against version 8.9 (kernel 5.15.0-205) with AMD Instinct MI300X accelerators.
* ROCm 6.1.1 has been tested against a pre-release version of Ubuntu 22.04.5 (kernel: 5.15 [GA], 6.8 [HWE]).
### AMD SMI

View File

@@ -0,0 +1,37 @@
# Overview for ROCm.mk
This Makefile builds the various projects that makes up ROCm in the correct order.
It is expected to be run in an environment with the tooling set up. An easy way
to do this is to use Docker.
## Targets
* all (default)
* rocm-dev (a subset of all)
* clean
* list_components
* help
* T_foo
* C_foo
## Makefile Variables
* PEVAL set to 1 to enable some Makefile debugging code.
* RELEASE\_FLAG set to "" to avoid passing "-r" to builds, effect is package defined.
* NOBUILD="foo bar" to avoid adding foo and bar into the dependencies of top level targets. They still may be
built if they are needed as dependencies of other top level targets.
* toplevel
## Makefile assumptions
### Requirements for package "foo"
#### program build\_foo.sh
* Should take option "-c" to clean
* Should take option "-r" to do a normal "RelWithDeb" build
### For package "foo" we define some targets
* T\_foo - The main build target, calls "build\_foo.sh -r"
* C\_foo - Clean target, calls "build_foo.sh -c"

248
tools/rocm-build/ROCm.mk Normal file
View File

@@ -0,0 +1,248 @@
# Traditional first make target
all:
# Use bash as a shell
# On Ubuntu sh is 'dash'
SHELL:=bash
# Allow RELEASE_FLAG to be overwritten
RELEASE_FLAG?=-r
# Set SANITIZER_FLAG for sanitizer
ASAN_DEP:=
ifeq (${ENABLE_ADDRESS_SANITIZER},true)
ASAN_DEP=lightning
SANITIZER_FLAG=-a
endif
export INFRA_REPO:=ROCm/tools/rocm-build
OUT_DIR:=$(shell . ${INFRA_REPO}/envsetup.sh >/dev/null 2>&1 ; echo $${OUT_DIR})
ROCM_INSTALL_PATH:=$(shell . ${INFRA_REPO}/envsetup.sh >/dev/null 2>&1 ; echo $${ROCM_INSTALL_PATH})
$(info OUT_DIR=${OUT_DIR})
$(info ROCM_INSTALL_PATH=${ROCM_INSTALL_PATH})
# -------------------------------------------------------------------------
# Internal stuff. Could be put in a different file to hide it.
# Internal macros, they need to be defined before being used.
# The internal "eval" allows parts of the Makefile to be generated.
# Whilst it is possible to dump the effective Makefile, it can be
# hard to see where parts come from. Set up the "peval" macro which
# optionally prints out the generated makefile snippet and evaluate it.
# Use "make PEVAL=1 all" to see the things being evaluated.
ifeq (,${PEVAL})
define peval =
$(eval $1)
endef
else
define peval =
$(eval $(info $1)$1)
endef
endif
# macro to add dependencies. Saves having to put all the OUT_DIR/logs in
# The outer strip is to work around a gnu make 4.1 and earlier bug
# It should not be needed.
define adddep =
$(strip $(call peval,components+= $(1) $(2))
$(foreach comp,$(strip $2),$(call peval,${OUT_DIR}/logs/${1}: ${OUT_DIR}/logs/${comp}))
)
endef
# End of internal stuff that is needed at the start of the file
# -------------------------------------------------------------------------
# Dependencies. These can be updated. Anything that is mentioned in
# either the args to the adddep macro will be added to components. as
# an example there is no need for the adddep of lightning, as it
# depends on nothing and at least one other component includes it.
# Syntax. Up to the first comma everything is fixed. The "call" is a
# keyword to gnu make. The "adddep" is the name of the variable containing
# the macro.
# The second comma delimited argument is the target.
# The third comma delimited arg is the thing that the target depends on.
# It is a space seperated list with zero or more elements.
$(call adddep,amd_smi_lib,${ASAN_DEP})
$(call adddep,aqlprofile,${ASAN_DEP} hsa)
$(call adddep,clang-ocl,lightning rocm-cmake)
$(call adddep,comgr,lightning devicelibs)
$(call adddep,dbgapi,hsa comgr)
$(call adddep,devicelibs,lightning)
$(call adddep,hip_on_rocclr,${ASAN_DEP} rocclr rocprofiler-register)
$(call adddep,hipcc,)
$(call adddep,hipify_clang,hip_on_rocclr lightning)
$(call adddep,hsa,${ASAN_DEP} thunk lightning devicelibs rocprofiler-register)
$(call adddep,lightning,)
$(call adddep,opencl_on_rocclr,${ASAN_DEP} rocclr)
$(call adddep,openmp_extras,thunk lightning devicelibs hsa)
$(call adddep,rdc,${ASAN_DEP} rocm_smi_lib hsa rocprofiler)
$(call adddep,rocclr,${ASAN_DEP} hsa comgr hipcc rocprofiler-register)
$(call adddep,rocm_bandwidth_test,${ASAN_DEP} hsa)
$(call adddep,rocm_smi_lib,${ASAN_DEP})
$(call adddep,rocm-cmake,${ASAN_DEP})
$(call adddep,rocm-core,${ASAN_DEP})
$(call adddep,rocm-gdb,dbgapi)
$(call adddep,rocminfo,${ASAN_DEP} hsa)
$(call adddep,rocprofiler-register,${ASAN_DEP})
$(call adddep,rocprofiler,${ASAN_DEP} hsa roctracer aqlprofile opencl_on_rocclr hip_on_rocclr comgr dbgapi rocm_smi_lib)
$(call adddep,rocr_debug_agent,${ASAN_DEP} hip_on_rocclr hsa dbgapi)
$(call adddep,roctracer,${ASAN_DEP} hsa hip_on_rocclr)
$(call adddep,thunk,${ASAN_DEP})
# rocm-dev points to all possible last finish components of Stage1 build.
rocm-dev-components :=rdc hipify_clang openmp_extras \
rocm-core amd_smi_lib hipcc clang-ocl \
rocm_bandwidth_test rocr_debug_agent rocm-gdb
$(call adddep,rocm-dev,$(filter-out ${NOBUILD},${rocm-dev-components}))
$(call adddep,amdmigraphx,hip_on_rocclr half rocblas miopen-hip lightning hipcc)
$(call adddep,composable_kernel,lightning hipcc hip_on_rocclr rocm-cmake)
$(call adddep,half,rocm-cmake)
$(call adddep,hipblas,hip_on_rocclr rocblas rocsolver lightning hipcc)
$(call adddep,hipblaslt,hip_on_rocclr openmp_extras hipblas lightning hipcc)
$(call adddep,hipcub,hip_on_rocclr rocprim lightning hipcc)
$(call adddep,hipfft,hip_on_rocclr openmp_extras rocfft rocrand hiprand lightning hipcc)
$(call adddep,hipfort,rocblas hipblas rocsparse hipsparse rocfft hipfft rocrand hiprand rocsolver hipsolver lightning hipcc)
$(call adddep,hiprand,hip_on_rocclr rocrand lightning hipcc)
$(call adddep,hipsolver,hip_on_rocclr rocblas rocsolver rocsparse lightning hipcc hipsparse)
$(call adddep,hipsparse,hip_on_rocclr rocsparse lightning hipcc)
$(call adddep,hipsparselt,hip_on_rocclr hipsparse lightning hipcc openmp_extras)
$(call adddep,hiptensor,hip_on_rocclr composable_kernel lightning hipcc)
$(call adddep,miopen-deps,lightning hipcc)
$(call adddep,miopen-hip,composable_kernel half hip_on_rocclr miopen-deps rocblas roctracer lightning hipcc)
$(call adddep,mivisionx,amdmigraphx miopen-hip rpp lightning hipcc)
$(call adddep,rccl,hip_on_rocclr hsa lightning hipcc rocm_smi_lib hipify_clang)
$(call adddep,rocalution,rocblas rocsparse rocrand lightning hipcc)
$(call adddep,rocblas,hip_on_rocclr openmp_extras lightning hipcc)
$(call adddep,rocdecode,hip_on_rocclr lightning hipcc)
$(call adddep,rocfft,hip_on_rocclr rocrand hiprand lightning hipcc openmp_extras)
$(call adddep,rocmvalidationsuite,hip_on_rocclr hsa rocblas rocm-core lightning hipcc rocm_smi_lib)
$(call adddep,rocprim,hip_on_rocclr lightning hipcc)
$(call adddep,rocrand,hip_on_rocclr lightning hipcc)
$(call adddep,rocsolver,hip_on_rocclr rocblas rocsparse lightning hipcc)
$(call adddep,rocsparse,hip_on_rocclr rocprim lightning hipcc)
$(call adddep,rocthrust,hip_on_rocclr rocprim lightning hipcc)
$(call adddep,rocwmma,hip_on_rocclr rocblas lightning hipcc rocm-cmake rocm_smi_lib)
$(call adddep,rpp,half lightning hipcc openmp_extras)
# -------------------------------------------------------------------------
# The rest of the file is internal
# Do not pass jobserver params if -n build
ifneq (,$(findstring n,${MAKEFLAGS}))
RMAKE:=
else
RMAKE := +
endif
# disable the builtin rules
.SUFFIXES:
# Linear
# include moredeps
# A macro to define a toplevel target, add it to the 'all' target
# Make it depend on the generated log. Generate the log of the build.
# See if the macro is already defined, if so don't touch it.
# As GNU make allows more than one makefile to be specified with "-f"
# one could put an alternative definition of "toplevel" in a different
# file or even the environment, and use the data in this file for other
# purposes. Uses might include generating output in "dot" format for
# showing the dependency graph, or having a wrapper script to run programs
# to generate code quality tools.
ifeq (${toplevel},)
# { Start of test to see if toplevel is defined
define toplevel =
# The "target" make, this builds the package if it is out of date
T_$1: ${OUT_DIR}/logs/$1 FRC
: $1 built
# The "upload" for $1, it uploads the packages for $1 to the central storage
U_$1: T_$1 FRC
source $${INFRA_REPO}/envsetup.sh && $${INFRA_REPO}/upload_packages.sh "$1"
: $1 uploaded
# The "clean" for $1, it just marks the target as not existing so it will be built
# in the future.
C_$1: FRC
rm -f ${OUT_DIR}/logs/$1 ${OUT_DIR}/logs/$1.repackaged
# parallel build {
${OUT_DIR}/logs/$1: | ${OUT_DIR}/logs
ifneq ($(wildcard ${OUT_DIR}/logs/$1.repackaged),)
@echo Skipping build of $1 as it has already been repackaged
cat $$@.repackaged > $$@
rm -f $$@.repackaged
else # } {
@echo $1 started due to $$? | sed "s:${OUT_DIR}/logs/::g"
# Build in a subshell so we get the time output
# Pass in jobserver info using the RMAKE variable
${RMAKE}@( if set -x && source $${INFRA_REPO}/envsetup.sh && \
rm -f $$@.errors $$@ $$@.repackaged && \
$${INFRA_REPO}/build_$1.sh -c && source $${INFRA_REPO}/ccache-env-mathlib.sh && \
time bash -x $${INFRA_REPO}/build_$1.sh $${RELEASE_FLAG} $${SANITIZER_FLAG} && $${INFRA_REPO}/post_inst_pkg.sh "$1" ; \
then mv $$@.inprogress $$@ ; \
else mv $$@.inprogress $$@.errors ; echo Error in $1 >&2 ; exit 1 ;\
fi ) > $$@.inprogress 2>&1
endif # }
# end of toplevel macro
endef
# } End of test to see if toplevel is defined
endif
components:=$(sort $(components))
# Create all the T_xxxx and C_xxxx targets
$(call peval,$(foreach dep,$(strip ${components}),$(call toplevel,${dep})))
# Add all the T_xxxx targets to "all" except those listed in NOBUILD
# Note this does not prohibit them from being built, it just means that
# a build of "all" will not force them to be built directly
# example command
# make -f jenkins-utils/scripts/Stage1.mk -j60
##help all: Build everything
all: $(addprefix T_,$(filter-out ${NOBUILD},${components}))
@echo All ROCm components built
# Do not document this target
upload: $(addprefix U_,${components})
@echo All ROCm components built and uploaded
##help rocm-dev: Build a subset of ROCm
rocm-dev: T_rocm-dev
@echo rocm-dev built
${OUT_DIR}/logs:
sudo mkdir -p -m 775 "${ROCM_INSTALL_PATH}" && \
sudo chown -R "$(shell id -u):$(shell id -g)" "${ROCM_INSTALL_PATH}"
sudo chown -R "$(shell id -u):$(shell id -g)" "/home/$(shell id -un)"
mkdir -p "${@}"
mkdir -p ${HOME}/.ccache
##help clean: remove the output directory and recreate it
clean:
[ -n "${OUT_DIR}" ] && rm -rf "${OUT_DIR}"
mkdir -p ${OUT_DIR}/logs
.SECONDARY: ${components:%=${OUT_DIR}/logs/%}
.PHONY: all clean repack help list_components
##help list_components: output the list of components
##help : Hint make list_components | paste - - - | column -t
list_components:
@echo "${components}" | sed 'y/ /\n/'
##help help: show this text
help:
@sed -n 's/^##help //p' ${MAKEFILE_LIST} | \
if type -t column > /dev/null ; then column -s: -t ; else cat ; fi
FRC:

View File

@@ -0,0 +1,145 @@
#!/bin/bash
source "$(dirname "${BASH_SOURCE}")/compute_utils.sh"
printUsage() {
echo
echo "Usage: $(basename "${BASH_SOURCE}") [-c|-r|-h] [makeopts]"
echo
echo "Options:"
echo " -c, --clean Removes all amd_smi build artifacts"
echo " -r, --release Build non-debug version amd_smi (default is debug)"
echo " -a, --address_sanitizer Enable address sanitizer"
echo " -s, --static Build static lib (.a). build instead of dynamic/shared(.so) "
echo " -o, --outdir <pkg_type> Print path of output directory containing packages of type referred to by pkg_type"
echo " -p, --package <type> Specify packaging format"
echo " -h, --help Prints this help"
echo "Possible values for <type>:"
echo " deb -> Debian format (default)"
echo " rpm -> RPM format"
echo
return 0
}
PACKAGE_ROOT="$(getPackageRoot)"
TARGET="build"
PACKAGE_LIB=$(getLibPath)
PACKAGE_INCLUDE="$(getIncludePath)"
AMDSMI_BUILD_DIR=$(getBuildPath amdsmi)
AMDSMI_PACKAGE_DEB_DIR="$(getPackageRoot)/deb/amdsmi"
AMDSMI_PACKAGE_RPM_DIR="$(getPackageRoot)/rpm/amdsmi"
AMDSMI_BUILD_TYPE="debug"
BUILD_TYPE="Debug"
MAKETARGET="deb"
MAKEARG="$DASH_JAY O=$AMDSMI_BUILD_DIR"
AMDSMI_MAKE_OPTS="$DASH_JAY O=$AMDSMI_BUILD_DIR -C $AMDSMI_BUILD_DIR"
AMDSMI_PKG_NAME="amd-smi-lib"
SHARED_LIBS="ON"
CLEAN_OR_OUT=0;
PKGTYPE="deb"
VALID_STR=`getopt -o hcraso:p: --long help,clean,release,static,address_sanitizer,outdir:,package: -- "$@"`
eval set -- "$VALID_STR"
while true ;
do
case "$1" in
(-h | --help)
printUsage ; exit 0;;
(-c | --clean)
TARGET="clean" ; ((CLEAN_OR_OUT|=1)) ; shift ;;
(-r | --release)
BUILD_TYPE="RelWithDebInfo" ; shift ;;
(-a | --address_sanitizer)
set_asan_env_vars
set_address_sanitizer_on
# TODO - support standard option of passing cmake environment vars - CFLAGS,CXXFLAGS etc., to enable address sanitizer
ADDRESS_SANITIZER=true ; shift ;;
(-s | --static)
SHARED_LIBS="OFF" ; shift ;;
(-o | --outdir)
TARGET="outdir"; PKGTYPE=$2 ; OUT_DIR_SPECIFIED=1 ; ((CLEAN_OR_OUT|=2)) ; shift 2 ;;
(-p | --package)
MAKETARGET="$2" ; shift 2;;
--) shift; break;; # end delimiter
(*)
echo " This should never come but just incase : UNEXPECTED ERROR Parm : [$1] ">&2 ; exit 20;;
esac
done
RET_CONFLICT=1
check_conflicting_options $CLEAN_OR_OUT $PKGTYPE $MAKETARGET
if [ $RET_CONFLICT -ge 30 ]; then
print_vars $API_NAME $TARGET $BUILD_TYPE $SHARED_LIBS $CLEAN_OR_OUT $PKGTYPE $MAKETARGET
exit $RET_CONFLICT
fi
clean_amdsmi() {
rm -rf "$AMDSMI_BUILD_DIR"
rm -rf "$AMDSMI_PACKAGE_DEB_DIR"
rm -rf "$AMDSMI_PACKAGE_RPM_DIR"
rm -rf "$PACKAGE_ROOT/amd_smi"
rm -rf "$PACKAGE_INCLUDE/amd_smi"
rm -f $PACKAGE_LIB/libamd_smi.*
return 0
}
build_amdsmi() {
echo "Building AMDSMI"
echo "AMDSMI_BUILD_DIR: ${AMDSMI_BUILD_DIR}"
if [ ! -d "$AMDSMI_BUILD_DIR" ]; then
mkdir -p $AMDSMI_BUILD_DIR
pushd $AMDSMI_BUILD_DIR
print_lib_type $SHARED_LIBS
cmake \
-DBUILD_SHARED_LIBS=$SHARED_LIBS \
$(rocm_common_cmake_params) \
$(rocm_cmake_params) \
-DENABLE_LDCONFIG=OFF \
-DAMD_SMI_PACKAGE="${AMDSMI_PKG_NAME}" \
-DCPACK_PACKAGE_VERSION_MAJOR="1" \
-DCPACK_PACKAGE_VERSION_MINOR="$ROCM_LIBPATCH_VERSION" \
-DCPACK_PACKAGE_VERSION_PATCH="0" \
-DADDRESS_SANITIZER="$ADDRESS_SANITIZER" \
-DBUILD_TESTS=ON \
"$AMD_SMI_LIB_ROOT"
popd
fi
echo "Making amd_smi package:"
cmake --build "$AMDSMI_BUILD_DIR" -- $AMDSMI_MAKE_OPTS
cmake --build "$AMDSMI_BUILD_DIR" -- $AMDSMI_MAKE_OPTS install
cmake --build "$AMDSMI_BUILD_DIR" -- $AMDSMI_MAKE_OPTS package
copy_if DEB "${CPACKGEN:-"DEB;RPM"}" "$AMDSMI_PACKAGE_DEB_DIR" $AMDSMI_BUILD_DIR/*.deb
copy_if RPM "${CPACKGEN:-"DEB;RPM"}" "$AMDSMI_PACKAGE_RPM_DIR" $AMDSMI_BUILD_DIR/*.rpm
}
print_output_directory() {
case ${PKGTYPE} in
("deb")
echo ${AMDSMI_PACKAGE_DEB_DIR};;
("rpm")
echo ${AMDSMI_PACKAGE_RPM_DIR};;
(*)
echo "Invalid package type \"${PKGTYPE}\" provided for -o" >&2; exit 1;;
esac
exit
}
verifyEnvSetup
case $TARGET in
(clean) clean_amdsmi ;;
(build) build_amdsmi ;;
(outdir) print_output_directory ;;
(*) die "Invalid target $TARGET" ;;
esac
echo "Operation complete"
exit 0

View File

@@ -0,0 +1,54 @@
#!/bin/bash
set -ex
source "$(dirname "${BASH_SOURCE[0]}")/compute_helper.sh"
set_component_src AMDMIGraphX
build_amdmigraphx() {
echo "Start build"
cd $COMPONENT_SRC
pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
set_asan_env_vars
set_address_sanitizer_on
fi
if [ -n "$GPU_ARCHS" ]; then
GPU_TARGETS="$GPU_ARCHS"
else
GPU_TARGETS="gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101"
fi
mkdir -p ${BUILD_DIR} && rm -rf ${BUILD_DIR}/* && mkdir -p ${HOME}/amdmigraphx && rm -rf ${HOME}/amdmigraphx/*
rbuild package -d "${HOME}/amdmigraphx" -B "${BUILD_DIR}" \
--cxx="${ROCM_PATH}/llvm/bin/clang++" \
--cc="${ROCM_PATH}/llvm/bin/clang" \
$(rocm_common_cmake_params) \
-DCMAKE_MODULE_LINKER_FLAGS="-Wl,--enable-new-dtags -Wl,--rpath,$ROCM_LIB_RPATH" \
-DGPU_TARGETS="${GPU_TARGETS}" \
-DCMAKE_INSTALL_RPATH=""
mkdir -p $PACKAGE_DIR && cp ${BUILD_DIR}/*.${PKGTYPE} $PACKAGE_DIR
cd $BUILD_DIR && cmake --build . -- install -j${PROC}
show_build_cache_stats
}
clean_amdmigraphx() {
echo "Cleaning AMDMIGraphX build directory: ${BUILD_DIR} ${DEPS_DIR} ${PACKAGE_DIR}"
rm -rf "$BUILD_DIR" "$DEPS_DIR" "$PACKAGE_DIR"
echo "Done!"
}
stage2_command_args "$@"
case $TARGET in
build) build_amdmigraphx ;;
outdir) print_output_directory ;;
clean) clean_amdmigraphx ;;
*) die "Invalid target $TARGET" ;;
esac

View File

@@ -0,0 +1,150 @@
#!/bin/bash
source "$(dirname "${BASH_SOURCE}")/compute_utils.sh"
printUsage() {
echo
echo "Usage: ${BASH_SOURCE##*/} [options ...]"
echo
echo "Options:"
echo " -c, --clean Clean output and delete all intermediate work"
echo " -p, --package <type> Specify packaging format"
echo " -r, --release Make a release build instead of a debug build"
echo " -o, --outdir <pkg_type> Print path of output directory containing packages of
type referred to by pkg_type"
echo " -h, --help Prints this help"
echo
echo "Possible values for <type>:"
echo " deb -> Debian format (default)"
echo " rpm -> RPM format"
echo
return 0
}
API_NAME="hsa-amd-aqlprofile"
PACKAGE_DEB="$(getPackageRoot)/deb/$API_NAME"
PROJ_NAME="$API_NAME"
TARGET="build"
BUILD_TYPE="Debug"
MAKETARGET="deb"
MAKE_OPTS="$DASH_JAY -C $BUILD_DIR"
SHARED_LIBS="ON"
CLEAN_OR_OUT=0
MAKETARGET="deb"
PKGTYPE="deb"
VALID_STR=$(getopt -o hcro:p: --long help,clean,release,clean,outdir:,package: -- "$@")
eval set -- "$VALID_STR"
while true; do
case "$1" in
-h | --help)
printUsage
exit 0
;;
-c | --clean)
TARGET="clean"
((CLEAN_OR_OUT |= 1))
shift
;;
-r | --release)
BUILD_TYPE="Release"
shift
;;
-o | --outdir)
TARGET="outdir"
PKGTYPE=$2
OUT_DIR_SPECIFIED=1
((CLEAN_OR_OUT |= 2))
shift 2
;;
--)
shift
break
;;
*)
echo " This should never come but just incase : UNEXPECTED ERROR Parm : [$1] " >&2
exit 20
;;
esac
done
copy_pkg_files_to_rocm() {
local comp_folder=$1
local comp_pkg_name=$2
cd "${OUT_DIR}/${PKGTYPE}/${comp_folder}"|| exit 2
if [ "${PKGTYPE}" = 'deb' ]; then
dpkg-deb -x ${comp_pkg_name}_*.deb pkg/
else
mkdir pkg && pushd pkg/ || exit 2
if [[ "${comp_pkg_name}" != *-dev* ]]; then
rpm2cpio ../${comp_pkg_name}-*.rpm | cpio -idmv
else
rpm2cpio ../${comp_pkg_name}el-*.rpm | cpio -idmv
fi
popd || exit 2
fi
ls ./pkg -alt
sudo cp -r ./pkg/*/rocm*/* "${ROCM_PATH}" || exit 2
rm -rf pkg/
}
clean() {
echo "Cleaning $PROJ_NAME package"
rm -rf "$PACKAGE_DEB"
}
build() {
echo "Downloading $PROJ_NAME" package
if [ "$DISTRO_NAME" = ubuntu ]; then
mkdir -p "$PACKAGE_DEB"
local rocm_ver=${ROCM_VERSION}
if [ ${ROCM_VERSION##*.} = 0 ]; then
rocm_ver=${ROCM_VERSION%.*}
fi
local url="https://repo.radeon.com/rocm/apt/${rocm_ver}/pool/main/h/${API_NAME}/"
local package
package=$(curl -s "$url" | grep -Po 'href="\K[^"]*' | grep "${DISTRO_RELEASE}" | head -n 1)
if [ -z "$package" ]; then
echo "No package found for Ubuntu version $DISTRO_RELEASE"
exit 1
fi
wget -t3 -P "$PACKAGE_DEB" "${url}${package}"
copy_pkg_files_to_rocm ${API_NAME} ${API_NAME}
else
echo "$DISTRO_ID is not supported..."
exit 2
fi
echo "Installing $PROJ_NAME" package
}
print_output_directory() {
case ${PKGTYPE} in
"deb")
echo ${PACKAGE_DEB}
;;
"rpm")
echo ${PACKAGE_RPM}
;;
*)
echo "Invalid package type \"${PKGTYPE}\" provided for -o" >&2
exit 1
;;
esac
exit
}
case "$TARGET" in
clean) clean ;;
build) build ;;
outdir) print_output_directory ;;
*) die "Invalid target $TARGET" ;;
esac
echo "Operation complete"

View File

@@ -0,0 +1,136 @@
#!/bin/bash
source "$(dirname "${BASH_SOURCE}")/compute_utils.sh"
printUsage() {
echo
echo "Usage: $(basename "${BASH_SOURCE}") [-c|-r|-h] [makeopts]"
echo
echo "Options:"
echo " -c, --clean Removes all clang-ocl build artifacts"
echo " -r, --release Build non-debug version clang-ocl (default is debug)"
echo " -a, --address_sanitizer Enable address sanitizer"
echo " -o, --outdir <pkg_type> Print path of output directory containing packages of
type referred to by pkg_type"
echo " -h, --help Prints this help"
echo " -s, --static Supports static CI by accepting this param & not bailing out. No effect of the param though"
echo
return 0
}
TARGET="build"
CLANG_OCL_DEST="$(getBinPath)"
CLANG_OCL_SRC_ROOT="$CLANG_OCL_ROOT"
CLANG_OCL_BUILD_DIR="$(getBuildPath clang-ocl)"
MAKEARG="$DASH_JAY"
PACKAGE_ROOT="$(getPackageRoot)"
PACKAGE_UTILS="$(getUtilsPath)"
CLANG_OCL_PACKAGE_DEB="$PACKAGE_ROOT/deb/clang-ocl"
CLANG_OCL_PACKAGE_RPM="$PACKAGE_ROOT/rpm/clang-ocl"
BUILD_TYPE="Debug"
SHARED_LIBS="ON"
CLEAN_OR_OUT=0;
MAKETARGET="deb"
PKGTYPE="deb"
VALID_STR=`getopt -o hcraso:g: --long help,clean,release,clean,static,address_sanitizer,outdir:,gpu_list: -- "$@"`
eval set -- "$VALID_STR"
while true ;
do
case "$1" in
(-h | --help)
printUsage ; exit 0;;
(-c | --clean)
TARGET="clean" ; ((CLEAN_OR_OUT|=1)) ; shift ;;
(-r | --release)
MAKEARG="$MAKEARG BUILD_TYPE=rel" ; BUILD_TYPE="Release" ; shift ;;
(-a | --address_sanitizer)
set_asan_env_vars
set_address_sanitizer_on ; shift ;;
(-s | --static)
SHARED_LIBS="OFF" ; shift ;;
(-o | --outdir)
TARGET="outdir"; PKGTYPE=$2 ; OUT_DIR_SPECIFIED=1 ; ((CLEAN_OR_OUT|=2)) ; shift 2 ;;
(-g | --gpu_list )
GPU_LIST=$2; shift 2 ;;
--) shift; break;;
(*)
echo " This should never come but just incase : UNEXPECTED ERROR Parm : [$1] ">&2 ; exit 20;;
esac
done
RET_CONFLICT=1
check_conflicting_options $CLEAN_OR_OUT $PKGTYPE $MAKETARGET
if [ $RET_CONFLICT -ge 30 ]; then
print_vars $API_NAME $TARGET $BUILD_TYPE $SHARED_LIBS $CLEAN_OR_OUT $PKGTYPE $MAKETARGET
exit $RET_CONFLICT
fi
clean_clang-ocl() {
echo "Removing clang-ocl"
rm -rf $CLANG_OCL_DEST/clang-ocl
rm -rf $CLANG_OCL_BUILD_DIR
rm -rf $CLANG_OCL_PACKAGE_DEB
rm -rf $CLANG_OCL_PACKAGE_RPM
}
build_clang-ocl() {
if [ ! -d "$CLANG_OCL_BUILD_DIR" ]; then
mkdir -p $CLANG_OCL_BUILD_DIR
pushd $CLANG_OCL_BUILD_DIR
if [ -e $PACKAGE_ROOT/lib/bitcode/opencl.amdgcn.bc ]; then
BC_DIR="$ROCM_INSTALL_PATH/lib"
else
BC_DIR="$ROCM_INSTALL_PATH/amdgcn/bitcode"
fi
cmake \
$(rocm_cmake_params) \
-DDISABLE_CHECKS="ON" \
-DCLANG_BIN="$ROCM_INSTALL_PATH/llvm/bin" \
-DBITCODE_DIR="$BC_DIR" \
$(rocm_common_cmake_params) \
-DCPACK_SET_DESTDIR="OFF" \
$CLANG_OCL_SRC_ROOT
echo "Making clang-ocl:"
cmake --build . -- $MAKEARG
cmake --build . -- $MAKEARG install
cmake --build . -- $MAKEARG package
popd
fi
copy_if DEB "${CPACKGEN:-"DEB;RPM"}" "$CLANG_OCL_PACKAGE_DEB" $CLANG_OCL_BUILD_DIR/rocm-clang-ocl*.deb
copy_if RPM "${CPACKGEN:-"DEB;RPM"}" "$CLANG_OCL_PACKAGE_RPM" $CLANG_OCL_BUILD_DIR/rocm-clang-ocl*.rpm
}
print_output_directory() {
case ${PKGTYPE} in
("deb")
echo ${CLANG_OCL_PACKAGE_DEB};;
("rpm")
echo ${CLANG_OCL_PACKAGE_RPM};;
(*)
echo "Invalid package type \"${PKGTYPE}\" provided for -o" >&2; exit 1;;
esac
exit
}
case $TARGET in
(clean) clean_clang-ocl ;;
(build) build_clang-ocl ;;
(outdir) print_output_directory ;;
(*) die "Invalid target $TARGET" ;;
esac
echo "Operation complete"
exit 0

151
tools/rocm-build/build_comgr.sh Executable file
View File

@@ -0,0 +1,151 @@
#!/bin/bash
source "$(dirname "${BASH_SOURCE}")/compute_utils.sh"
printUsage() {
echo
echo "Usage: $(basename "${BASH_SOURCE}") [options ...]"
echo
echo "Options:"
echo " -c, --clean Clean output and delete all intermediate work"
echo " -p, --package <type> Specify packaging format"
echo " -r, --release Make a release build instead of a debug build"
echo " -a, --address_sanitizer Enable address sanitizer"
echo " -o, --outdir <pkg_type> Print path of output directory containing packages of
type referred to by pkg_type"
echo " -h, --help Prints this help"
echo " -s, --static Build static lib (.a). build instead of dynamic/shared(.so) "
echo " -l, --link_llvm_static Link to LLVM statically. Default is to dynamically link to LLVM; requires that LLVM dylibs are created."
echo
echo "Possible values for <type>:"
echo " deb -> Debian format (default)"
echo " rpm -> RPM format"
echo
return 0
}
API_NAME=amd_comgr
PROJ_NAME=$API_NAME
LIB_NAME=lib${API_NAME}
TARGET=build
MAKETARGET=deb
PACKAGE_ROOT=$(getPackageRoot)
PACKAGE_LIB=$(getLibPath)
PACKAGE_INCLUDE=$(getIncludePath)
BUILD_DIR=$(getBuildPath $API_NAME)
PACKAGE_DEB=$(getPackageRoot)/deb/$API_NAME
PACKAGE_RPM=$(getPackageRoot)/rpm/$API_NAME
PACKAGE_PREFIX=$ROCM_INSTALL_PATH
BUILD_TYPE=Debug
MAKE_OPTS="$DASH_JAY CTEST_OUTPUT_ON_FAILURE=1 -C $BUILD_DIR"
VERBOSE_OPTS="AMD_COMGR_EMIT_VERBOSE_LOGS=1 AMD_COMGR_REDIRECT_LOGS=stdout"
SHARED_LIBS="ON"
CLEAN_OR_OUT=0;
PKGTYPE="deb"
MAKETARGET="deb"
LINK_LLVM_DYLIB="OFF"
VALID_STR=`getopt -o hcraslo:p: --long help,clean,release,address_sanitizer,static,link_llvm_static,outdir:,package: -- "$@"`
eval set -- "$VALID_STR"
while true ;
do
case "$1" in
(-h | --help)
printUsage ; exit 0;;
(-c | --clean)
TARGET="clean" ; ((CLEAN_OR_OUT|=1)) ; shift ;;
(-r | --release)
BUILD_TYPE="RelWithDebInfo" ; shift ;;
(-a | --address_sanitizer)
set_asan_env_vars
set_address_sanitizer_on ; shift ;;
(-s | --static)
SHARED_LIBS="OFF" ; shift ;;
(-l | --link_llvm_static)
LINK_LLVM_DYLIB="OFF"; shift ;;
(-o | --outdir)
TARGET="outdir"; PKGTYPE=$2 ; OUT_DIR_SPECIFIED=1 ; ((CLEAN_OR_OUT|=2)) ; shift 2 ;;
(-p | --package)
MAKETARGET="$2" ; shift 2;;
--) shift; break;; # end delimiter
(*)
echo " This should never come but just incase : UNEXPECTED ERROR Parm : [$1] ">&2 ; exit 20;;
esac
done
RET_CONFLICT=1
check_conflicting_options $CLEAN_OR_OUT $PKGTYPE $MAKETARGET
if [ $RET_CONFLICT -ge 30 ]; then
print_vars $API_NAME $TARGET $BUILD_TYPE $SHARED_LIBS $CLEAN_OR_OUT $PKGTYPE $MAKETARGET
exit $RET_CONFLICT
fi
clean() {
echo "Cleaning $PROJ_NAME"
rm -rf $BUILD_DIR
rm -rf $PACKAGE_DEB
rm -rf $PACKAGE_RPM
rm -rf $PACKAGE_ROOT/${PROJ_NAME}
rm -rf $PACKAGE_LIB/${LIB_NAME}*
}
build() {
echo "Building $PROJ_NAME"
mkdir -p "$BUILD_DIR"
pushd "$BUILD_DIR"
if [ "$SHARED_LIBS" == "OFF" ]
then
echo " Building Archive "
else
echo " Building Shared Object "
fi
cmake \
$(rocm_cmake_params) \
-DBUILD_SHARED_LIBS=$SHARED_LIBS \
$(rocm_common_cmake_params) \
-DCMAKE_DISABLE_FIND_PACKAGE_hip=TRUE \
-DCMAKE_CTEST_ARGUMENTS="--exclude-regex;multithread_test" \
-DLLVM_LINK_LLVM_DYLIB=$LINK_LLVM_DYLIB \
-DLLVM_ENABLE_LIBCXX=$LINK_LLVM_DYLIB \
$COMGR_ROOT
popd
cmake --build "$BUILD_DIR" -- $MAKE_OPTS
cmake --build "$BUILD_DIR" -- $MAKE_OPTS $VERBOSE_OPTS test
cmake --build "$BUILD_DIR" -- $MAKE_OPTS install
cmake --build "$BUILD_DIR" -- $MAKE_OPTS package
mkdir -p $PACKAGE_LIB
cp -R $BUILD_DIR/${LIB_NAME}* $PACKAGE_LIB
copy_if DEB "${CPACKGEN:-"DEB;RPM"}" "$PACKAGE_DEB" $BUILD_DIR/comgr*.deb
copy_if RPM "${CPACKGEN:-"DEB;RPM"}" "$PACKAGE_RPM" $BUILD_DIR/comgr*.rpm
}
print_output_directory() {
case ${PKGTYPE} in
("deb")
echo ${PACKAGE_DEB};;
("rpm")
echo ${PACKAGE_RPM};;
(*)
echo "Invalid package type \"${PKGTYPE}\" provided for -o" >&2; exit 1;;
esac
exit
}
verifyEnvSetup
case $TARGET in
(clean) clean ;;
(build) build ;;
(outdir) print_output_directory ;;
(*) die "Invalid target $TARGET" ;;
esac
echo "Operation complete"

View File

@@ -0,0 +1,185 @@
#!/bin/bash
set -ex
source "$(dirname "${BASH_SOURCE[0]}")/compute_helper.sh"
set_component_src composable_kernel
build_miopen_ck() {
echo "Start Building Composable Kernel"
if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
set_asan_env_vars
set_address_sanitizer_on
fi
cd $COMPONENT_SRC
mkdir "$BUILD_DIR" && cd "$BUILD_DIR"
if [ -n "$GPU_ARCHS" ]; then
GPU_TARGETS="$GPU_ARCHS"
else
GPU_TARGETS="gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101"
fi
if [ "${ASAN_CMAKE_PARAMS}" == "true" ] ; then
cmake -DBUILD_DEV=OFF \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE:-'RelWithDebInfo'} \
-DCMAKE_CXX_COMPILER=${ROCM_PATH}/llvm/bin/clang++ \
-DCMAKE_CXX_FLAGS=" -O3 " \
-DCMAKE_PREFIX_PATH="${ROCM_PATH%-*}/lib/cmake;${ROCM_PATH%-*}/$ASAN_LIBDIR;${ROCM_PATH%-*}/llvm;${ROCM_PATH%-*}" \
-DCMAKE_SHARED_LINKER_FLAGS_INIT="-Wl,--enable-new-dtags,--rpath,$ROCM_ASAN_LIB_RPATH" \
-DCMAKE_EXE_LINKER_FLAGS_INIT="-Wl,--enable-new-dtags,--rpath,$ROCM_ASAN_EXE_RPATH" \
-DCMAKE_VERBOSE_MAKEFILE=1 \
-DCMAKE_INSTALL_RPATH_USE_LINK_PATH=FALSE \
-DCMAKE_INSTALL_PREFIX=${ROCM_PATH} \
-DCMAKE_PACKAGING_INSTALL_PREFIX=${ROCM_PATH} \
-DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
-DROCM_SYMLINK_LIBS=OFF \
-DCPACK_PACKAGING_INSTALL_PREFIX=${ROCM_PATH} \
-DROCM_DISABLE_LDCONFIG=ON \
-DROCM_PATH=${ROCM_PATH} \
-DCPACK_GENERATOR="${PKGTYPE^^}" \
${LAUNCHER_FLAGS} \
-DINSTANCES_ONLY=ON \
-DENABLE_ASAN_PACKAGING=true \
-DAMDGPU_TARGETS=${GPU_TARGETS} \
"$COMPONENT_SRC"
else
cmake -DBUILD_DEV=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_CXX_COMPILER=${ROCM_PATH}/llvm/bin/clang++ \
-DCMAKE_CXX_FLAGS=" -O3 " \
-DCMAKE_PREFIX_PATH=${ROCM_PATH%-*} \
-DCMAKE_SHARED_LINKER_FLAGS_INIT='-Wl,--enable-new-dtags,--rpath,$ORIGIN' \
-DCMAKE_EXE_LINKER_FLAGS_INIT='-Wl,--enable-new-dtags,--rpath,$ORIGIN/../lib' \
-DCMAKE_VERBOSE_MAKEFILE=1 \
-DCMAKE_INSTALL_RPATH_USE_LINK_PATH=FALSE \
-DCMAKE_INSTALL_PREFIX=${ROCM_PATH} \
-DCMAKE_PACKAGING_INSTALL_PREFIX=${ROCM_PATH} \
-DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
-DROCM_SYMLINK_LIBS=OFF \
-DCPACK_PACKAGING_INSTALL_PREFIX=${ROCM_PATH} \
-DROCM_DISABLE_LDCONFIG=ON \
-DROCM_PATH=${ROCM_PATH} \
-DCPACK_GENERATOR="${PKGTYPE^^}" \
${LAUNCHER_FLAGS} \
-DINSTANCES_ONLY=ON \
-DAMDGPU_TARGETS=${GPU_TARGETS} \
"$COMPONENT_SRC"
fi
cmake --build . -- -j${PROC} package
cmake --build "$BUILD_DIR" -- install
mkdir -p $PACKAGE_DIR && cp ./*.${PKGTYPE} $PACKAGE_DIR
rm -rf *
}
unset_asan_env_vars() {
ASAN_CMAKE_PARAMS="false"
export ADDRESS_SANITIZER="OFF"
export LD_LIBRARY_PATH=""
export ASAN_OPTIONS=""
}
set_address_sanitizer_off() {
export CFLAGS=""
export CXXFLAGS=""
export LDFLAGS=""
}
build_miopen_ckProf() {
ENABLE_ADDRESS_SANITIZER=false
echo "Start Building Composable Kernel Profiler"
if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
set_asan_env_vars
set_address_sanitizer_on
else
unset_asan_env_vars
set_address_sanitizer_off
fi
cd $COMPONENT_SRC
cd "$BUILD_DIR"
rm -rf *
architectures='gfx10 gfx11 gfx90 gfx94'
if [ -n "$GPU_ARCHS" ]; then
architectures=$(echo ${GPU_ARCHS} | awk -F';' '{for(i=1;i<=NF;i++) a[substr($i,1,5)]} END{for(i in a) printf i" "}')
else
architectures='gfx10 gfx11 gfx90 gfx94'
fi
for arch in ${architectures}
do
if [ "${ASAN_CMAKE_PARAMS}" == "true" ] ; then
cmake -DBUILD_DEV=OFF \
-DCMAKE_PREFIX_PATH="${ROCM_PATH%-*}/lib/cmake;${ROCM_PATH%-*}/$ASAN_LIBDIR;${ROCM_PATH%-*}/llvm;${ROCM_PATH%-*}" \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE:-'RelWithDebInfo'} \
-DCMAKE_SHARED_LINKER_FLAGS_INIT="-Wl,--enable-new-dtags,--rpath,$ROCM_ASAN_LIB_RPATH" \
-DCMAKE_EXE_LINKER_FLAGS_INIT="-Wl,--enable-new-dtags,--rpath,$ROCM_ASAN_EXE_RPATH" \
-DCMAKE_VERBOSE_MAKEFILE=1 \
-DCMAKE_INSTALL_RPATH_USE_LINK_PATH=FALSE \
-DCMAKE_INSTALL_PREFIX="${ROCM_PATH}" \
-DCMAKE_PACKAGING_INSTALL_PREFIX="${ROCM_PATH}" \
-DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
-DROCM_SYMLINK_LIBS=OFF \
-DCPACK_PACKAGING_INSTALL_PREFIX="${ROCM_PATH}" \
-DROCM_DISABLE_LDCONFIG=ON \
-DROCM_PATH="${ROCM_PATH}" \
-DCPACK_GENERATOR="${PKGTYPE^^}" \
-DCMAKE_CXX_COMPILER="${ROCM_PATH}/llvm/bin/clang++" \
-DCMAKE_C_COMPILER="${ROCM_PATH}/llvm/bin/clang" \
${LAUNCHER_FLAGS} \
-DPROFILER_ONLY=ON \
-DENABLE_ASAN_PACKAGING=true \
-DGPU_ARCH="${arch}" \
"$COMPONENT_SRC"
else
cmake -DBUILD_DEV=OFF \
-DCMAKE_PREFIX_PATH="${ROCM_PATH%-*}" \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_SHARED_LINKER_FLAGS_INIT='-Wl,--enable-new-dtags,--rpath,$ORIGIN' \
-DCMAKE_EXE_LINKER_FLAGS_INIT='-Wl,--enable-new-dtags,--rpath,$ORIGIN/../lib' \
-DCMAKE_VERBOSE_MAKEFILE=1 \
-DCMAKE_INSTALL_RPATH_USE_LINK_PATH=FALSE \
-DCMAKE_INSTALL_PREFIX="${ROCM_PATH}" \
-DCMAKE_PACKAGING_INSTALL_PREFIX="${ROCM_PATH}" \
-DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
-DROCM_SYMLINK_LIBS=OFF \
-DCPACK_PACKAGING_INSTALL_PREFIX="${ROCM_PATH}" \
-DROCM_DISABLE_LDCONFIG=ON \
-DROCM_PATH="${ROCM_PATH}" \
-DCPACK_GENERATOR="${PKGTYPE^^}" \
-DCMAKE_CXX_COMPILER="${ROCM_PATH}/llvm/bin/clang++" \
-DCMAKE_C_COMPILER="${ROCM_PATH}/llvm/bin/clang" \
${LAUNCHER_FLAGS} \
-DPROFILER_ONLY=ON \
-DGPU_ARCH="${arch}" \
"$COMPONENT_SRC"
fi
cmake --build . -- -j${PROC} package
cp ./*ckprofiler*.${PKGTYPE} $PACKAGE_DIR
rm -rf *
done
rm -rf _CPack_Packages/ && find -name '*.o' -delete
echo "Finished building Composable Kernel"
show_build_cache_stats
}
clean_miopen_ck() {
echo "Cleaning MIOpen-CK build directory: ${BUILD_DIR} ${PACKAGE_DIR}"
rm -rf "$BUILD_DIR" "$PACKAGE_DIR"
echo "Done!"
}
stage2_command_args "$@"
case $TARGET in
build) build_miopen_ck; build_miopen_ckProf;;
outdir) print_output_directory ;;
clean) clean_miopen_ck ;;
*) die "Invalid target $TARGET" ;;
esac

159
tools/rocm-build/build_dbgapi.sh Executable file
View File

@@ -0,0 +1,159 @@
#!/bin/bash
source "${BASH_SOURCE%/*}/compute_utils.sh"
printUsage() {
echo
echo "Usage: $(basename "${BASH_SOURCE[0]}") [options ...]"
echo
echo "Options:"
echo " -c, --clean Clean output and delete all intermediate work"
echo " -p, --package <type> Specify packaging format"
echo " -r, --release Make a release build instead of a debug build"
echo " --enable-assertions Enable assertions"
echo " -a, --address_sanitizer Enable address sanitizer"
echo " -o, --outdir <pkg_type> Print path of output directory containing packages of
type referred to by pkg_type"
echo " -h, --help Prints this help"
echo " -M, --skip_man_pages Do not build the 'docs' target"
echo " -s, --static Supports static CI by accepting this param & not bailing out. No effect of the param though"
echo
echo "Possible values for <type>:"
echo " deb -> Debian format (default)"
echo " rpm -> RPM format"
echo
return 0
}
API_NAME=rocm-dbgapi
AMD_DBGAPI_NAME=amd-dbgapi
MAKEINSTALL_MANIFEST=makeinstall_manifest.txt
PROJ_NAME=$API_NAME
LIB_NAME=lib${API_NAME}.so
TARGET=build
MAKETARGET=deb
PACKAGE_ROOT=$(getPackageRoot)
PACKAGE_LIB=$(getLibPath)
PACKAGE_INCLUDE=$(getIncludePath)
BUILD_DIR=$(getBuildPath $API_NAME)
PACKAGE_DEB=$(getPackageRoot)/deb/$API_NAME
PACKAGE_RPM=$(getPackageRoot)/rpm/$API_NAME
BUILD_TYPE=Debug
MAKE_OPTS=($DASH_JAY -C "$BUILD_DIR")
SHARED_LIBS="ON"
CLEAN_OR_OUT=0;
MAKETARGET="deb"
PKGTYPE="deb"
DODOCSBUILD=true
VALID_STR=$(getopt -o hcraso:p:M --long help,clean,release,enable-assertions,static,address_sanitizer,outdir:,package:skip_man_pages -- "$@")
eval set -- "$VALID_STR"
while true ;
do
case "$1" in
(-h | --help)
printUsage ; exit 0;;
(-c | --clean)
TARGET="clean" ; ((CLEAN_OR_OUT|=1)) ;;
(-r | --release)
ENABLE_ASSERTIONS=${ENABLE_ASSERTIONS:-"Off"} ;
BUILD_TYPE="RelWithDebInfo" ;;
( --enable-assertions)
ENABLE_ASSERTIONS="On" ;;
(-a | --address_sanitizer)
set_asan_env_vars
set_address_sanitizer_on ;;
(-s | --static)
SHARED_LIBS="OFF" ;;
(-o | --outdir)
TARGET="outdir"; PKGTYPE=$2 ; ((CLEAN_OR_OUT|=2)) ; shift 1 ;;
(-M | --skip_man_pages) DODOCSBUILD=false;;
(-p | --package)
MAKETARGET="$2" ; shift 1;;
--) shift; break;;
(*)
echo " This should never come but just incase : UNEXPECTED ERROR Parm : [$1] ">&2 ; exit 20;;
esac
shift
done
check_conflicting_options $CLEAN_OR_OUT "$PKGTYPE" "$MAKETARGET"
if [ "$RET_CONFLICT" -ge 30 ]; then
print_vars "$API_NAME" "$TARGET" "$BUILD_TYPE" "$SHARED_LIBS" "$CLEAN_OR_OUT" "$PKGTYPE" "$MAKETARGET"
exit "$RET_CONFLICT"
fi
clean() {
echo "Cleaning $PROJ_NAME"
if [ -e "$BUILD_DIR/$MAKEINSTALL_MANIFEST" ] ; then
xargs rm -f < "$BUILD_DIR/$MAKEINSTALL_MANIFEST"
fi
rm -rf "$BUILD_DIR"
rm -rf "$PACKAGE_DEB"
rm -rf "$PACKAGE_RPM"
rm -rf "${PACKAGE_ROOT:?}/${PROJ_NAME}"
rm -rf "${PACKAGE_LIB:?}/${LIB_NAME}"*
rm -rf "${PACKAGE_LIB:?}/cmake/${AMD_DBGAPI_NAME}"
rm -rf "${PACKAGE_INCLUDE:?}/${AMD_DBGAPI_NAME}"
}
build() {
if [ ! -e "$ROCM_DBGAPI_ROOT/CMakeLists.txt" ]
then
echo " No $ROCM_DBGAPI_ROOT/CMakeLists.txt file, skipping rocm-dbgapi" >&2
echo " No $ROCM_DBGAPI_ROOT/CMakeLists.txt file, skipping rocm-dbgapi"
exit 0
fi
echo "Building $PROJ_NAME"
mkdir -p "$BUILD_DIR"
pushd "$BUILD_DIR" || exit 99
cmake \
$(rocm_cmake_params) \
$(rocm_common_cmake_params) \
-DENABLE_ASSERTIONS=${ENABLE_ASSERTIONS:-"On"} \
"$ROCM_DBGAPI_ROOT"
popd || exit 99
cmake --build "$BUILD_DIR" -- "${MAKE_OPTS[@]}"
"$DODOCSBUILD" && cmake --build "$BUILD_DIR" -- "${MAKE_OPTS[@]}" doc
cmake --build "$BUILD_DIR" -- "${MAKE_OPTS[@]}" install
mv "$BUILD_DIR/install_manifest.txt" "$BUILD_DIR/$MAKEINSTALL_MANIFEST"
cmake --build "$BUILD_DIR" -- "${MAKE_OPTS[@]}" package
mkdir -p "$PACKAGE_LIB"
(
shopt -s nullglob
cp -R "$BUILD_DIR/lib/${LIB_NAME}"* "$BUILD_DIR/${LIB_NAME}"* "$PACKAGE_LIB"
)
copy_if DEB "${CPACKGEN:-"DEB;RPM"}" "$PACKAGE_DEB" "$BUILD_DIR/${API_NAME}"*.deb
copy_if RPM "${CPACKGEN:-"DEB;RPM"}" "$PACKAGE_RPM" "$BUILD_DIR/${API_NAME}"*.rpm
}
print_output_directory() {
case ${PKGTYPE} in
("deb")
echo ${PACKAGE_DEB};;
("rpm")
echo ${PACKAGE_RPM};;
(*)
echo "Invalid package type \"${PKGTYPE}\" provided for -o" >&2; exit 1;;
esac
exit
}
verifyEnvSetup
case $TARGET in
(clean) clean ;;
(build) build ;;
(outdir) print_output_directory ;;
(*) die "Invalid target $TARGET" ;;
esac
echo "Operation complete"

View File

@@ -0,0 +1,140 @@
#!/bin/bash
source "$(dirname "${BASH_SOURCE}")/compute_utils.sh"
printUsage() {
echo
echo "Usage: $(basename "${BASH_SOURCE}") [options ...]"
echo
echo "Options:"
echo " -c, --clean Clean output and delete all intermediate work"
echo " -r, --release Build a release version of the package"
echo " -a, --address_sanitizer Enable address sanitizer"
echo " -s, --static Supports static CI by accepting this param & not bailing out. No effect of the param though"
echo " -o, --outdir <pkg_type> Print path of output directory containing packages of
type referred to by pkg_type"
echo " -h, --help Prints this help"
echo
echo
return 0
}
PACKAGE_ROOT="$(getPackageRoot)"
PACKAGE_BIN="$(getBinPath)"
PACKAGE_LIB="$(getLibPath)"
BUILD_PATH="$(getBuildPath devicelibs)"
INSTALL_PATH="$(getPackageRoot)"
LIGHTNING_BUILD_PATH="$(getBuildPath lightning)"
DEB_PATH="$(getDebPath devicelibs)"
RPM_PATH="$(getRpmPath devicelibs)"
AMDGCN_LIB_PATH="$PACKAGE_ROOT/amdgcn/bitcode/"
TARGET="build"
MAKEOPTS="$DASH_JAY"
SHARED_LIBS="ON"
CLEAN_OR_OUT=0;
PKGTYPE="deb"
MAKETARGET="deb"
VALID_STR=`getopt -o hcraso: --long help,clean,release,static,address_sanitizer,outdir: -- "$@"`
eval set -- "$VALID_STR"
while true ;
do
case "$1" in
(-h | --help)
printUsage ; exit 0;;
(-c | --clean)
TARGET="clean" ; ((CLEAN_OR_OUT|=1)) ; shift ;;
(-r | --release)
BUILD_TYPE="Release" ; shift ;;
(-a | --address_sanitizer)
ASAN_CMAKE_PARAMS="true"
ack_and_ignore_asan ; shift ;;
(-s | --static)
SHARED_LIBS="OFF" ; shift ;;
(-o | --outdir)
TARGET="outdir"; PKGTYPE=$2 ; OUT_DIR_SPECIFIED=1 ; ((CLEAN_OR_OUT|=2)) ; shift 2 ;;
--) shift; break;; # end delimiter
(*)
echo " This should never come but just incase : UNEXPECTED ERROR Parm : [$1] ">&2 ; exit 20;;
esac
done
RET_CONFLICT=1
check_conflicting_options $CLEAN_OR_OUT $PKGTYPE $MAKETARGET
if [ $RET_CONFLICT -ge 30 ]; then
print_vars $API_NAME $TARGET $BUILD_TYPE $SHARED_LIBS $CLEAN_OR_OUT $PKGTYPE $MAKETARGET
exit $RET_CONFLICT
fi
clean_devicelibs() {
rm -rf "$BUILD_PATH"
rm -f $PACKAGE_LIB/hc*.bc
rm -f $PACKAGE_LIB/irif*.bc
rm -f $PACKAGE_LIB/ockl*.bc
rm -f $PACKAGE_LIB/oclc*.bc
rm -f $PACKAGE_LIB/ocml*.bc
rm -f $PACKAGE_LIB/opencl*.bc
rm -f $PACKAGE_LIB/openmp*.bc
rm -rf $PACKAGE_ROOT/amdgcn
rm -rf "$DEB_PATH"
rm -rf "$RPM_PATH"
}
build_devicelibs() {
mkdir -p "$BUILD_PATH"
pushd "$BUILD_PATH"
local clangResourceDir="$($LIGHTNING_BUILD_PATH/bin/clang -print-resource-dir)"
local clangResourceVer=${clangResourceDir#*lib/clang/}
local bitcodeInstallLoc="lib/llvm/lib/clang/${clangResourceVer}/lib"
export LLVM_BUILD="$LIGHTNING_BUILD_PATH"
if [ ! -e Makefile ]; then
cmake $(rocm_cmake_params) \
$(rocm_common_cmake_params) \
-DROCM_DEVICE_LIBS_BITCODE_INSTALL_LOC_NEW="$bitcodeInstallLoc/amdgcn" \
-DROCM_DEVICE_LIBS_BITCODE_INSTALL_LOC_OLD="amdgcn" \
"$DEVICELIBS_ROOT"
echo "CMake complete"
fi
echo "Building device-libs"
cmake --build . -- $MAKEOPTS
cmake --build . -- $MAKEOPTS install
popd
}
package_devicelibs() {
mkdir -p "$DEB_PATH"
mkdir -p "$RPM_PATH"
pushd "$BUILD_PATH"
cpack
copy_if DEB "${CPACKGEN:-"DEB;RPM"}" "$DEB_PATH" *.deb
copy_if RPM "${CPACKGEN:-"DEB;RPM"}" "$RPM_PATH" *.rpm
popd
}
print_output_directory() {
case ${PKGTYPE} in
("deb")
echo ${DEB_PATH};;
("rpm")
echo ${RPM_PATH};;
(*)
echo "Invalid package type \"${PKGTYPE}\" provided for -o" >&2; exit 1;;
esac
exit
}
case $TARGET in
(clean) clean_devicelibs ;;
(build) build_devicelibs; package_devicelibs ;;
(outdir) print_output_directory ;;
(*) die "Invalid target $TARGET" ;;
esac
echo "Operation complete"

46
tools/rocm-build/build_half.sh Executable file
View File

@@ -0,0 +1,46 @@
#!/bin/bash
set -ex
source "$(dirname "${BASH_SOURCE[0]}")/compute_helper.sh"
set_component_src half
build_half() {
echo "Start build"
if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
set_asan_env_vars
set_address_sanitizer_on
ASAN_CMAKE_PARAMS="false"
fi
mkdir -p "$BUILD_DIR" && cd "$BUILD_DIR"
cmake \
-DCMAKE_INSTALL_PREFIX="$ROCM_PATH" \
-DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
"$COMPONENT_SRC"
cmake --build "$BUILD_DIR" -- -j${PROC}
cmake --build "$BUILD_DIR" -- package
cmake --build "$BUILD_DIR" -- install
rm -rf _CPack_Packages/ && find -name '*.o' -delete
mkdir -p $PACKAGE_DIR && cp ${BUILD_DIR}/*.${PKGTYPE} $PACKAGE_DIR
show_build_cache_stats
}
clean_half() {
echo "Cleaning half build directory: ${BUILD_DIR} ${PACKAGE_DIR}"
rm -rf "$BUILD_DIR" "$PACKAGE_DIR"
echo "Done!"
}
stage2_command_args "$@"
case $TARGET in
build) build_half ;;
outdir) print_output_directory ;;
clean) clean_half ;;
*) die "Invalid target $TARGET" ;;
esac

View File

@@ -0,0 +1,295 @@
#!/bin/bash
printUsage() {
echo
echo "Usage: $(basename "${BASH_SOURCE}") [options ...]"
echo
echo "Options:"
echo " -h, --help Prints this help"
echo " -c, --clean Clean output and delete all intermediate work"
echo " -r, --release Make a release build instead of a debug build"
echo " -a, --address_sanitizer Enable address sanitizer"
echo " -s, --static Build static lib (.a). build instead of dynamic/shared(.so) "
echo " -o, --outdir <pkg_type> Print path of output directory containing packages of type referred to by pkg_type"
echo " -t, --offload-arch=<arch> Specify arch for catch tests ex: --offload-arch=gfx1030 --offload-arch=gfx1100"
echo " -p, --package <type> Specify packaging format"
echo
echo "Possible values for <type>:"
echo " deb -> Debian format (default)"
echo " rpm -> RPM format"
echo
return 0
}
source "$(dirname "${BASH_SOURCE}")/compute_utils.sh"
MAKEOPTS="$DASH_JAY"
BUILD_PATH="$(getBuildPath hip-on-rocclr)"
TARGET="build"
PACKAGE_ROOT="$(getPackageRoot)"
PACKAGE_SRC="$(getSrcPath)"
PACKAGE_DEB="$PACKAGE_ROOT/deb/hip-on-rocclr"
PACKAGE_RPM="$PACKAGE_ROOT/rpm/hip-on-rocclr"
PREFIX_PATH="$PACKAGE_ROOT"
CORE_BUILD_DIR="$(getBuildPath hsa-core)"
ROCclr_BUILD_DIR="$(getBuildPath rocclr)"
HIPCC_BUILD_DIR="$(getBuildPath hipcc)"
CATCH_BUILD_DIR="$(getBuildPath catch)"
CATCH_SRC="$HIP_CATCH_TESTS_ROOT/catch"
SAMPLES_SRC="$HIP_CATCH_TESTS_ROOT/samples"
SAMPLES_BUILD_DIR="$(getBuildPath samples)"
if [ ! -e "$CATCH_SRC/CMakeLists.txt" ]; then
echo "Using catch source from hip project" >&2
CATCH_SRC="$HIP_ON_ROCclr_ROOT/tests/catch"
fi
BUILD_TYPE="Debug"
SHARED_LIBS="ON"
CLEAN_OR_OUT=0;
MAKETARGET="deb"
PKGTYPE="deb"
OFFLOAD_ARCH=()
DEFAULT_OFFLOAD_ARCH=(gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1031 gfx1033 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1103)
VALID_STR=`getopt -o hcrast:o: --long help,clean,release,address_sanitizer,static,offload-arch=:,outdir: -- "$@"`
eval set -- "$VALID_STR"
while true ;
do
case "$1" in
(-h | --help)
printUsage ; exit 0;;
(-c | --clean)
TARGET="clean" ; ((CLEAN_OR_OUT|=1)) ; shift ;;
(-r | --release)
BUILD_TYPE="RelWithDebInfo" ; shift ;;
(-a | --address_sanitizer)
set_asan_env_vars
set_address_sanitizer_on ; shift ;;
(-s | --static)
SHARED_LIBS="OFF" ; shift ;;
(-t | --offload-arch=)
OFFLOAD_ARCH+=( "$2" ); ((CLEAN_OR_OUT|=2)); shift 2 ;;
(-o | --outdir)
TARGET="outdir"; PKGTYPE=$2 ; OUT_DIR_SPECIFIED=1 ; ((CLEAN_OR_OUT|=2)) ; shift 2 ;;
--) shift; break;;
(*)
echo " This should never come but just incase : UNEXPECTED ERROR Parm : [$1] ">&2 ; exit 20;;
esac
done
if [ ${#OFFLOAD_ARCH[@]} = 0 ] ; then
OFFLOAD_ARCH=( "${DEFAULT_OFFLOAD_ARCH[@]}" )
else
echo "Using user defined offload archs ${OFFLOAD_ARCH[@]} for catch tests";
fi
printf -v OFFLOAD_ARCH_STR -- '--offload-arch=%q ' "${OFFLOAD_ARCH[@]}"
RET_CONFLICT=1
check_conflicting_options $CLEAN_OR_OUT $PKGTYPE $MAKETARGET
if [ $RET_CONFLICT -ge 30 ]; then
print_vars $API_NAME $TARGET $BUILD_TYPE $SHARED_LIBS $CLEAN_OR_OUT $PKGTYPE $MAKETARGET
exit $RET_CONFLICT
fi
clean_hip_on_rocclr() {
rm -rf "$BUILD_PATH"
rm -rf "$PACKAGE_DEB"
rm -rf "$PACKAGE_RPM"
rm -rf "$OUT_DIR/hip"
}
build_hip_on_rocclr() {
if [ -e "$CLR_ROOT/CMakeLists.txt" ]; then
_HIP_CMAKELIST_DIR="$CLR_ROOT"
_HIP_CMAKELIST_OPT="-DCLR_BUILD_HIP=ON -DCLR_BUILD_OCL=OFF"
if [ -e "$HIPOTHER_ROOT/hipnv" ]; then
_HIP_CMAKELIST_OPT="$_HIP_CMAKELIST_OPT -DHIPNV_DIR=$HIPOTHER_ROOT/hipnv"
fi
elif [ ! -e "$HIPAMD_ROOT/CMakeLists.txt" ]; then
echo "No $HIPAMD_ROOT/CMakeLists.txt file, skipping hip on rocclr" >&2
echo "No $HIPAMD_ROOT/CMakeLists.txt file, skipping hip on rocclr"
exit 0
else
_HIP_CMAKELIST_DIR="$HIPAMD_ROOT"
_HIP_CMAKELIST_OPT=""
fi
echo "$_HIP_CMAKELIST_DIR"
mkdir -p "$BUILD_PATH"
pushd "$BUILD_PATH"
if [ ! -e Makefile ]; then
echo "Building HIP-On-ROCclr CMake environment"
print_lib_type $SHARED_LIBS
cmake $(rocm_cmake_params) \
-DBUILD_SHARED_LIBS=$SHARED_LIBS \
-DHIP_COMPILER=clang \
-DHIP_PLATFORM=amd \
-DHIP_COMMON_DIR="$HIP_ON_ROCclr_ROOT" \
$(rocm_common_cmake_params) \
-DCMAKE_HIP_ARCHITECTURES=OFF \
-DHSA_PATH="$ROCM_INSTALL_PATH" \
-DCMAKE_SKIP_BUILD_RPATH=TRUE \
-DCPACK_INSTALL_PREFIX="$ROCM_INSTALL_PATH" \
-DROCM_PATH="$ROCM_INSTALL_PATH" \
-DHIPCC_BIN_DIR="$HIPCC_BUILD_DIR" \
-DHIP_CATCH_TEST=1 \
$_HIP_CMAKELIST_OPT \
"$_HIP_CMAKELIST_DIR"
echo "CMake complete"
fi
echo "Build and Install HIP"
cmake --build . -- $MAKEOPTS install "VERBOSE=1"
popd
}
build_catch_tests() {
WORKSPACE=`pwd`
echo "Build catch2 tests independently"
if [ ! -e "$CATCH_SRC/CMakeLists.txt" ]; then
echo "catch source not found: $CATCH_SRC" >&2
exit
fi
# build catch
rm -rf "$CATCH_BUILD_DIR"
mkdir -p "$CATCH_BUILD_DIR"
pushd "$CATCH_BUILD_DIR"
export HIP_PATH="$ROCM_INSTALL_PATH"
export ROCM_PATH="$ROCM_INSTALL_PATH"
cmake \
-DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
-DHIP_PLATFORM=amd \
-DROCM_PATH="$ROCM_INSTALL_PATH" \
-DOFFLOAD_ARCH_STR="$OFFLOAD_ARCH_STR" \
$(rocm_common_cmake_params) \
-DCPACK_RPM_DEBUGINFO_PACKAGE=FALSE \
-DCPACK_DEBIAN_DEBUGINFO_PACKAGE=FALSE \
-DCPACK_INSTALL_PREFIX="$ROCM_INSTALL_PATH" \
"$CATCH_SRC"
make $MAKEOPTS build_tests
echo "Packaging catch tests"
make $MAKEOPTS package_test
copy_if DEB "${CPACKGEN:-"DEB;RPM"}" "$PACKAGE_DEB" *.deb
copy_if RPM "${CPACKGEN:-"DEB;RPM"}" "$PACKAGE_RPM" *.rpm
popd
}
package_samples() {
if [ "$ASAN_CMAKE_PARAMS" == "true" ] ; then
echo "Disable the packaging of HIP samples" >&2
return
fi
WORKSPACE=`pwd`
if [ ! -e "$SAMPLES_SRC/CMakeLists.txt" ]; then
echo "HIP samples source not found at: $SAMPLES_SRC" >&2
echo "Using samples package from hip project: $BUILD_PATH" >&2
return
fi
rm -rf "$SAMPLES_BUILD_DIR"
mkdir -p "$SAMPLES_BUILD_DIR"
pushd "$SAMPLES_BUILD_DIR"
local CMAKE_PATH="$(getCmakePath)"
export HIP_PATH="$ROCM_INSTALL_PATH"
export ROCM_PATH="$ROCM_INSTALL_PATH"
cmake \
-DROCM_PATH="$ROCM_INSTALL_PATH" \
$(rocm_common_cmake_params) \
-DCMAKE_MODULE_PATH="$CMAKE_PATH/hip" \
-DCPACK_INSTALL_PREFIX="$ROCM_INSTALL_PATH" \
"$SAMPLES_SRC"
echo "Packaging hip samples from hip-tests project"
make $MAKEOPTS package_samples
copy_if DEB "${CPACKGEN:-"DEB;RPM"}" "$PACKAGE_DEB" *.deb
copy_if RPM "${CPACKGEN:-"DEB;RPM"}" "$PACKAGE_RPM" *.rpm
popd
}
clean_hip_tests(){
rm -rf "$CATCH_BUILD_DIR"
rm -rf "$PACKAGE_SRC/hip-on-rocclr"
rm -rf "$PACKAGE_SRC/hipamd"
rm -rf "$PACKAGE_SRC/rocclr"
rm -rf "$PACKAGE_SRC/opencl-on-rocclr"
rm -rf "$PACKAGE_SRC/clr"
rm -rf "$PACKAGE_SRC/hip-tests"
rm -rf "$PACKAGE_SRC/hipother"
}
copy_hip_tests() {
clean_hip_tests
echo "Copy HIP & ROCclr Source and tests"
mkdir -p "$PACKAGE_SRC/hip-on-rocclr"
echo "Copying hip-on-rocclr"
progressCopy "$HIP_ON_ROCclr_ROOT" "$PACKAGE_SRC/hip-on-rocclr"
if [ -e "$CLR_ROOT/CMakeLists.txt" ]; then
mkdir -p "$PACKAGE_SRC/clr"
echo "Copying clr"
progressCopy "$CLR_ROOT" "$PACKAGE_SRC/clr"
else
mkdir -p "$PACKAGE_SRC/hipamd"
mkdir -p "$PACKAGE_SRC/rocclr"
mkdir -p "$PACKAGE_SRC/opencl-on-rocclr"
echo "Copying hipamd"
progressCopy "$HIPAMD_ROOT" "$PACKAGE_SRC/hipamd"
echo "Copying rocclr"
progressCopy "$ROCclr_ROOT" "$PACKAGE_SRC/rocclr"
echo "Copying opencl-on-rocclr"
progressCopy "$OPENCL_ON_ROCclr_ROOT" "$PACKAGE_SRC/opencl-on-rocclr"
fi
if [ -e "$HIPOTHER_ROOT/hipnv" ]; then
mkdir -p "$PACKAGE_SRC/hipother"
echo "Copying hipother"
progressCopy "$HIPOTHER_ROOT" "$PACKAGE_SRC/hipother"
fi
mkdir -p "$PACKAGE_SRC/hip-tests"
echo "Copying hip-tests"
progressCopy "$HIP_CATCH_TESTS_ROOT" "$PACKAGE_SRC/hip-tests"
}
package_hip_on_rocclr()
{
echo "Packagin HIP-on-ROCclr"
pushd "$BUILD_PATH"
cmake --build . -- $MAKEOPTS package
copy_if DEB "${CPACKGEN:-"DEB;RPM"}" "$PACKAGE_DEB" *.deb
copy_if RPM "${CPACKGEN:-"DEB;RPM"}" "$PACKAGE_RPM" *.rpm
popd
}
print_output_directory() {
case ${PKGTYPE} in
("deb")
echo ${PACKAGE_DEB};;
("rpm")
echo ${PACKAGE_RPM};;
(*)
echo "Invalid package type \"${PKGTYPE}\" provided for -o" >&2; exit 1;;
esac
exit
}
case $TARGET in
(clean) clean_hip_on_rocclr; clean_hip_tests ;;
(build) build_hip_on_rocclr; build_catch_tests; package_hip_on_rocclr; package_samples; copy_hip_tests;;
(outdir) print_output_directory ;;
(*) die "Invalid target $TARGET" ;;
esac
echo "Operation complete"

View File

@@ -0,0 +1,65 @@
#!/bin/bash
set -ex
source "$(dirname "${BASH_SOURCE[0]}")/compute_helper.sh"
set_component_src hipBLAS
build_hipblas() {
echo "Start build"
CXX="g++"
CLIENTS_SAMPLES="ON"
if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
set_asan_env_vars
set_address_sanitizer_on
CLIENTS_SAMPLES="OFF"
fi
echo "C compiler: $CC"
echo "CXX compiler: $CXX"
echo "FC compiler: $FC"
cd $COMPONENT_SRC
mkdir -p "$BUILD_DIR" && cd "$BUILD_DIR"
if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
rebuild_lapack
fi
cmake \
${LAUNCHER_FLAGS} \
$(rocm_common_cmake_params) \
-DUSE_CUDA=OFF \
-DBUILD_CLIENTS_TESTS=ON \
-DBUILD_CLIENTS_BENCHMARKS=ON \
-DBUILD_CLIENTS_SAMPLES="${CLIENTS_SAMPLES}" \
-DCPACK_SET_DESTDIR=OFF \
-DBUILD_ADDRESS_SANITIZER="${ADDRESS_SANITIZER}" \
"$COMPONENT_SRC"
cmake --build "$BUILD_DIR" -- -j${PROC}
cmake --build "$BUILD_DIR" -- install
cmake --build "$BUILD_DIR" -- package
rm -rf _CPack_Packages/ && find -name '*.o' -delete
mkdir -p $PACKAGE_DIR && cp ${BUILD_DIR}/*.${PKGTYPE} $PACKAGE_DIR
show_build_cache_stats
}
clean_hipblas() {
echo "Cleaning hipBLAS build directory: ${BUILD_DIR} ${PACKAGE_DIR}"
rm -rf "$BUILD_DIR" "$PACKAGE_DIR"
echo "Done!"
}
stage2_command_args "$@"
case $TARGET in
build) build_hipblas ;;
outdir) print_output_directory ;;
clean) clean_hipblas ;;
*) die "Invalid target $TARGET" ;;
esac

View File

@@ -0,0 +1,69 @@
#!/bin/bash
set -ex
source "$(dirname "${BASH_SOURCE[0]}")/compute_helper.sh"
set_component_src hipBLASLt
build_hipblaslt() {
echo "Start build"
if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
set_asan_env_vars
set_address_sanitizer_on
fi
cd $COMPONENT_SRC
mkdir -p "$BUILD_DIR" && cd "$BUILD_DIR"
if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
rebuild_lapack
fi
if [ -n "$GPU_ARCHS" ]; then
GPU_TARGETS="$GPU_ARCHS"
else
# gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942
GPU_TARGETS=all
fi
CXX=$(set_build_variables CXX)\
cmake \
-DAMDGPU_TARGETS=${GPU_TARGETS} \
${LAUNCHER_FLAGS} \
$(rocm_common_cmake_params) \
-DTensile_LOGIC= \
-DTensile_CODE_OBJECT_VERSION=default \
-DTensile_CPU_THREADS= \
-DTensile_LIBRARY_FORMAT=msgpack \
-DBUILD_CLIENTS_SAMPLES=ON \
-DBUILD_CLIENTS_TESTS=ON \
-DBUILD_CLIENTS_BENCHMARKS=ON \
-DCPACK_SET_DESTDIR=OFF \
-DBUILD_ADDRESS_SANITIZER="${ADDRESS_SANITIZER}" \
"$COMPONENT_SRC"
cmake --build "$BUILD_DIR" -- -j${PROC}
cmake --build "$BUILD_DIR" -- install
cmake --build "$BUILD_DIR" -- package
rm -rf _CPack_Packages/ && find -name '*.o' -delete
mkdir -p $PACKAGE_DIR && cp ${BUILD_DIR}/*.${PKGTYPE} $PACKAGE_DIR
show_build_cache_stats
}
clean_hipblaslt() {
echo "Cleaning hipBLASLt build directory: ${BUILD_DIR} ${PACKAGE_DIR}"
rm -rf "$BUILD_DIR" "$PACKAGE_DIR"
echo "Done!"
}
stage2_command_args "$@"
case $TARGET in
build) build_hipblaslt ;;
outdir) print_output_directory ;;
clean) clean_hipblaslt ;;
*) die "Invalid target $TARGET" ;;
esac

118
tools/rocm-build/build_hipcc.sh Executable file
View File

@@ -0,0 +1,118 @@
#!/bin/bash
source "$(dirname "${BASH_SOURCE}")/compute_utils.sh"
printUsage() {
echo
echo "Usage: $(basename "${BASH_SOURCE}") [options ...]"
echo
echo "Options:"
echo " -a, --address_sanitizer Enable address sanitizer"
echo " -c, --clean Clean output and delete all intermediate work"
echo " -h, --help Prints this help"
echo " -o, --outdir <pkg_type> Print path of output directory containing packages of
type referred to by pkg_type"
echo " -r, --release Makes a release build"
echo
return 0
}
API_NAME=hipcc
PROJ_NAME=$API_NAME
TARGET="build"
MAKEOPTS="$DASH_JAY"
BUILD_TYPE="Debug"
BUILD_DIR=$(getBuildPath $API_NAME)
PACKAGE_DEB=$(getPackageRoot)/deb/$API_NAME
PACKAGE_RPM=$(getPackageRoot)/rpm/$API_NAME
PACKAGE_SRC="$(getSrcPath)"
while [ "$1" != "" ];
do
case $1 in
(-a | --address_sanitizer)
ack_and_ignore_asan ;;
(-c | --clean)
TARGET="clean" ;;
(-o | --outdir)
TARGET="outdir"; PKGTYPE=$2 ; OUT_DIR_SPECIFIED=1 ; ((CLEAN_OR_OUT|=2)) ; shift 1 ;;
(-r | --release)
BUILD_TYPE="RelWithDebInfo" ;;
(-h | --help)
printUsage ; exit 0 ;;
(*)
echo "Invalid option [$1]" >&2; printUsage; exit 1 ;;
esac
shift 1
done
clean() {
echo "Cleaning hipcc"
rm -rf $BUILD_DIR
echo "Cleaning up hipcc DEB and RPM packages"
rm -rf $PACKAGE_DEB
rm -rf $PACKAGE_RPM
}
copy_hipcc_sources() {
echo "Clean up hipcc build folder"
rm -rf "$PACKAGE_SRC/hipcc"
echo "Copy hipcc sources"
mkdir -p "$PACKAGE_SRC/hipcc"
progressCopy "$HIPCC_ROOT" "$PACKAGE_SRC/hipcc"
}
build() {
echo "Build hipcc binary"
mkdir -p "$BUILD_DIR"
pushd "$BUILD_DIR"
if ! [ -e "$HIPCC_ROOT/CMakeLists.txt" ] ; then
echo "No source for hipcc, exiting. this is not an error" >&2
exit 0
fi
cmake \
$(rocm_cmake_params) \
$(rocm_common_cmake_params) \
-DHIPCC_BACKWARD_COMPATIBILITY=OFF \
-DCMAKE_INSTALL_PREFIX="$OUT_DIR" \
$HIPCC_ROOT
popd
cmake --build "$BUILD_DIR" -- $MAKEOPTS
echo "Installing and Packaging hipcc"
cmake --build "$BUILD_DIR" -- $MAKEOPTS install
cmake --build "$BUILD_DIR" -- $MAKEOPTS package
copy_if DEB "${CPACKGEN:-"DEB;RPM"}" "$PACKAGE_DEB" $BUILD_DIR/hipcc*.deb
copy_if RPM "${CPACKGEN:-"DEB;RPM"}" "$PACKAGE_RPM" $BUILD_DIR/hipcc*.rpm
}
print_output_directory() {
case ${PKGTYPE} in
("deb")
echo ${PACKAGE_DEB};;
("rpm")
echo ${PACKAGE_RPM};;
(*)
echo "Invalid package type \"${PKGTYPE}\" provided for -o" >&2; exit 1 ;;
esac
exit
}
case $TARGET in
(clean) clean ;;
(build) build ; copy_hipcc_sources ;;
(outdir) print_output_directory ;;
(*) die "Invalid target $TARGET" ;;
esac
echo "Operation complete"

View File

@@ -0,0 +1,60 @@
#!/bin/bash
set -ex
source "$(dirname "${BASH_SOURCE[0]}")/compute_helper.sh"
set_component_src hipCUB
build_hipcub() {
echo "Start build"
cd $COMPONENT_SRC
if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
set_asan_env_vars
set_address_sanitizer_on
ASAN_CMAKE_PARAMS="false"
fi
mkdir -p "$BUILD_DIR" && cd "$BUILD_DIR"
if [ -n "$GPU_ARCHS" ]; then
GPU_TARGETS="$GPU_ARCHS"
else
GPU_TARGETS="gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101"
fi
CXX=$(set_build_variables CXX)\
cmake \
${LAUNCHER_FLAGS} \
$(rocm_common_cmake_params) \
-DCMAKE_MODULE_PATH="${ROCM_PATH}/lib/cmake/hip;${ROCM_PATH}/hip/cmake" \
-Drocprim_DIR="${ROCM_PATH}/rocprim" \
-DBUILD_TEST=ON \
-DAMDGPU_TARGETS=${GPU_TARGETS} \
"$COMPONENT_SRC"
cmake --build "$BUILD_DIR" -- -j${PROC}
cmake --build "$BUILD_DIR" -- install
cmake --build "$BUILD_DIR" -- package
rm -rf _CPack_Packages/ && find -name '*.o' -delete
mkdir -p $PACKAGE_DIR && cp ${BUILD_DIR}/*.${PKGTYPE} $PACKAGE_DIR
show_build_cache_stats
}
clean_hipcub() {
echo "Cleaning hipCUB build directory: ${BUILD_DIR} ${PACKAGE_DIR}"
rm -rf "$BUILD_DIR" "$PACKAGE_DIR"
echo "Done!"
}
stage2_command_args "$@"
case $TARGET in
build) build_hipcub ;;
outdir) print_output_directory ;;
clean) clean_hipcub ;;
*) die "Invalid target $TARGET" ;;
esac

View File

@@ -0,0 +1,61 @@
#!/bin/bash
set -ex
source "$(dirname "${BASH_SOURCE[0]}")/compute_helper.sh"
set_component_src hipFFT
build_hipfft() {
echo "Start build"
if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
set_asan_env_vars
set_address_sanitizer_on
fi
cd $COMPONENT_SRC
mkdir -p "$BUILD_DIR" && cd "$BUILD_DIR"
if [ -n "$GPU_ARCHS" ]; then
GPU_TARGETS="$GPU_ARCHS"
else
GPU_TARGETS="gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101"
fi
cmake \
-DCMAKE_CXX_COMPILER=$(set_build_variables CXX) \
${LAUNCHER_FLAGS} \
$(rocm_common_cmake_params) \
-DAMDGPU_TARGETS=${GPU_TARGETS} \
-DCMAKE_MODULE_PATH="${ROCM_PATH}/lib/cmake/hip" \
-DCMAKE_SKIP_BUILD_RPATH=TRUE \
-DBUILD_ADDRESS_SANITIZER="${ADDRESS_SANITIZER}" \
-DBUILD_CLIENTS_TESTS=ON \
-DBUILD_CLIENTS_SAMPLES=ON \
-L \
"$COMPONENT_SRC"
cmake --build "$BUILD_DIR" -- -j${PROC}
cmake --build "$BUILD_DIR" -- package
rm -rf _CPack_Packages/ && find -name '*.o' -delete
mkdir -p $PACKAGE_DIR && cp ${BUILD_DIR}/*.${PKGTYPE} $PACKAGE_DIR
show_build_cache_stats
}
clean_hipfft() {
echo "Cleaning hipFFT build directory: ${BUILD_DIR} ${PACKAGE_DIR}"
rm -rf "$BUILD_DIR" "$PACKAGE_DIR"
echo "Done!"
}
stage2_command_args "$@"
case $TARGET in
build) build_hipfft ;;
outdir) print_output_directory ;;
clean) clean_hipfft ;;
*) die "Invalid target $TARGET" ;;
esac

View File

@@ -0,0 +1,50 @@
#!/bin/bash
set -ex
source "$(dirname "${BASH_SOURCE[0]}")/compute_helper.sh"
set_component_src hipfort
build_hipfort() {
echo "Start build"
mkdir -p "$BUILD_DIR" && cd "$BUILD_DIR"
cmake --trace \
-DHIPFORT_INSTALL_DIR="${ROCM_PATH}" \
-DCMAKE_PREFIX_PATH="${ROCM_PATH}/llvm;${ROCM_PATH}" \
-DCMAKE_BUILD_TYPE=Release \
-DHIPFORT_COMPILER="${ROCM_PATH}/${ROCM_LLVMDIR}/bin/flang" \
-DCMAKE_Fortran_FLAGS="-Mfree" \
-DHIPFORT_COMPILER_FLAGS="-cpp" \
-DCMAKE_Fortran_FLAGS_DEBUG="" \
${LAUNCHER_FLAGS} \
-DROCM_SYMLINK_LIBS=OFF \
-DCMAKE_INSTALL_PREFIX=${ROCM_PATH} \
-DHIPFORT_AR="${ROCM_PATH}/${ROCM_LLVMDIR}/bin/llvm-ar" \
-DHIPFORT_RANLIB="${ROCM_PATH}/${ROCM_LLVMDIR}/bin/llvm-ranlib" \
"$COMPONENT_SRC"
cmake --build "$BUILD_DIR" -- -j${PROC}
cmake --build "$BUILD_DIR" -- install
cmake --build "$BUILD_DIR" -- package
rm -rf _CPack_Packages/ && find -name '*.o' -delete
mkdir -p $PACKAGE_DIR && cp ${BUILD_DIR}/*.${PKGTYPE} $PACKAGE_DIR
show_build_cache_stats
}
clean_hipfort() {
echo "Cleaning hipFORT build directory: ${BUILD_DIR} ${PACKAGE_DIR}"
rm -rf "$BUILD_DIR" "$PACKAGE_DIR"
echo "Done!"
}
stage2_command_args "$@"
case $TARGET in
build) build_hipfort ;;
outdir) print_output_directory ;;
clean) clean_hipfort ;;
*) die "Invalid target $TARGET" ;;
esac

View File

@@ -0,0 +1,142 @@
#!/bin/bash
source "$(dirname "${BASH_SOURCE}")/compute_utils.sh"
printUsage() {
echo
echo "Usage: $(basename "${BASH_SOURCE}") [options ...]"
echo
echo "Options:"
echo " -c, --clean Clean output and delete all intermediate work"
echo " -o, --outdir <pkg_type> Print path of output directory containing packages of
type referred to by pkg_type"
echo " -h, --help Prints this help"
echo " -r, --release Make a release build"
echo " -a, --address_sanitizer Enable address sanitizer"
echo " -s, --static Supports static CI by accepting this param & not bailing out. No effect of the param though"
echo
return 0
}
TARGET="build"
MAKEOPTS="$DASH_JAY"
HIPIFY_CLANG_BUILD_DIR="$(getBuildPath $HIPIFY_ROOT)"
HIPIFY_CLANG_DIST_DIR="$HIPIFY_CLANG_BUILD_DIR/dist"
BUILD_TYPE="Debug"
PACKAGE_ROOT="$(getPackageRoot)"
HIPIFY_CLANG_HASH=""
LIGHTNING_PATH="$ROCM_INSTALL_PATH/llvm"
ADDRESS_SANITIZER=false
DEB_PATH="$(getDebPath hipify)"
RPM_PATH="$(getRpmPath hipify)"
SHARED_LIBS="ON"
CLEAN_OR_OUT=0;
MAKETARGET="deb"
PKGTYPE="deb"
VALID_STR=`getopt -o hcraso: --long help,clean,release,static,address_sanitizer,outdir: -- "$@"`
eval set -- "$VALID_STR"
while true ;
do
case "$1" in
(-h | --help)
printUsage ; exit 0;;
(-c | --clean)
TARGET="clean" ; ((CLEAN_OR_OUT|=1)) ; shift ;;
(-r | --release)
BUILD_TYPE="RelWithDebInfo" ; shift ;;
(-a | --address_sanitizer)
set_asan_env_vars
set_address_sanitizer_on
ADDRESS_SANITIZER=true ; shift ;;
(-s | --static)
SHARED_LIBS="OFF" ; shift ;;
(-o | --outdir)
TARGET="outdir"; PKGTYPE=$2 ; OUT_DIR_SPECIFIED=1 ; ((CLEAN_OR_OUT|=2)) ; shift 2 ;;
--) shift; break;;
(*)
echo " This should never come but just incase : UNEXPECTED ERROR Parm : [$1] ">&2 ; exit 20;;
esac
done
RET_CONFLICT=1
check_conflicting_options $CLEAN_OR_OUT $PKGTYPE $MAKETARGET
if [ $RET_CONFLICT -ge 30 ]; then
print_vars $API_NAME $TARGET $BUILD_TYPE $SHARED_LIBS $CLEAN_OR_OUT $PKGTYPE $MAKETARGET
exit $RET_CONFLICT
fi
clean_hipify() {
echo "Cleaning hipify-clang"
rm -rf "$HIPIFY_CLANG_BUILD_DIR"
rm -rf "$HIPIFY_CLANG_DIST_DIR"
rm -rf "$DEB_PATH"
rm -rf "$RPM_PATH"
}
package_hipify() {
if [ "$PACKAGEEXT" = "deb" ]; then
rm -rf "$DEB_PATH"
mkdir -p "$DEB_PATH"
fi
if [ "$PACKAGEEXT" = "rpm" ]; then
rm -rf "$RPM_PATH"
mkdir -p "$RPM_PATH"
fi
pushd "$HIPIFY_CLANG_BUILD_DIR"
make $MAKEOPTS package_hipify-clang
popd
copy_if DEB "${CPACKGEN:-"DEB;RPM"}" "$DEB_PATH" $HIPIFY_CLANG_BUILD_DIR/hipify*.deb
copy_if RPM "${CPACKGEN:-"DEB;RPM"}" "$RPM_PATH" $HIPIFY_CLANG_BUILD_DIR/hipify*.rpm
}
build_hipify() {
echo "Building hipify-clang binaries"
mkdir -p "$HIPIFY_CLANG_BUILD_DIR"
mkdir -p "$HIPIFY_CLANG_DIST_DIR"
pushd "$HIPIFY_CLANG_BUILD_DIR"
cmake \
-DCMAKE_BUILD_TYPE="$BUILD_TYPE" \
$(rocm_common_cmake_params) \
-DCMAKE_INSTALL_PREFIX="$HIPIFY_CLANG_DIST_DIR" \
-DCPACK_PACKAGING_INSTALL_PREFIX=$ROCM_INSTALL_PATH \
-DCMAKE_PREFIX_PATH="$LIGHTNING_PATH" \
-DADDRESS_SANITIZER="$ADDRESS_SANITIZER" \
$HIPIFY_ROOT
cmake --build . -- $MAKEOPTS install
popd
pushd "$HIPIFY_ROOT"
HIPIFY_CLANG_HASH=`git describe --dirty --long --match [0-9]* --always`
popd
}
print_output_directory() {
case ${PKGTYPE} in
("deb")
echo ${DEB_PATH};;
("rpm")
echo ${RPM_PATH};;
(*)
echo "Invalid package type \"${PKGTYPE}\" provided for -o" >&2; exit 1;;
esac
exit
}
case $TARGET in
(clean) clean_hipify ;;
(build) build_hipify; package_hipify ;;
(outdir) print_output_directory ;;
(*) die "Invalid target $TARGET" ;;
esac
echo "Operation complete"

View File

@@ -0,0 +1,90 @@
#!/bin/bash
set -ex
source "$(dirname "${BASH_SOURCE[0]}")/compute_helper.sh"
set_component_src hipRAND
while [ "$1" != "" ];
do
case $1 in
-o | --outdir )
shift 1; PKGTYPE=$1 ; TARGET="outdir" ;;
-c | --clean )
TARGET="clean" ;;
*)
break ;;
esac
shift 1
done
build_hiprand() {
echo "Start build"
if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
set_asan_env_vars
set_address_sanitizer_on
fi
cd $COMPONENT_SRC
remote_name=$(git remote show | head -n 1)
[ "$remote_name" == "origin" ] || git remote rename "$remote_name" origin
git submodule update --init --force
mkdir "$BUILD_DIR" && cd "$BUILD_DIR"
if [ -n "$GPU_ARCHS" ]; then
GPU_TARGETS="$GPU_ARCHS"
else
GPU_TARGETS="gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101"
fi
CXX=$(set_build_variables CXX)\
cmake \
${LAUNCHER_FLAGS} \
$(rocm_common_cmake_params) \
-DAMDGPU_TARGETS=${GPU_TARGETS} \
-DBUILD_TEST=ON \
-DBUILD_BENCHMARK=ON \
-DBUILD_CRUSH_TEST=ON \
-DDEPENDENCIES_FORCE_DOWNLOAD=ON \
-DHIP_COMPILER=clang \
-DCMAKE_MODULE_PATH="${ROCM_PATH}/lib/cmake/hip" \
-DBUILD_ADDRESS_SANITIZER="${ADDRESS_SANITIZER}" \
"$COMPONENT_SRC"
cmake --build "$BUILD_DIR" -- -j${PROC}
cmake --build "$BUILD_DIR" -- install
cmake --build "$BUILD_DIR" -- package
rm -rf _CPack_Packages/ && find -name '*.o' -delete
mkdir -p $PACKAGE_DIR && cp ${BUILD_DIR}/*.${PKGTYPE} $PACKAGE_DIR
$SCCACHE_BIN -s || echo "Unable to display sccache stats"
}
clean_hiprand() {
echo "Cleaning hipRAND build directory: ${BUILD_DIR} ${PACKAGE_DIR}"
rm -rf "$BUILD_DIR" "$PACKAGE_DIR"
echo "Done!"
}
print_output_directory() {
case ${PKGTYPE} in
("deb")
echo ${DEB_PATH};;
("rpm")
echo ${RPM_PATH};;
(*)
echo "Invalid package type \"${PKGTYPE}\" provided for -o" >&2; exit 1;;
esac
exit
}
case $TARGET in
build) build_hiprand ;;
outdir) print_output_directory ;;
clean) clean_hiprand ;;
*) die "Invalid target $TARGET" ;;
esac

View File

@@ -0,0 +1,64 @@
#!/bin/bash
set -ex
source "$(dirname "${BASH_SOURCE[0]}")/compute_helper.sh"
set_component_src hipSOLVER
build_hipsolver() {
echo "Start build"
cd $COMPONENT_SRC
CXX="g++"
if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
set_asan_env_vars
set_address_sanitizer_on
fi
echo "C compiler: $CC"
echo "CXX compiler: $CXX"
echo "FC compiler: $FC"
mkdir -p "$BUILD_DIR" && cd "$BUILD_DIR"
if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
rebuild_lapack
fi
cmake \
-DUSE_CUDA=OFF \
${LAUNCHER_FLAGS} \
$(rocm_common_cmake_params) \
-DBUILD_CLIENTS_TESTS=ON \
-DBUILD_CLIENTS_BENCHMARKS=ON \
-DBUILD_CLIENTS_SAMPLES=ON \
-DCPACK_SET_DESTDIR=OFF \
-DBUILD_ADDRESS_SANITIZER="${ADDRESS_SANITIZER}" \
"$COMPONENT_SRC"
cmake --build "$BUILD_DIR" -- -j${PROC}
cmake --build "$BUILD_DIR" -- install
cmake --build "$BUILD_DIR" -- package
rm -rf _CPack_Packages/ && find -name '*.o' -delete
mkdir -p $PACKAGE_DIR && cp ${BUILD_DIR}/*.${PKGTYPE} $PACKAGE_DIR
show_build_cache_stats
}
clean_hipsolver() {
echo "Cleaning hipSOLVER build directory: ${BUILD_DIR} ${PACKAGE_DIR}"
rm -rf "$BUILD_DIR" "$PACKAGE_DIR"
echo "Done!"
}
stage2_command_args "$@"
case $TARGET in
build) build_hipsolver ;;
outdir) print_output_directory ;;
clean) clean_hipsolver ;;
*) die "Invalid target $TARGET" ;;
esac

View File

@@ -0,0 +1,61 @@
#!/bin/bash
set -ex
source "$(dirname "${BASH_SOURCE[0]}")/compute_helper.sh"
PATH=${ROCM_PATH}/bin:$PATH
set_component_src hipSPARSE
build_hipsparse() {
echo "Start build"
cd $COMPONENT_SRC
CXX="g++"
if [ "${ENABLE_ADDRESS_SANITIZER}" == "true" ]; then
set_asan_env_vars
set_address_sanitizer_on
fi
echo "C compiler: $CC"
echo "CXX compiler: $CXX"
mkdir -p "$BUILD_DIR" && cd "$BUILD_DIR"
cmake \
-DCPACK_SET_DESTDIR=OFF \
${LAUNCHER_FLAGS} \
$(rocm_common_cmake_params) \
-DUSE_CUDA=OFF \
-DBUILD_CLIENTS_SAMPLES=ON \
-DBUILD_CLIENTS_TESTS=ON \
-DCMAKE_INSTALL_PREFIX=${ROCM_PATH} \
-DCMAKE_MODULE_PATH="${ROCM_PATH}/lib/cmake/hip;${ROCM_PATH}/hip/cmake" \
-DBUILD_ADDRESS_SANITIZER="${ADDRESS_SANITIZER}" \
"$COMPONENT_SRC"
cmake --build "$BUILD_DIR" -- -j${PROC}
cmake --build "$BUILD_DIR" -- install
cmake --build "$BUILD_DIR" -- package
rm -rf _CPack_Packages/ && find -name '*.o' -delete
mkdir -p $PACKAGE_DIR && cp ${BUILD_DIR}/*.${PKGTYPE} $PACKAGE_DIR
show_build_cache_stats
}
clean_hipsparse() {
echo "Cleaning hipSPARSE build directory: ${BUILD_DIR} ${PACKAGE_DIR}"
rm -rf "$BUILD_DIR" "$PACKAGE_DIR"
echo "Done!"
}
stage2_command_args "$@"
case $TARGET in
build) build_hipsparse ;;
outdir) print_output_directory ;;
clean) clean_hipsparse ;;
*) die "Invalid target $TARGET" ;;
esac

Some files were not shown because too many files have changed in this diff Show More