mirror of
https://github.com/ROCm/ROCm.git
synced 2026-01-09 14:48:06 -05:00
Compare commits
505 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
773f5de407 | ||
|
|
b297ced032 | ||
|
|
2dc22ca890 | ||
|
|
85102079ed | ||
|
|
ba95e0e689 | ||
|
|
1691d369e9 | ||
|
|
172b0f7c08 | ||
|
|
c67fac78bd | ||
|
|
e0b8ec4dfb | ||
|
|
38f2d043dc | ||
|
|
3a43bacdda | ||
|
|
48d8fe139b | ||
|
|
7455fe57b8 | ||
|
|
52c0a47e84 | ||
|
|
cbab9a465d | ||
|
|
459283da3c | ||
|
|
1b4f25733d | ||
|
|
b287372be5 | ||
|
|
78e8baf147 | ||
|
|
3e0c8b47e3 | ||
|
|
c3f0b99cc0 | ||
|
|
c9d1679486 | ||
|
|
fdbef17d7b | ||
|
|
6592a41a7f | ||
|
|
65a936023b | ||
|
|
2a64949081 | ||
|
|
0a17434517 | ||
|
|
2be7e5ac1e | ||
|
|
ae80c4a31c | ||
|
|
dd89a692e1 | ||
|
|
bf74351e5a | ||
|
|
f2067767e0 | ||
|
|
effd4174fb | ||
|
|
453751a86f | ||
|
|
fb644412d5 | ||
|
|
e8fdc34b71 | ||
|
|
b4031ef23c | ||
|
|
d0bd4e6f03 | ||
|
|
0056b9453e | ||
|
|
3d1ad79766 | ||
|
|
8683bed11b | ||
|
|
847cd7c423 | ||
|
|
42cad29c04 | ||
|
|
f7b2fe0a48 | ||
|
|
bb199aa2b9 | ||
|
|
2f7b2a7fa1 | ||
|
|
7fd75919d1 | ||
|
|
4490c57c6a | ||
|
|
007f24fe7b | ||
|
|
afbb6e0f61 | ||
|
|
1b5a3e54c2 | ||
|
|
2c6eb9cf2a | ||
|
|
b93fdb811c | ||
|
|
096d91e190 | ||
|
|
02037f4384 | ||
|
|
c64dc46a50 | ||
|
|
702d8e4c8e | ||
|
|
19344d7b61 | ||
|
|
807ec6afcf | ||
|
|
4c04da05c3 | ||
|
|
411334716c | ||
|
|
99f0875e70 | ||
|
|
50658d0812 | ||
|
|
7aeecdf8e2 | ||
|
|
4f669eb2c6 | ||
|
|
7d1f314303 | ||
|
|
c523f51e58 | ||
|
|
b566858909 | ||
|
|
c33b9e3611 | ||
|
|
2646b4841d | ||
|
|
ff2f40d800 | ||
|
|
71bcc5b204 | ||
|
|
fd840df30b | ||
|
|
58e26eede1 | ||
|
|
407a9d4cb0 | ||
|
|
81b7745f8e | ||
|
|
6af62fd30a | ||
|
|
bb692dfd84 | ||
|
|
8d51d0e803 | ||
|
|
66b8b96c72 | ||
|
|
fb098b6354 | ||
|
|
72107dd6d5 | ||
|
|
99c1590057 | ||
|
|
3d86323f88 | ||
|
|
636d4cc736 | ||
|
|
d1ce815d8d | ||
|
|
80ced95526 | ||
|
|
09c6a9fdef | ||
|
|
372ddd5af3 | ||
|
|
eb956cfc5c | ||
|
|
e05cdca54f | ||
|
|
04c7374f41 | ||
|
|
39de859bd1 | ||
|
|
c8531ac7ea | ||
|
|
420bbfa126 | ||
|
|
4881887e2c | ||
|
|
148d6670ad | ||
|
|
9770e9b6ef | ||
|
|
ee4cf66d67 | ||
|
|
908862242a | ||
|
|
6ba30f191c | ||
|
|
674dc355e4 | ||
|
|
c7f3a56811 | ||
|
|
0107fa731e | ||
|
|
a87ec360e1 | ||
|
|
7215e1e8c7 | ||
|
|
e4a59d8c66 | ||
|
|
8108fe7275 | ||
|
|
d3ff9d7c8e | ||
|
|
939ee7de0c | ||
|
|
f1e6c285dd | ||
|
|
ff1d9b4d69 | ||
|
|
ef3fa601d5 | ||
|
|
576191a104 | ||
|
|
2db07b5cda | ||
|
|
fe3dc988b8 | ||
|
|
36c879b7e0 | ||
|
|
91450dca10 | ||
|
|
2de92767e6 | ||
|
|
54d226acd9 | ||
|
|
f46d7ec00f | ||
|
|
09c946b6fb | ||
|
|
5285669d98 | ||
|
|
9b3138cffa | ||
|
|
61fffe3250 | ||
|
|
43ccfbbe80 | ||
|
|
1515fb3779 | ||
|
|
410a69efe4 | ||
|
|
248cbf8bc1 | ||
|
|
0171dced89 | ||
|
|
f2d6675839 | ||
|
|
7d0fad9aa8 | ||
|
|
4132a2609c | ||
|
|
c56d5b7495 | ||
|
|
a2e2bd3277 | ||
|
|
32d1cdcd90 | ||
|
|
ac16524ebd | ||
|
|
157d86b780 | ||
|
|
35ca027aa4 | ||
|
|
90c1d9068f | ||
|
|
cb8d21a0df | ||
|
|
6f8cf36279 | ||
|
|
8eb5fef37c | ||
|
|
a5f0b30a47 | ||
|
|
2ec051dec5 | ||
|
|
fd6bbe18a7 | ||
|
|
14ada81c41 | ||
|
|
a613bd6824 | ||
|
|
b3459da524 | ||
|
|
eba211d7f1 | ||
|
|
14bb59fca9 | ||
|
|
a98236a4e3 | ||
|
|
5cb6bfe151 | ||
|
|
6e7422ded7 | ||
|
|
7b7ff53985 | ||
|
|
019796dc63 | ||
|
|
f21cfe1171 | ||
|
|
170cb47a4f | ||
|
|
d19a8e4a83 | ||
|
|
3a0b8529ed | ||
|
|
f9d7fc2e6a | ||
|
|
d424687191 | ||
|
|
35e6e50888 | ||
|
|
91cfe98eb3 | ||
|
|
036aaa2e78 | ||
|
|
78258e0f85 | ||
|
|
c79d9f74ef | ||
|
|
fb1b78c6f0 | ||
|
|
3a70d75f5e | ||
|
|
61e1f088a1 | ||
|
|
1f6e5c5e04 | ||
|
|
e8a0769842 | ||
|
|
6f9579d052 | ||
|
|
245d53a021 | ||
|
|
35dbbb22bc | ||
|
|
03dc8cee00 | ||
|
|
323e5fd27a | ||
|
|
b11fd7b492 | ||
|
|
5e2efa05a6 | ||
|
|
29a90f0271 | ||
|
|
c06242bb89 | ||
|
|
68e8453ca5 | ||
|
|
503b8bcc86 | ||
|
|
e3d97d339a | ||
|
|
978c58d196 | ||
|
|
a366048b64 | ||
|
|
4c3e33c291 | ||
|
|
89758e67d8 | ||
|
|
5d0f201b4d | ||
|
|
e3677d89a6 | ||
|
|
f20edab8fc | ||
|
|
6f84d50011 | ||
|
|
57dd082f28 | ||
|
|
eeea0d2180 | ||
|
|
93c6d17922 | ||
|
|
f91c2b9b4a | ||
|
|
5e6b66ca39 | ||
|
|
6b8b359d03 | ||
|
|
38e659e5f0 | ||
|
|
0894547f5a | ||
|
|
aca31170c4 | ||
|
|
d21ec9eea5 | ||
|
|
189c269350 | ||
|
|
774cb7a1b3 | ||
|
|
024cb4db76 | ||
|
|
945fb286f7 | ||
|
|
ee93101541 | ||
|
|
e31841312b | ||
|
|
41b5298659 | ||
|
|
58790154b2 | ||
|
|
6f7f73ac0b | ||
|
|
b2e3bc8565 | ||
|
|
52979e2fdb | ||
|
|
0ea5216ace | ||
|
|
2e1b4dd5ee | ||
|
|
5c7b993c0c | ||
|
|
2d79b3c4bd | ||
|
|
fd59b5fbac | ||
|
|
0a643f4686 | ||
|
|
d9e5744f7a | ||
|
|
ccb849ec02 | ||
|
|
42d4867964 | ||
|
|
375359a5dd | ||
|
|
e92745f1ff | ||
|
|
0fa72358d3 | ||
|
|
6fec268a4e | ||
|
|
ff14cd1ff5 | ||
|
|
8f65688653 | ||
|
|
33d1493adb | ||
|
|
4b6c7776a2 | ||
|
|
af811daa1b | ||
|
|
d6c045e482 | ||
|
|
78b24cad39 | ||
|
|
753a94c0bb | ||
|
|
6ecad57c62 | ||
|
|
977554809a | ||
|
|
7b00f4493b | ||
|
|
95c439a272 | ||
|
|
94e04fbdc0 | ||
|
|
7ab59de8af | ||
|
|
175c817563 | ||
|
|
25516d312e | ||
|
|
30c345629a | ||
|
|
210dc94bbb | ||
|
|
a54023ccb8 | ||
|
|
17e3362dc7 | ||
|
|
0f9c0d884d | ||
|
|
c890de4b16 | ||
|
|
4ea77ab515 | ||
|
|
c0512612f4 | ||
|
|
1c81ac3747 | ||
|
|
4bafa42e52 | ||
|
|
493801e670 | ||
|
|
1a5152b7b3 | ||
|
|
874c881012 | ||
|
|
bdcaeea74c | ||
|
|
b02669acf7 | ||
|
|
844f10b2b1 | ||
|
|
d6c14920b4 | ||
|
|
4affe10a7c | ||
|
|
81341ef435 | ||
|
|
abacd328f9 | ||
|
|
80b2fb6e26 | ||
|
|
b53e8decfc | ||
|
|
5fcc2eafde | ||
|
|
2eb0d77bc6 | ||
|
|
d84b41908f | ||
|
|
986f8284d1 | ||
|
|
d92d9268dc | ||
|
|
1629d3f0ea | ||
|
|
6cf6b34b2e | ||
|
|
c35a0a121a | ||
|
|
412e383654 | ||
|
|
39f6fc187d | ||
|
|
05b480fb28 | ||
|
|
4fa44d90db | ||
|
|
c9ef13d823 | ||
|
|
f02172050b | ||
|
|
154dbe297a | ||
|
|
993a0a4fd4 | ||
|
|
c03662f410 | ||
|
|
442d7e4750 | ||
|
|
a09a8f517e | ||
|
|
0bbaab645d | ||
|
|
4b80405e2e | ||
|
|
d92e5b6c12 | ||
|
|
91fce2e134 | ||
|
|
27d53cf082 | ||
|
|
bc084246be | ||
|
|
9827ba7ff2 | ||
|
|
bafda50153 | ||
|
|
cae65c6c43 | ||
|
|
6a66167486 | ||
|
|
0f3543d6e8 | ||
|
|
678691c3d7 | ||
|
|
5cb3debed9 | ||
|
|
dd5d710727 | ||
|
|
eca1ecde92 | ||
|
|
ed1e414710 | ||
|
|
20c90fc406 | ||
|
|
6e39614b22 | ||
|
|
f7873ac74e | ||
|
|
a86fba556b | ||
|
|
7603fed080 | ||
|
|
9932cd4ac2 | ||
|
|
e8d104124f | ||
|
|
26f708da87 | ||
|
|
5a5e4dbb6e | ||
|
|
1c3dae75e1 | ||
|
|
bab853a0d3 | ||
|
|
5c7ccb3c26 | ||
|
|
f216b371a0 | ||
|
|
37faf170b1 | ||
|
|
8c40d14d7e | ||
|
|
d5101532f7 | ||
|
|
ef4e7ca1fe | ||
|
|
be68246824 | ||
|
|
1626ee4d8b | ||
|
|
f80044c7db | ||
|
|
412f6f2b0e | ||
|
|
bee7c1223f | ||
|
|
8af34e2026 | ||
|
|
0475650f00 | ||
|
|
cb73e9145a | ||
|
|
7316031fe6 | ||
|
|
76cb264f34 | ||
|
|
9c36e44a91 | ||
|
|
1037f8845a | ||
|
|
c2e31f2d2b | ||
|
|
882f71302a | ||
|
|
3d2f10ce0c | ||
|
|
81f5314368 | ||
|
|
60e3a8107c | ||
|
|
b800801427 | ||
|
|
5637deb81e | ||
|
|
df1ae524b2 | ||
|
|
06fd378036 | ||
|
|
cbd4e8f0ba | ||
|
|
b07ae4ba6c | ||
|
|
2fe270beb3 | ||
|
|
1660ac335a | ||
|
|
b357ba993b | ||
|
|
29f4d65da5 | ||
|
|
2de5a33aec | ||
|
|
e805e98701 | ||
|
|
a2785d2b5a | ||
|
|
8882410560 | ||
|
|
0af430d1cb | ||
|
|
33bc3c5e2b | ||
|
|
e1a1a4e712 | ||
|
|
355feae2e2 | ||
|
|
b3c566f6b9 | ||
|
|
9a3fc8c773 | ||
|
|
17be0ce7aa | ||
|
|
c9c41a34c2 | ||
|
|
e71b8212f9 | ||
|
|
8c1df97e34 | ||
|
|
957005f596 | ||
|
|
2383edc1fe | ||
|
|
c4b4abe354 | ||
|
|
9b2b1d3a66 | ||
|
|
8617b653f8 | ||
|
|
26ddf7e6ac | ||
|
|
91f21d890f | ||
|
|
a6fbf60594 | ||
|
|
61f09e2ab9 | ||
|
|
0d790615ef | ||
|
|
7098bdc03b | ||
|
|
8eee155585 | ||
|
|
10f6086819 | ||
|
|
964a7cd0b5 | ||
|
|
d3fe7439cf | ||
|
|
56f566c1dc | ||
|
|
e3227d14e6 | ||
|
|
88f1493b68 | ||
|
|
3ca9cb1fcc | ||
|
|
aebf1b4480 | ||
|
|
0840c14b6d | ||
|
|
daa0184d2e | ||
|
|
3b5019e03f | ||
|
|
68f505e375 | ||
|
|
05a66f75fe | ||
|
|
3c37ae88f0 | ||
|
|
985786e98d | ||
|
|
f25e27acf0 | ||
|
|
519364179c | ||
|
|
db43d18c37 | ||
|
|
4f53183696 | ||
|
|
94476f34ca | ||
|
|
4bc1bf00c6 | ||
|
|
76fd6b2290 | ||
|
|
e5345a9cca | ||
|
|
c2080a90c7 | ||
|
|
2f40189575 | ||
|
|
9e1a82d327 | ||
|
|
3aab9e1bc5 | ||
|
|
2b0ce5e5c2 | ||
|
|
f1be2d291a | ||
|
|
07cb61f969 | ||
|
|
c486c39b50 | ||
|
|
e68d9e9ce2 | ||
|
|
bff5c4a955 | ||
|
|
b0abc43c46 | ||
|
|
ceabccad83 | ||
|
|
2628812fc4 | ||
|
|
df3ea80290 | ||
|
|
b6647dfb22 | ||
|
|
08dad2dc41 | ||
|
|
c34fddb26a | ||
|
|
b4c5980a96 | ||
|
|
52ce201401 | ||
|
|
505233473d | ||
|
|
4f4f4556a5 | ||
|
|
4f8426376b | ||
|
|
d476d09aff | ||
|
|
04beef8773 | ||
|
|
95d1752874 | ||
|
|
eabf72c2db | ||
|
|
53bd9b5da4 | ||
|
|
0665e73e2d | ||
|
|
264d353071 | ||
|
|
d58e2b16db | ||
|
|
010a191938 | ||
|
|
977e9c2295 | ||
|
|
eac9772fff | ||
|
|
151a4bd7bc | ||
|
|
9d28684161 | ||
|
|
a7edb17538 | ||
|
|
9ea9b33d14 | ||
|
|
59afdef1fb | ||
|
|
ea8ff1b17d | ||
|
|
808a7709aa | ||
|
|
8cc17e307c | ||
|
|
7fd6146b16 | ||
|
|
e839054e56 | ||
|
|
1d42f7cc62 | ||
|
|
78c4a4c12a | ||
|
|
c587d75701 | ||
|
|
a88151f505 | ||
|
|
98029db4ee | ||
|
|
ff7d9eb17a | ||
|
|
2ec8757ffa | ||
|
|
28c3384433 | ||
|
|
91c26c502d | ||
|
|
0ae99ea21e | ||
|
|
60571680b5 | ||
|
|
65ebbaa117 | ||
|
|
e24bd407c1 | ||
|
|
19156cf2c6 | ||
|
|
0d5f17a58b | ||
|
|
6b93d7a75a | ||
|
|
acdb5c90a6 | ||
|
|
073ac54e47 | ||
|
|
3dfc0cdbf1 | ||
|
|
d0377dd947 | ||
|
|
35ec186cd9 | ||
|
|
da340c3d05 | ||
|
|
1d127d987b | ||
|
|
00b0d9430e | ||
|
|
14acec6000 | ||
|
|
71bc63d2d8 | ||
|
|
7b087769a2 | ||
|
|
08d0840b69 | ||
|
|
c154b7e0a3 | ||
|
|
ae734e7846 | ||
|
|
9f5cd4500c | ||
|
|
51e7d9550f | ||
|
|
55d0a88ec5 | ||
|
|
67f988f58b | ||
|
|
7ee22790ce | ||
|
|
ec05312de7 | ||
|
|
39e7ccd3c5 | ||
|
|
c4135ab541 | ||
|
|
dd56fd4d3a | ||
|
|
80f7dc79b9 | ||
|
|
231aa0bfc6 | ||
|
|
8655fb369a | ||
|
|
306b39ea5e | ||
|
|
9e055d92ce | ||
|
|
85b13c0513 | ||
|
|
dba913095a | ||
|
|
81b9d50c2c | ||
|
|
e9bb2fca36 | ||
|
|
16e96caf80 | ||
|
|
7e0efaa6b0 | ||
|
|
af4f291005 | ||
|
|
b9218832bc | ||
|
|
3f2c1d65eb | ||
|
|
ee4287fdd7 | ||
|
|
d63db0be41 | ||
|
|
6a37323fe7 | ||
|
|
b6b7b32e6d | ||
|
|
7c11126938 | ||
|
|
ac0b72497e | ||
|
|
68bc7f83da | ||
|
|
5bbe8ecdcc | ||
|
|
6bc408d051 | ||
|
|
20762b9a96 | ||
|
|
fa5395a1a6 | ||
|
|
254d863b91 | ||
|
|
03bf20e614 | ||
|
|
9786a75390 | ||
|
|
95543cae2a | ||
|
|
1cf3eef9da | ||
|
|
3c71bb25e8 |
@@ -128,6 +128,9 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||
parameters:
|
||||
cmakeVersion: '3.28.6'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
@@ -152,6 +155,7 @@ jobs:
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
|
||||
-DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
|
||||
@@ -192,6 +196,9 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||
parameters:
|
||||
cmakeVersion: '3.28.6'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
@@ -217,6 +224,7 @@ jobs:
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
|
||||
-DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: hip_clr_combined
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -15,6 +34,7 @@ parameters:
|
||||
default:
|
||||
- cmake
|
||||
- libnuma-dev
|
||||
- libsimde-dev
|
||||
- mesa-common-dev
|
||||
- ninja-build
|
||||
- ocl-icd-libopencl1
|
||||
@@ -35,93 +55,24 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- llvm-project
|
||||
|
||||
# hip and clr are tightly-coupled
|
||||
# run this same template for both repos
|
||||
# any changes for clr should just trigger HIP pipeline
|
||||
# similarly for hipother repo, for Nvidia backend
|
||||
- ROCR-Runtime
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
- { os: ubuntu2204, packageManager: apt, platform: amd }
|
||||
- { os: ubuntu2204, packageManager: apt, platform: nvidia }
|
||||
- { os: almalinux8, packageManager: dnf, platform: amd }
|
||||
- { os: almalinux8, packageManager: dnf, platform: nvidia }
|
||||
|
||||
# HIP with AMD backend
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: hip_clr_combined_${{ job.os }}_amd
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
# checkout triggering repo (either HIP or clr)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# if this is triggered by HIP repo, matching repo is clr
|
||||
# if this is triggered by clr repo, matching repo is HIP
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: matching_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: hipother_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependenciesAMD }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
os: ${{ job.os }}
|
||||
# compile clr
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: clr
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/clr/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/clr'
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DHIP_COMMON_DIR=$(Build.SourcesDirectory)/HIP
|
||||
-DHIP_PLATFORM=amd
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DHIPCC_BIN_DIR=$(Agent.BuildDirectory)/rocm/bin
|
||||
-DCLR_BUILD_HIP=ON
|
||||
-DCLR_BUILD_OCL=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
artifactName: amd
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
artifactName: amd
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# pipModules: ${{ parameters.pipModules }}
|
||||
# environment: amd
|
||||
|
||||
# HIP with Nvidia backend
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: hip_clr_combined_${{ job.os }}_nvidia
|
||||
- job: ${{ parameters.componentName }}_${{ job.os }}_${{ job.platform }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
@@ -140,49 +91,45 @@ jobs:
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
# checkout triggering repo (either HIP or clr)
|
||||
# full checkout of rocm-systems superrepo, we need clr, hip, and hipother
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
# if this is triggered by HIP repo, matching repo is clr
|
||||
# if this is triggered by clr repo, matching repo is HIP
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: matching_repo
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: hipother_repo
|
||||
# sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependenciesNvidia }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
os: ${{ job.os }}
|
||||
- script: 'ls -1R $(Agent.BuildDirectory)/rocm'
|
||||
displayName: 'Artifact listing'
|
||||
# compile clr
|
||||
${{ if eq(job.platform, 'amd') }}:
|
||||
dependencyList: ${{ parameters.rocmDependenciesAMD }}
|
||||
${{ elseif eq(job.platform, 'nvidia') }}:
|
||||
dependencyList: ${{ parameters.rocmDependenciesNvidia }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: clr
|
||||
cmakeBuildDir: '$(Build.SourcesDirectory)/clr/build'
|
||||
cmakeSourceDir: '$(Build.SourcesDirectory)/clr'
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/s/projects/clr/build
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/s/projects/clr
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DHIP_COMMON_DIR=$(Build.SourcesDirectory)/HIP
|
||||
-DHIP_PLATFORM=nvidia
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DHIPCC_BIN_DIR=$(Agent.BuildDirectory)/rocm/bin
|
||||
-DHIP_COMMON_DIR=$(Agent.BuildDirectory)/s/projects/hip
|
||||
-DHIPNV_DIR=$(Agent.BuildDirectory)/s/projects/hipother/hipnv
|
||||
-DHIP_PLATFORM=${{ job.platform }}
|
||||
-DCLR_BUILD_HIP=ON
|
||||
-DCLR_BUILD_OCL=OFF
|
||||
-DHIPNV_DIR=$(Build.SourcesDirectory)/hipother/hipnv
|
||||
-DCLR_BUILD_OCL=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
artifactName: ${{ job.platform }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
artifactName: nvidia
|
||||
artifactName: ${{ job.platform }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
# pipModules: ${{ parameters.pipModules }}
|
||||
# environment: nvidia
|
||||
|
||||
@@ -79,7 +79,7 @@ jobs:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||
- task: Bash@3
|
||||
displayName: Add lit to PATH
|
||||
inputs:
|
||||
|
||||
@@ -123,7 +123,7 @@ jobs:
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool: ${{ variables.HIGH_BUILD_POOL }}
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
@@ -131,6 +131,7 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
@@ -149,6 +150,7 @@ jobs:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: Bash@3
|
||||
displayName: Build and install other dependencies
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: $(Agent.BuildDirectory)/s
|
||||
@@ -210,6 +212,7 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
@@ -228,6 +231,7 @@ jobs:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: Bash@3
|
||||
displayName: Build and install other dependencies
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: $(Agent.BuildDirectory)/s
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: ROCR-Runtime
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -18,6 +37,7 @@ parameters:
|
||||
- libdrm-dev
|
||||
- libelf-dev
|
||||
- libnuma-dev
|
||||
- libsimde-dev
|
||||
- ninja-build
|
||||
- pkg-config
|
||||
- name: rocmDependencies
|
||||
@@ -45,6 +65,10 @@ parameters:
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ROCR_Runtime_build_${{ job.os }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
@@ -65,14 +89,18 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
@@ -82,105 +110,112 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ROCR_Runtime_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ROCR_Runtime_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
parameters:
|
||||
runRocminfo: false
|
||||
- task: Bash@3
|
||||
displayName: Build kfdtest
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
workingDirectory: $(Build.SourcesDirectory)/libhsakmt/tests/kfdtest
|
||||
script: |
|
||||
if [ -e /opt/rh/gcc-toolset-14/enable ]; then
|
||||
source /opt/rh/gcc-toolset-14/enable
|
||||
fi
|
||||
mkdir build && cd build
|
||||
cmake -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm ..
|
||||
make
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: kfdtest
|
||||
testExecutable: BIN_DIR=$(Build.SourcesDirectory)/libhsakmt/tests/kfdtest/build ./run_kfdtest.sh
|
||||
testParameters: '-p core --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
testDir: $(Build.SourcesDirectory)/libhsakmt/tests/kfdtest/scripts
|
||||
os: ${{ job.os }}
|
||||
- task: Bash@3
|
||||
displayName: Build rocrtst
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
workingDirectory: $(Build.SourcesDirectory)/rocrtst/suites/test_common
|
||||
script: |
|
||||
echo $(Build.SourcesDirectory)/rocrtst/thirdparty/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
sudo cat /etc/ld.so.conf.d/rocm-ci.conf
|
||||
sudo ldconfig -v
|
||||
ldconfig -p
|
||||
if [ -e /opt/rh/gcc-toolset-14/enable ]; then
|
||||
source /opt/rh/gcc-toolset-14/enable
|
||||
fi
|
||||
BASE_CLANG_DIR=$(Agent.BuildDirectory)/rocm/llvm/lib/clang
|
||||
export NEWEST_CLANG_VER=$(ls -1 $BASE_CLANG_DIR | sort -V | tail -n 1)
|
||||
mkdir build && cd build
|
||||
cmake .. \
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm \
|
||||
-DTARGET_DEVICES=${{ job.target }} \
|
||||
-DROCM_DIR=$(Agent.BuildDirectory)/rocm \
|
||||
-DLLVM_DIR=$(Agent.BuildDirectory)/rocm/llvm/bin \
|
||||
-DOPENCL_INC_DIR=$BASE_CLANG_DIR/$NEWEST_CLANG_VER/include
|
||||
make
|
||||
make rocrtst_kernels
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocrtst
|
||||
testExecutable: ./rocrtst64
|
||||
testParameters: '--gtest_filter="-rocrtstNeg.Memory_Negative_Tests:rocrtstFunc.Memory_Max_Mem" --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
testDir: $(Build.SourcesDirectory)/rocrtst/suites/test_common/build/${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
# docker image will be missing libhwloc5
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ROCR_Runtime_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ROCR_Runtime_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
parameters:
|
||||
runRocminfo: false
|
||||
- task: Bash@3
|
||||
displayName: Build kfdtest
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
workingDirectory: $(Agent.BuildDirectory)/s/libhsakmt/tests/kfdtest
|
||||
script: |
|
||||
if [ -e /opt/rh/gcc-toolset-14/enable ]; then
|
||||
source /opt/rh/gcc-toolset-14/enable
|
||||
fi
|
||||
mkdir build && cd build
|
||||
cmake -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm ..
|
||||
make
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: kfdtest
|
||||
testExecutable: BIN_DIR=$(Agent.BuildDirectory)/s/libhsakmt/tests/kfdtest/build ./run_kfdtest.sh
|
||||
testParameters: '-p core --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
testDir: $(Agent.BuildDirectory)/s/libhsakmt/tests/kfdtest/scripts
|
||||
os: ${{ job.os }}
|
||||
- task: Bash@3
|
||||
displayName: Build rocrtst
|
||||
inputs:
|
||||
targetType: 'inline'
|
||||
workingDirectory: $(Agent.BuildDirectory)/s/rocrtst/suites/test_common
|
||||
script: |
|
||||
echo $(Agent.BuildDirectory)/s/rocrtst/thirdparty/lib | sudo tee -a /etc/ld.so.conf.d/rocm-ci.conf
|
||||
sudo cat /etc/ld.so.conf.d/rocm-ci.conf
|
||||
sudo ldconfig -v
|
||||
ldconfig -p
|
||||
if [ -e /opt/rh/gcc-toolset-14/enable ]; then
|
||||
source /opt/rh/gcc-toolset-14/enable
|
||||
fi
|
||||
BASE_CLANG_DIR=$(Agent.BuildDirectory)/rocm/llvm/lib/clang
|
||||
export NEWEST_CLANG_VER=$(ls -1 $BASE_CLANG_DIR | sort -V | tail -n 1)
|
||||
mkdir build && cd build
|
||||
cmake .. \
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm \
|
||||
-DTARGET_DEVICES=${{ job.target }} \
|
||||
-DROCM_DIR=$(Agent.BuildDirectory)/rocm \
|
||||
-DLLVM_DIR=$(Agent.BuildDirectory)/rocm/llvm/bin \
|
||||
-DOPENCL_INC_DIR=$BASE_CLANG_DIR/$NEWEST_CLANG_VER/include
|
||||
make
|
||||
make rocrtst_kernels
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocrtst
|
||||
testExecutable: ./rocrtst64
|
||||
testParameters: '--gtest_filter="-rocrtstNeg.Memory_Negative_Tests:rocrtstFunc.Memory_Max_Mem" --gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
testDir: $(Agent.BuildDirectory)/s//rocrtst/suites/test_common/build/${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
# docker image will be missing libhwloc5
|
||||
|
||||
@@ -171,6 +171,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: 'Download Pipeline Wheel Files'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
itemPattern: '**/*${{ job.os }}*.whl'
|
||||
targetPath: $(Agent.BuildDirectory)
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: amdsmi
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -31,7 +50,7 @@ parameters:
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: amdsmi_build_${{ job.os }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}
|
||||
pool:
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
vmImage: 'ubuntu-24.04'
|
||||
@@ -55,6 +74,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
@@ -65,50 +85,54 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
componentName: ${{ parameters.componentName }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: amdsmi_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: amdsmi_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
parameters:
|
||||
runRocminfo: false
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: amdsmi
|
||||
testDir: '$(Agent.BuildDirectory)'
|
||||
testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst'
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
parameters:
|
||||
runRocminfo: false
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)'
|
||||
testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst'
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
174
.azuredevops/components/aqlprofile.yml
Normal file
174
.azuredevops/components/aqlprofile.yml
Normal file
@@ -0,0 +1,174 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: aqlprofile
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- clr
|
||||
- llvm-project
|
||||
- ROCR-Runtime
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
- clr
|
||||
- llvm-project
|
||||
- ROCR-Runtime
|
||||
- rocprofiler-register
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
consolidateBuildAndInstall: true
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/aqlprofile/cmake_modules
|
||||
-DAQLPROFILE_BUILD_TESTS=ON
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- ${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: $(Agent.BuildDirectory)/rocm/share/hsa-amd-aqlprofile/
|
||||
testExecutable: ./run_tests.sh
|
||||
testParameters: ''
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: hip-tests
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -60,6 +79,10 @@ parameters:
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: hip_tests_build_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -76,15 +99,18 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
# compile hip-tests
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: hip-tests
|
||||
componentName: ${{ parameters.componentName }}
|
||||
cmakeSourceDir: '../catch'
|
||||
customBuildTarget: build_tests
|
||||
extraBuildFlags: >-
|
||||
@@ -96,9 +122,12 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
@@ -108,52 +137,56 @@ jobs:
|
||||
extraEnvVars:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: hip_tests_test_${{ job.target }}
|
||||
timeoutInMinutes: 240
|
||||
dependsOn: hip_tests_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- task: Bash@3
|
||||
displayName: Symlink rocm_agent_enumerator
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
# Assuming that /opt is no longer persistent across runs, test environments are fully ephemeral
|
||||
sudo mkdir -p /opt/rocm/bin
|
||||
sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rocm_agent_enumerator /opt/rocm/bin/rocm_agent_enumerator
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: hip_tests
|
||||
testDir: $(Agent.BuildDirectory)/rocm/share/hip
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
optSymLink: true
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: hip_tests_test_${{ job.target }}
|
||||
timeoutInMinutes: 240
|
||||
dependsOn: hip_tests_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: Bash@3
|
||||
displayName: Symlink rocm_agent_enumerator
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
# Assuming that /opt is no longer persistent across runs, test environments are fully ephemeral
|
||||
sudo mkdir -p /opt/rocm/bin
|
||||
sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rocm_agent_enumerator /opt/rocm/bin/rocm_agent_enumerator
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: $(Agent.BuildDirectory)/rocm/share/hip
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
optSymLink: true
|
||||
|
||||
@@ -35,9 +35,13 @@ parameters:
|
||||
- ccache
|
||||
- gfortran
|
||||
- git
|
||||
- libboost-filesystem-dev
|
||||
- libboost-program-options-dev
|
||||
- libdrm-dev
|
||||
- liblapack-dev
|
||||
- libmsgpack-dev
|
||||
- libnuma-dev
|
||||
- libopenblas-dev
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- python3-venv
|
||||
@@ -46,6 +50,12 @@ parameters:
|
||||
default:
|
||||
- joblib
|
||||
- "packaging>=22.0"
|
||||
- pyyaml
|
||||
- msgpack
|
||||
- simplejson
|
||||
- ujson
|
||||
- orjson
|
||||
- yappi
|
||||
- --upgrade
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
@@ -67,6 +77,7 @@ parameters:
|
||||
- clr
|
||||
- hipBLAS-common
|
||||
- llvm-project
|
||||
- rocm-cmake
|
||||
- rocminfo
|
||||
- rocm_smi_lib
|
||||
- rocprofiler-register
|
||||
@@ -81,12 +92,12 @@ parameters:
|
||||
- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
#- { pool: rocm-ci_medium_build_pool, os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { pool: rocm-ci_ultra_build_pool, os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
#- { pool: rocm-ci_medium_build_pool, os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
@@ -134,7 +145,7 @@ jobs:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
@@ -168,8 +179,8 @@ jobs:
|
||||
mkdir -p $(Agent.BuildDirectory)/temp-deps
|
||||
cd $(Agent.BuildDirectory)/temp-deps
|
||||
# position-independent LAPACK is required for almalinux8 builds
|
||||
cmake -DBUILD_GTEST=OFF -DBUILD_LAPACK=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON $(Agent.BuildDirectory)/s/deps
|
||||
make
|
||||
cmake -DBUILD_GTEST=OFF -DBUILD_LAPACK=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON $(Agent.BuildDirectory)/sparse/projects/hipblaslt/deps
|
||||
make -j
|
||||
sudo make install
|
||||
- script: |
|
||||
mkdir -p $(CCACHE_DIR)
|
||||
@@ -187,6 +198,8 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/sparse/projects/hipblaslt
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/sparse/projects/hipblaslt/build
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
|
||||
-DCMAKE_INCLUDE_PATH=$(Agent.BuildDirectory)/rocm/llvm/include
|
||||
@@ -195,7 +208,11 @@ jobs:
|
||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-DBUILD_CLIENTS_TESTS=ON
|
||||
-DHIPBLASLT_ENABLE_ROCROLLER=ON
|
||||
-DHIPBLASLT_ENABLE_FETCH=ON
|
||||
-DHIPBLASLT_ENABLE_BLIS=OFF
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
|
||||
@@ -69,7 +69,7 @@ parameters:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
#- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
|
||||
@@ -40,10 +40,12 @@ parameters:
|
||||
- gfortran
|
||||
- libgfortran5
|
||||
- libopenblas-dev
|
||||
- liblapack-dev
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
- joblib
|
||||
- msgpack
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -52,6 +54,7 @@ parameters:
|
||||
- hipSPARSE
|
||||
- llvm-project
|
||||
- rocBLAS
|
||||
- rocm-cmake
|
||||
- rocm_smi_lib
|
||||
- rocminfo
|
||||
- rocprofiler-register
|
||||
@@ -65,6 +68,7 @@ parameters:
|
||||
- llvm-project
|
||||
- hipBLAS-common
|
||||
- hipBLASLt
|
||||
- rocm-cmake
|
||||
- rocBLAS
|
||||
- rocminfo
|
||||
- rocprofiler-register
|
||||
@@ -108,12 +112,13 @@ jobs:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
# ignore sparse checkout for monorepo case, we want access to hipblaslt directory
|
||||
# sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
@@ -123,14 +128,20 @@ jobs:
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
# NOTE: content between `---` is for transition support between old/new build systems
|
||||
# and should be removed once transition is complete.
|
||||
# -----------------------------
|
||||
# Build and install gtest and lapack
|
||||
# $(Pipeline.Workspace)/deps is a temporary folder for the build process
|
||||
# $(Pipeline.Workspace)/s/deps is part of the hipSPARSELt repo
|
||||
- script: mkdir $(Pipeline.Workspace)/deps
|
||||
- script: mkdir -p $(Pipeline.Workspace)/deps
|
||||
displayName: Create temp folder for external dependencies
|
||||
# hipSPARSELt already has a CMake script for external deps, so we can just run that
|
||||
# https://github.com/ROCm/hipSPARSELt/blob/develop/deps/CMakeLists.txt
|
||||
- script: cmake $(Pipeline.Workspace)/s/deps
|
||||
- ${{ if ne(parameters.sparseCheckoutDir, '') }}:
|
||||
script: cmake $(Pipeline.Workspace)/s/projects/hipsparselt/deps
|
||||
${{ else }}:
|
||||
script: cmake $(Pipeline.Workspace)/s/deps
|
||||
displayName: Configure hipSPARSELt external dependencies
|
||||
workingDirectory: $(Pipeline.Workspace)/deps
|
||||
- script: make
|
||||
@@ -139,22 +150,39 @@ jobs:
|
||||
- script: sudo make install
|
||||
displayName: Install hipSPARSELt external dependencies
|
||||
workingDirectory: $(Pipeline.Workspace)/deps
|
||||
# -----------------------------
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
# NOTE: the following options are old build only
|
||||
# and can be removed after full transition to new build
|
||||
# -DAMDGPU_TARGETS=${{ job.target }}
|
||||
# -DCMAKE_Fortran_COMPILER=f95
|
||||
# -DTensile_LOGIC=
|
||||
# -DTensile_CPU_THREADS=
|
||||
# -DTensile_LIBRARY_FORMAT=msgpack
|
||||
# -DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
# -DBUILD_CLIENTS_TESTS=ON
|
||||
# -DBUILD_USE_LOCAL_TENSILE=OFF
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DCMAKE_Fortran_COMPILER=f95
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm"
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
-DCMAKE_Fortran_COMPILER=f95
|
||||
-DTensile_LOGIC=
|
||||
-DTensile_CPU_THREADS=
|
||||
-DTensile_LIBRARY_FORMAT=msgpack
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm"
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DBUILD_CLIENTS_TESTS=ON
|
||||
-DBUILD_USE_LOCAL_TENSILE=OFF
|
||||
-DHIPSPARSELT_ENABLE_FETCH=ON
|
||||
-GNinja
|
||||
${{ if ne(parameters.sparseCheckoutDir, '') }}:
|
||||
cmakeSourceDir: $(Build.SourcesDirectory)/projects/hipsparselt
|
||||
cmakeBuildDir: $(Build.SourcesDirectory)/projects/hipsparselt
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: hipTensor
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -51,7 +70,7 @@ parameters:
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: hipTensor_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -66,17 +85,21 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/rocm/llvm
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DHIPTENSOR_BUILD_TESTS=ON
|
||||
@@ -84,9 +107,12 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
@@ -94,44 +120,47 @@ jobs:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: hipTensor_test_${{ job.target }}
|
||||
timeoutInMinutes: 90
|
||||
dependsOn: hipTensor_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: hipTensor
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor'
|
||||
testParameters: '-E ".*-extended" --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.target }}
|
||||
timeoutInMinutes: 90
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor'
|
||||
testParameters: '-E ".*-extended" --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -71,7 +71,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
|
||||
@@ -30,7 +30,7 @@ parameters:
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: ubuntu2404, packageManager: apt }
|
||||
# - { os: ubuntu2404, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
|
||||
jobs:
|
||||
|
||||
308
.azuredevops/components/origami.yml
Normal file
308
.azuredevops/components/origami.yml
Normal file
@@ -0,0 +1,308 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: origami
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
type: boolean
|
||||
default: false
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- ninja-build
|
||||
- wget
|
||||
- python3
|
||||
- python3-dev
|
||||
- python3-pip
|
||||
- python3-venv
|
||||
- libgtest-dev
|
||||
- libboost-filesystem-dev
|
||||
- libboost-program-options-dev
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
- nanobind>=2.0.0
|
||||
- pytest
|
||||
- pytest-cov
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- clr
|
||||
- llvm-project
|
||||
- rocm-cmake
|
||||
- rocminfo
|
||||
- ROCR-Runtime
|
||||
- rocprofiler-register
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
- clr
|
||||
- llvm-project
|
||||
- rocm-cmake
|
||||
- rocminfo
|
||||
- ROCR-Runtime
|
||||
- rocprofiler-register
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt }
|
||||
- { os: almalinux8, packageManager: dnf }
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
# - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
# - { os: ubuntu2204, packageManager: apt, target: gfx1151 }
|
||||
# - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- hipBLASLt:
|
||||
name: hipBLASLt
|
||||
sparseCheckoutDir: projects/hipblaslt
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- origami_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: origami_build_${{ job.os }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- ${{ if ne(job.os, 'almalinux8') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- catch2
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DORIGAMI_BUILD_SHARED_LIBS=ON
|
||||
-DORIGAMI_ENABLE_PYTHON=ON
|
||||
-DORIGAMI_BUILD_TESTING=ON
|
||||
-DORIGAMI_ENABLE_FETCH=ON
|
||||
-GNinja
|
||||
- ${{ if ne(job.os, 'almalinux8') }}:
|
||||
- task: PublishPipelineArtifact@1
|
||||
displayName: 'Publish Build Directory Artifact'
|
||||
inputs:
|
||||
targetPath: '$(Agent.BuildDirectory)/s/build'
|
||||
artifact: '${{ parameters.componentName }}_${{ job.os }}_build_dir'
|
||||
publishLocation: 'pipeline'
|
||||
- task: PublishPipelineArtifact@1
|
||||
displayName: 'Publish Python Source Artifact'
|
||||
inputs:
|
||||
targetPath: '$(Agent.BuildDirectory)/s/python'
|
||||
artifact: '${{ parameters.componentName }}_${{ job.os }}_python_src'
|
||||
publishLocation: 'pipeline'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
componentName: ${{ parameters.componentName }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: origami_test_${{ job.os }}_${{ job.target }}
|
||||
timeoutInMinutes: 120
|
||||
dependsOn: origami_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- gtest
|
||||
- ${{ if ne(job.os, 'almalinux8') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- catch2
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: CMake@1
|
||||
displayName: 'Origami Test CMake Configuration'
|
||||
inputs:
|
||||
cmakeArgs: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DORIGAMI_BUILD_SHARED_LIBS=ON
|
||||
-DORIGAMI_ENABLE_PYTHON=ON
|
||||
-DORIGAMI_BUILD_TESTING=ON
|
||||
-GNinja
|
||||
$(Agent.BuildDirectory)/s
|
||||
- task: Bash@3
|
||||
displayName: 'Build Origami Tests and Python Bindings'
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: build
|
||||
script: |
|
||||
cmake --build . --target origami-tests origami_python -- -j$(nproc)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
# Run tests using CTest (discovers and runs both C++ and Python tests)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
testDir: 'build'
|
||||
testParameters: '--output-on-failure --force-new-ctest-process --output-junit test_output.xml'
|
||||
# Test pip install workflow
|
||||
# - task: Bash@3
|
||||
# displayName: 'Test Pip Install'
|
||||
# inputs:
|
||||
# targetType: inline
|
||||
# script: |
|
||||
# set -e
|
||||
|
||||
# echo "==================================================================="
|
||||
# echo "Testing pip install workflow (pip install -e .)"
|
||||
# echo "==================================================================="
|
||||
|
||||
# # Set environment variables for pip install CMake build
|
||||
# export ROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
# export CMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm:$(Agent.BuildDirectory)/vendor
|
||||
# export CMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
|
||||
# echo "ROCM_PATH: $ROCM_PATH"
|
||||
# echo "CMAKE_PREFIX_PATH: $CMAKE_PREFIX_PATH"
|
||||
# echo "CMAKE_CXX_COMPILER: $CMAKE_CXX_COMPILER"
|
||||
# echo ""
|
||||
|
||||
# # Install from source directory
|
||||
# cd "$(Agent.BuildDirectory)/s/python"
|
||||
# pip install -e .
|
||||
|
||||
# # Verify import works
|
||||
# echo ""
|
||||
# echo "Verifying origami can be imported..."
|
||||
# python3 -c "import origami; print('✓ Successfully imported origami')"
|
||||
|
||||
# # Run pytest on installed package
|
||||
# echo ""
|
||||
# echo "Running pytest tests..."
|
||||
# python3 -m pytest tests/ -v -m "not slow" --tb=short
|
||||
|
||||
# echo ""
|
||||
# echo "==================================================================="
|
||||
# echo "Pip install test completed successfully"
|
||||
# echo "==================================================================="
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if parameters.triggerDownstreamJobs }}:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
buildDependsOn: ${{ component.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
@@ -1,10 +1,35 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rccl
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: systemsRepo
|
||||
type: string
|
||||
default: systems_repo
|
||||
- name: systemsSparseCheckoutDir
|
||||
type: string
|
||||
default: 'projects/rocprofiler-sdk'
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -57,37 +82,52 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- rocprofiler-sdk:
|
||||
name: rocprofiler-sdk
|
||||
sparseCheckoutDir: ''
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- rccl_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rccl_build_${{ job.target }}
|
||||
timeoutInMinutes: 90
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
||||
timeoutInMinutes: 120
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: HIP_ROCCLR_HOME
|
||||
value: $(Build.BinariesDirectory)/rocm
|
||||
pool: ${{ variables.HIGH_BUILD_POOL }}
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
submoduleBehaviour: recursive
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
@@ -97,10 +137,14 @@ jobs:
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/bin/hipcc
|
||||
@@ -112,58 +156,87 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraEnvVars:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
installLatestCMake: true
|
||||
- ${{ if eq(job.os, 'ubuntu2204') }}:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraEnvVars:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
installLatestCMake: true
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rccl_test_${{ job.target }}
|
||||
timeoutInMinutes: 120
|
||||
dependsOn: rccl_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rccl
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './rccl-UnitTests'
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
timeoutInMinutes: 120
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin'
|
||||
testExecutable: './rccl-UnitTests'
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if parameters.triggerDownstreamJobs }}:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.systemsRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.systemsSparseCheckoutDir }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
${{ if parameters.unifiedBuild }}:
|
||||
buildDependsOn: ${{ component.unifiedBuild.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ component.unifiedBuild.downstreamAggregateNames }}
|
||||
${{ else }}:
|
||||
buildDependsOn: ${{ component.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rdc
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -33,6 +52,7 @@ parameters:
|
||||
- clr
|
||||
- hipBLAS-common
|
||||
- hipBLASLt
|
||||
- hipRAND
|
||||
- llvm-project
|
||||
- rocBLAS
|
||||
- rocm-cmake
|
||||
@@ -43,6 +63,7 @@ parameters:
|
||||
- rocprofiler
|
||||
- rocprofiler-register
|
||||
- rocprofiler-sdk
|
||||
- rocRAND
|
||||
- ROCR-Runtime
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
@@ -74,7 +95,11 @@ parameters:
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rdc_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -85,16 +110,22 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||
parameters:
|
||||
cmakeVersion: '3.25.0'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
# Build grpc
|
||||
- task: Bash@3
|
||||
displayName: 'git clone grpc'
|
||||
@@ -104,6 +135,7 @@ jobs:
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
cmakeBuildDir: $(Build.SourcesDirectory)/grpc/build
|
||||
cmakeSourceDir: $(Build.SourcesDirectory)/grpc
|
||||
installDir: $(Build.SourcesDirectory)/bin
|
||||
@@ -117,6 +149,7 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DGRPC_ROOT="$(Build.SourcesDirectory)/bin"
|
||||
@@ -126,9 +159,12 @@ jobs:
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
@@ -136,60 +172,64 @@ jobs:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rdc_test_${{ job.target }}
|
||||
dependsOn: rdc_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
- name: ROCM_DIR
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- task: Bash@3
|
||||
displayName: Setup test environment
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rdcd /usr/sbin/rdcd
|
||||
echo $(Agent.BuildDirectory)/rocm/lib/rdc/grpc/lib | sudo tee /etc/ld.so.conf.d/grpc.conf
|
||||
sudo ldconfig -v
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- task: Bash@3
|
||||
displayName: Test rdc
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: >-
|
||||
$(Agent.BuildDirectory)/rocm/share/rdc/rdctst_tests/rdctst
|
||||
--batch_mode
|
||||
--start_rdcd
|
||||
--unauth_comm
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraPaths: /home/user/workspace/rocm/bin
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
- name: ROCM_DIR
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: Bash@3
|
||||
displayName: Setup test environment
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo ln -s $(Agent.BuildDirectory)/rocm/bin/rdcd /usr/sbin/rdcd
|
||||
echo $(Agent.BuildDirectory)/rocm/lib/rdc/grpc/lib | sudo tee /etc/ld.so.conf.d/grpc.conf
|
||||
sudo ldconfig -v
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- task: Bash@3
|
||||
displayName: Test rdc
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: >-
|
||||
$(Agent.BuildDirectory)/rocm/share/rdc/rdctst_tests/rdctst
|
||||
--batch_mode
|
||||
--start_rdcd
|
||||
--unauth_comm
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraPaths: /home/user/workspace/rocm/bin
|
||||
|
||||
@@ -70,6 +70,7 @@ parameters:
|
||||
- hipBLAS-common
|
||||
- hipBLASLt
|
||||
- llvm-project
|
||||
- rocm-cmake
|
||||
- rocminfo
|
||||
- rocprofiler-register
|
||||
- rocm_smi_lib
|
||||
@@ -84,12 +85,12 @@ parameters:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
#- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
#- { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
@@ -115,6 +116,13 @@ parameters:
|
||||
# buildDependsOn:
|
||||
# - rocBLAS_build
|
||||
# - rocPRIM_build
|
||||
# temporary rocblas->hipblas downstream path while the SOLVERs are disabled
|
||||
- hipBLAS:
|
||||
name: hipBLAS
|
||||
sparseCheckoutDir: projects/hipblas
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- rocBLAS_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
@@ -147,7 +155,7 @@ jobs:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
@@ -172,6 +180,8 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/sparse/projects/rocblas
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/sparse/projects/rocblas/build
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
|
||||
@@ -8,6 +8,25 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: rocPyDecodeRepo
|
||||
type: string
|
||||
default: rocpydecode_repo
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -56,10 +75,23 @@ parameters:
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- name: downstreamComponentMatrix
|
||||
type: object
|
||||
default:
|
||||
- rocPyDecode:
|
||||
name: rocPyDecode
|
||||
sparseCheckoutDir: ''
|
||||
skipUnifiedBuild: 'false'
|
||||
buildDependsOn:
|
||||
- rocDecode_build
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -83,12 +115,15 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
@@ -169,3 +204,15 @@ jobs:
|
||||
registerROCmPackages: true
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ if parameters.triggerDownstreamJobs }}:
|
||||
- ${{ each component in parameters.downstreamComponentMatrix }}:
|
||||
- ${{ if not(and(parameters.unifiedBuild, eq(component.skipUnifiedBuild, 'true'))) }}:
|
||||
- template: /.azuredevops/components/${{ component.name }}.yml@pipelines_repo
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.rocPyDecodeRepo }}
|
||||
sparseCheckoutDir: ${{ component.sparseCheckoutDir }}
|
||||
buildDependsOn: ${{ component.buildDependsOn }}
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}+${{ parameters.componentName }}
|
||||
triggerDownstreamJobs: true
|
||||
unifiedBuild: ${{ parameters.unifiedBuild }}
|
||||
|
||||
@@ -210,7 +210,7 @@ jobs:
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/rocprim'
|
||||
extraTestParameters: '-I ${{ job.shard }},,${{ job.shardCount }} -E device_merge_inplace'
|
||||
extraTestParameters: '-I ${{ job.shard }},,${{ job.shardCount }}'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
|
||||
@@ -5,6 +5,22 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -47,19 +63,19 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocPyDecode_build_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -74,16 +90,20 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: Bash@3
|
||||
displayName: 'Save Python Package Paths'
|
||||
inputs:
|
||||
@@ -190,6 +210,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: 'Download Pipeline Wheel Files'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
itemPattern: '**/*.whl'
|
||||
targetPath: $(Agent.BuildDirectory)
|
||||
|
||||
@@ -74,12 +74,12 @@ parameters:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
#- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx942 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx90a }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1201 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1100 }
|
||||
- { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
#- { os: almalinux8, packageManager: dnf, target: gfx1030 }
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
@@ -73,7 +73,7 @@ parameters:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1201 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1100 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
#- { os: ubuntu2204, packageManager: apt, target: gfx1030 }
|
||||
testJobs:
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocWMMA
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -66,11 +85,15 @@ parameters:
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocWMMA_build_${{ job.target }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ variables.HIGH_BUILD_POOL }}
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
@@ -81,6 +104,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
@@ -102,9 +126,12 @@ jobs:
|
||||
# gfx1030 not supported in documentation
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
@@ -112,43 +139,45 @@ jobs:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocWMMA_test_${{ job.target }}
|
||||
timeoutInMinutes: 270
|
||||
dependsOn: rocWMMA_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocWMMA
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/rocwmma'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.target }}
|
||||
timeoutInMinutes: 350
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)/rocm/bin/rocwmma'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -81,7 +81,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocm-cmake
|
||||
testParameters: '-E "pass-version-parent" --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
|
||||
testParameters: '-E "pass-version-parent" --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocm-core
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -27,6 +46,10 @@ parameters:
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocm_core_${{ job.os }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
pool:
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
vmImage: 'ubuntu-24.04'
|
||||
@@ -50,8 +73,10 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
@@ -65,9 +90,12 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
|
||||
@@ -14,16 +14,42 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- libdw-dev
|
||||
- libglfw3-dev
|
||||
- libmsgpack-dev
|
||||
- libopencv-dev
|
||||
- libtbb-dev
|
||||
- libtiff-dev
|
||||
- libva-amdgpu-dev
|
||||
- libva2-amdgpu
|
||||
- mesa-amdgpu-va-drivers
|
||||
- libavcodec-dev
|
||||
- libavformat-dev
|
||||
- libavutil-dev
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- protobuf-compiler
|
||||
- libprotoc-dev
|
||||
- libopencv-dev
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
- future==1.0.0
|
||||
- pytz==2022.1
|
||||
- numpy==1.23
|
||||
- google==3.0.0
|
||||
- protobuf==3.12.4
|
||||
- onnx==1.12.0
|
||||
- nnef==1.0.7
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- AMDMIGraphX
|
||||
- aomp
|
||||
- aomp-extras
|
||||
- clr
|
||||
- half
|
||||
- composable_kernel
|
||||
- hipBLAS
|
||||
- hipBLAS-common
|
||||
- hipBLASLt
|
||||
@@ -33,21 +59,37 @@ parameters:
|
||||
- hipRAND
|
||||
- hipSOLVER
|
||||
- hipSPARSE
|
||||
- hipTensor
|
||||
- llvm-project
|
||||
- MIOpen
|
||||
- MIVisionX
|
||||
- rocm_smi_lib
|
||||
- rccl
|
||||
- rocAL
|
||||
- rocALUTION
|
||||
- rocBLAS
|
||||
- rocDecode
|
||||
- rocFFT
|
||||
- rocJPEG
|
||||
- rocPRIM
|
||||
- rocprofiler-register
|
||||
- rocprofiler-sdk
|
||||
- ROCR-Runtime
|
||||
- rocRAND
|
||||
- rocSOLVER
|
||||
- rocSPARSE
|
||||
- rocThrust
|
||||
- rocWMMA
|
||||
- rpp
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
- AMDMIGraphX
|
||||
- aomp
|
||||
- aomp-extras
|
||||
- clr
|
||||
- half
|
||||
- composable_kernel
|
||||
- hipBLAS
|
||||
- hipBLAS-common
|
||||
- hipBLASLt
|
||||
@@ -57,18 +99,30 @@ parameters:
|
||||
- hipRAND
|
||||
- hipSOLVER
|
||||
- hipSPARSE
|
||||
- hipTensor
|
||||
- llvm-project
|
||||
- MIOpen
|
||||
- MIVisionX
|
||||
- rocm_smi_lib
|
||||
- rccl
|
||||
- rocAL
|
||||
- rocALUTION
|
||||
- rocBLAS
|
||||
- rocDecode
|
||||
- rocFFT
|
||||
- rocminfo
|
||||
- rocPRIM
|
||||
- rocJPEG
|
||||
- rocprofiler-register
|
||||
- rocprofiler-sdk
|
||||
- ROCR-Runtime
|
||||
- rocRAND
|
||||
- rocSOLVER
|
||||
- rocSPARSE
|
||||
- rocThrust
|
||||
- roctracer
|
||||
- rocWMMA
|
||||
- rpp
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
@@ -97,6 +151,11 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||
parameters:
|
||||
cmakeVersion: '3.25.0'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
@@ -158,6 +217,10 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||
parameters:
|
||||
cmakeVersion: '3.25.0'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
@@ -188,5 +251,6 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -36,16 +36,25 @@ parameters:
|
||||
- gfortran
|
||||
- git
|
||||
- libdrm-dev
|
||||
- liblapack-dev
|
||||
- libmsgpack-dev
|
||||
- libnuma-dev
|
||||
- libopenblas-dev
|
||||
- ninja-build
|
||||
- python3-pip
|
||||
- python3-venv
|
||||
- googletest
|
||||
- libgtest-dev
|
||||
- libgmock-dev
|
||||
- libboost-filesystem-dev
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
- msgpack
|
||||
- joblib
|
||||
- "packaging>=22.0"
|
||||
- pytest
|
||||
- pytest-cmake
|
||||
- --upgrade
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
@@ -98,12 +107,12 @@ jobs:
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-latest.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
@@ -134,12 +143,33 @@ jobs:
|
||||
rocm-libraries | ${{ job.os }} | ${{ job.target }} | $(DAY_STRING)
|
||||
rocm-libraries | ${{ job.os }} | ${{ job.target }}
|
||||
rocm-libraries | ${{ job.os }}
|
||||
- task: Bash@3
|
||||
displayName: Add paths for CMake and Python site-packages binaries
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
USER_BASE=$(python3 -m site --user-base)
|
||||
echo "##vso[task.prependpath]$USER_BASE/bin"
|
||||
echo "##vso[task.setvariable variable=PytestCmakePath]$USER_BASE/share/Pytest/cmake"
|
||||
displayName: Set cmake configure paths
|
||||
- task: Bash@3
|
||||
displayName: Add ROCm binaries to PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DROCM_LIBRARIES_SUPERBUILD=ON
|
||||
-GNinja
|
||||
-D CMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor;$(PytestCmakePath)
|
||||
-D CMAKE_INCLUDE_PATH=$(Agent.BuildDirectory)/rocm/llvm/include
|
||||
-D CMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-D CMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-D CMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
-D CMAKE_C_COMPILER_LAUNCHER=ccache
|
||||
-G Ninja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocm_smi_lib
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -32,6 +51,10 @@ parameters:
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocm_smi_lib_build_${{ job.os }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
pool:
|
||||
${{ if eq(job.os, 'ubuntu2404') }}:
|
||||
vmImage: 'ubuntu-24.04'
|
||||
@@ -55,8 +78,10 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
@@ -65,51 +90,56 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
# parameters:
|
||||
# aptPackages: ${{ parameters.aptPackages }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocm_smi_lib_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: rocm_smi_lib_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
parameters:
|
||||
runRocminfo: false
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocm_smi_lib
|
||||
testDir: '$(Agent.BuildDirectory)'
|
||||
testExecutable: 'sudo ./rocm/share/rocm_smi/rsmitst_tests/rsmitst'
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocm_smi_lib_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: rocm_smi_lib_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
parameters:
|
||||
runRocminfo: false
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)'
|
||||
testExecutable: 'sudo ./rocm/share/rocm_smi/rsmitst_tests/rsmitst'
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocminfo
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -40,7 +59,11 @@ parameters:
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocminfo_build_${{ job.os }}
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
@@ -62,14 +85,18 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
@@ -78,65 +105,71 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocminfo_test_${{ job.target }}
|
||||
dependsOn: rocminfo_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
parameters:
|
||||
runRocminfo: false
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocminfo
|
||||
testDir: '$(Agent.BuildDirectory)'
|
||||
testExecutable: './rocm/bin/rocminfo'
|
||||
testParameters: ''
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocm_agent_enumerator
|
||||
testDir: '$(Agent.BuildDirectory)'
|
||||
testExecutable: './rocm/bin/rocm_agent_enumerator'
|
||||
testParameters: ''
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
registerROCmPackages: true
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocminfo_test_${{ job.target }}
|
||||
dependsOn: rocminfo_build_${{ job.os }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
parameters:
|
||||
runRocminfo: false
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: '$(Agent.BuildDirectory)'
|
||||
testExecutable: './rocm/bin/rocminfo'
|
||||
testParameters: ''
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocm_agent_enumerator
|
||||
testDir: '$(Agent.BuildDirectory)'
|
||||
testExecutable: './rocm/bin/rocm_agent_enumerator'
|
||||
testParameters: ''
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
registerROCmPackages: true
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocprofiler-compute
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -36,6 +55,7 @@ parameters:
|
||||
- pymongo
|
||||
- pyyaml
|
||||
- setuptools
|
||||
- sqlalchemy
|
||||
- tabulate
|
||||
- textual
|
||||
- textual_plotext
|
||||
@@ -45,6 +65,13 @@ parameters:
|
||||
- pytest
|
||||
- pytest-cov
|
||||
- pytest-xdist
|
||||
- name: rocmDependencies
|
||||
type: object
|
||||
default:
|
||||
- clr
|
||||
- llvm-project
|
||||
- ROCR-Runtime
|
||||
- rocprofiler-sdk
|
||||
- name: rocmTestDependencies
|
||||
type: object
|
||||
default:
|
||||
@@ -78,9 +105,15 @@ parameters:
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocprofiler_compute_build_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
vmImage: ${{ variables.BASE_BUILD_POOL }}
|
||||
workspace:
|
||||
@@ -94,15 +127,27 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
@@ -111,78 +156,83 @@ jobs:
|
||||
# pipModules: ${{ parameters.pipModules }}
|
||||
# gpuTarget: ${{ job.target }}
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocprofiler_compute_test_${{ job.target }}
|
||||
timeoutInMinutes: 120
|
||||
dependsOn: rocprofiler_compute_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: PYTHON_VERSION
|
||||
value: 3.10
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- task: Bash@3
|
||||
displayName: Add en_US.UTF-8 locale
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo locale-gen en_US.UTF-8
|
||||
sudo update-locale
|
||||
locale -a
|
||||
- task: Bash@3
|
||||
displayName: Add ROCm binaries to PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_HIP_ARCHITECTURES=${{ job.target }}
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DENABLE_TESTS=ON
|
||||
-DINSTALL_TESTS=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocprofiler-compute
|
||||
testDir: $(Build.BinariesDirectory)/libexec/rocprofiler-compute
|
||||
testExecutable: ROCM_PATH=$(Agent.BuildDirectory)/rocm ctest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocprofiler_compute_test_${{ job.target }}
|
||||
timeoutInMinutes: 120
|
||||
dependsOn: rocprofiler_compute_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: PYTHON_VERSION
|
||||
value: 3.10
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: Bash@3
|
||||
displayName: Add en_US.UTF-8 locale
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo locale-gen en_US.UTF-8
|
||||
sudo update-locale
|
||||
locale -a
|
||||
- task: Bash@3
|
||||
displayName: Add ROCm binaries to PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_HIP_ARCHITECTURES=${{ job.target }}
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DENABLE_TESTS=ON
|
||||
-DINSTALL_TESTS=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: $(Build.BinariesDirectory)/libexec/rocprofiler-compute
|
||||
testExecutable: ROCM_PATH=$(Agent.BuildDirectory)/rocm ctest
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -1,10 +1,29 @@
|
||||
parameters:
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocprofiler-sdk
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -60,23 +79,27 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
testJobs:
|
||||
- gfx942:
|
||||
target: gfx942
|
||||
- gfx90a:
|
||||
target: gfx90a
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx942 }
|
||||
- { os: ubuntu2204, packageManager: apt, target: gfx90a }
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocprofiler_sdk_build_${{ job.target }}
|
||||
- job: rocprofiler_sdk_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os}}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ variables.MEDIUM_BUILD_POOL }}
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
@@ -84,18 +107,23 @@ jobs:
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: Bash@3
|
||||
displayName: Add Python site-packages binaries to path
|
||||
inputs:
|
||||
@@ -105,6 +133,8 @@ jobs:
|
||||
echo "##vso[task.prependpath]$USER_BASE/bin"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCPROFILER_BUILD_TESTS=ON
|
||||
@@ -114,9 +144,13 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
# - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
@@ -126,62 +160,72 @@ jobs:
|
||||
# gpuTarget: ${{ job.target }}
|
||||
# registerROCmPackages: true
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocprofiler_sdk_test_${{ job.target }}
|
||||
dependsOn: rocprofiler_sdk_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- task: Bash@3
|
||||
displayName: Add Python and ROCm binaries to path
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
USER_BASE=$(python3 -m site --user-base)
|
||||
echo "##vso[task.prependpath]$USER_BASE/bin"
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCPROFILER_BUILD_TESTS=ON
|
||||
-DROCPROFILER_BUILD_SAMPLES=ON
|
||||
-DROCPROFILER_BUILD_RELEASE=ON
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH}}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocprofiler-sdk
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
registerROCmPackages: true
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocprofiler_sdk_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: rocprofiler_sdk_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: Bash@3
|
||||
displayName: Add Python and ROCm binaries to path
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
USER_BASE=$(python3 -m site --user-base)
|
||||
echo "##vso[task.prependpath]$USER_BASE/bin"
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DROCPROFILER_BUILD_TESTS=ON
|
||||
-DROCPROFILER_BUILD_SAMPLES=ON
|
||||
-DROCPROFILER_BUILD_RELEASE=ON
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH}}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
testDir: $(Agent.BuildDirectory)/build
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
registerROCmPackages: true
|
||||
|
||||
@@ -6,6 +6,25 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: componentName
|
||||
type: string
|
||||
default: rocprofiler-systems
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -87,6 +106,10 @@ parameters:
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: rocprofiler_systems_build_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -105,6 +128,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
@@ -136,12 +160,16 @@ jobs:
|
||||
-DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/include/rocjpeg
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-GNinja
|
||||
componentName: ${{ parameters.componentName }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
componentName: ${{ parameters.componentName }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
@@ -151,85 +179,93 @@ jobs:
|
||||
registerROCmPackages: true
|
||||
extraPaths: /home/user/workspace/rocm/bin:/home/user/workspace/rocm/llvm/bin
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocprofiler_systems_test_${{ job.target }}
|
||||
dependsOn: rocprofiler_systems_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
timeoutInMinutes: 180
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
name: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- task: Bash@3
|
||||
displayName: Add ROCm binaries to PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
# build flags reference: https://rocm.docs.amd.com/projects/omnitrace/en/latest/install/install.html
|
||||
extraBuildFlags: >-
|
||||
-DROCPROFSYS_BUILD_TESTING=ON
|
||||
-DROCPROFSYS_BUILD_DYNINST=ON
|
||||
-DROCPROFSYS_BUILD_LIBUNWIND=ON
|
||||
-DROCPROFSYS_DISABLE_EXAMPLES="openmp-target"
|
||||
-DDYNINST_BUILD_TBB=ON
|
||||
-DDYNINST_BUILD_ELFUTILS=ON
|
||||
-DDYNINST_BUILD_LIBIBERTY=ON
|
||||
-DDYNINST_BUILD_BOOST=ON
|
||||
-DROCPROFSYS_USE_PAPI=ON
|
||||
-DROCPROFSYS_USE_MPI=ON
|
||||
-DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/include/rocjpeg
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-GNinja
|
||||
- task: Bash@3
|
||||
displayName: Set up rocprofiler-systems env
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: source share/rocprofiler-systems/setup-env.sh
|
||||
workingDirectory: build
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocprofiler-systems
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
registerROCmPackages: true
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraPaths: /home/user/workspace/rocm/bin:/home/user/workspace/rocm/llvm/bin
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: rocprofiler_systems_test_${{ job.target }}
|
||||
dependsOn: rocprofiler_systems_build_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
timeoutInMinutes: 180
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
pool:
|
||||
name: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- task: Bash@3
|
||||
displayName: Add ROCm binaries to PATH
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/bin"
|
||||
echo "##vso[task.prependpath]$(Agent.BuildDirectory)/rocm/llvm/bin"
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/s/projects/rocprofiler-systems
|
||||
# build flags reference: https://rocm.docs.amd.com/projects/omnitrace/en/latest/install/install.html
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_INSTALL_PREFIX=$(Agent.BuildDirectory)/rocprofiler-systems
|
||||
-DROCPROFSYS_USE_PYTHON=ON
|
||||
-DROCPROFSYS_BUILD_TESTING=ON
|
||||
-DROCPROFSYS_BUILD_DYNINST=ON
|
||||
-DROCPROFSYS_BUILD_LIBUNWIND=ON
|
||||
-DROCPROFSYS_DISABLE_EXAMPLES="openmp-target"
|
||||
-DDYNINST_BUILD_TBB=ON
|
||||
-DDYNINST_BUILD_ELFUTILS=ON
|
||||
-DDYNINST_BUILD_LIBIBERTY=ON
|
||||
-DDYNINST_BUILD_BOOST=ON
|
||||
-DROCPROFSYS_USE_PAPI=ON
|
||||
-DROCPROFSYS_USE_MPI=ON
|
||||
-DCMAKE_CXX_FLAGS=-I$(Agent.BuildDirectory)/rocm/include/rocjpeg
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-GNinja
|
||||
- task: Bash@3
|
||||
displayName: Set up rocprofiler-systems env
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: source $(Agent.BuildDirectory)/rocprofiler-systems/share/rocprofiler-systems/setup-env.sh
|
||||
workingDirectory: $(Agent.BuildDirectory)/rocprofiler-systems/share/rocprofiler-systems
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testDir: $(Agent.BuildDirectory)/s/build/tests/
|
||||
testParameters: '--output-on-failure'
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
registerROCmPackages: true
|
||||
gpuTarget: ${{ job.target }}
|
||||
extraPaths: /home/user/workspace/rocm/bin:/home/user/workspace/rocm/llvm/bin
|
||||
|
||||
@@ -8,6 +8,22 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -70,6 +86,10 @@ parameters:
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -94,6 +114,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
@@ -108,6 +129,8 @@ jobs:
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
@@ -115,6 +138,7 @@ jobs:
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_MODULE_PATH=$(Build.SourcesDirectory)/cmake_modules;$(Agent.BuildDirectory)/rocm/lib/cmake;$(Agent.BuildDirectory)/rocm/lib/cmake/hip;$(Agent.BuildDirectory)/rocm/lib64/cmake;$(Agent.BuildDirectory)/rocm/lib64/cmake/hip
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm;$(Agent.BuildDirectory)/vendor"
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
|
||||
-DENABLE_LDCONFIG=OFF
|
||||
-DUSE_PROF_API=1
|
||||
@@ -122,10 +146,13 @@ jobs:
|
||||
multithreadFlag: -- -j32
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
@@ -139,63 +166,68 @@ jobs:
|
||||
- HIP_ROCCLR_HOME:::/home/user/workspace/rocm
|
||||
- ROCM_PATH:::/home/user/workspace/rocm
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
- name: LD_LIBRARY_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm/lib/rocprofiler:$(Agent.BuildDirectory)/rocm/share/rocprofiler/tests-v1/test:$(Agent.BuildDirectory)/rocm/share/rocprofiler/tests
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocprofilerV1
|
||||
testDir: $(Agent.BuildDirectory)/rocm/share/rocprofiler/tests-v1
|
||||
testExecutable: ./run.sh
|
||||
testParameters: ''
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocprofilerV2
|
||||
testDir: $(Agent.BuildDirectory)/rocm
|
||||
testExecutable: share/rocprofiler/tests/runUnitTests
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
- name: ROCM_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm
|
||||
- name: LD_LIBRARY_PATH
|
||||
value: $(Agent.BuildDirectory)/rocm/lib/rocprofiler:$(Agent.BuildDirectory)/rocm/share/rocprofiler/tests-v1/test:$(Agent.BuildDirectory)/rocm/share/rocprofiler/tests
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocprofilerV1
|
||||
testDir: $(Agent.BuildDirectory)/rocm/share/rocprofiler/tests-v1
|
||||
testExecutable: ./run.sh
|
||||
testParameters: ''
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: rocprofilerV2
|
||||
testDir: $(Agent.BuildDirectory)/rocm
|
||||
testExecutable: share/rocprofiler/tests/runUnitTests
|
||||
testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
|
||||
@@ -8,6 +8,22 @@ parameters:
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
# monorepo related parameters
|
||||
- name: sparseCheckoutDir
|
||||
type: string
|
||||
default: ''
|
||||
- name: triggerDownstreamJobs
|
||||
type: boolean
|
||||
default: false
|
||||
- name: downstreamAggregateNames
|
||||
type: string
|
||||
default: ''
|
||||
- name: buildDependsOn
|
||||
type: object
|
||||
default: null
|
||||
- name: unifiedBuild
|
||||
type: boolean
|
||||
default: false
|
||||
# set to true if doing full build of ROCm stack
|
||||
# and dependencies are pulled from same pipeline
|
||||
- name: aggregatePipeline
|
||||
@@ -65,6 +81,10 @@ parameters:
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
${{ if parameters.buildDependsOn }}:
|
||||
dependsOn:
|
||||
- ${{ each build in parameters.buildDependsOn }}:
|
||||
- ${{ build }}_${{ job.os }}_${{ job.target }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -87,6 +107,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
|
||||
parameters:
|
||||
checkoutRepo: ${{ parameters.checkoutRepo }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
@@ -94,6 +115,8 @@ jobs:
|
||||
gpuTarget: ${{ job.target }}
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
# the linker flags will not affect ubuntu2204 builds as the paths do not exist
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
@@ -109,10 +132,13 @@ jobs:
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
os: ${{ job.os }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
|
||||
@@ -123,53 +149,57 @@ jobs:
|
||||
# gpuTarget: ${{ job.target }}
|
||||
# registerROCmPackages: true
|
||||
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: roctracer
|
||||
testExecutable: $(Agent.BuildDirectory)/rocm/share/roctracer/run_tests.sh
|
||||
testParameters: ''
|
||||
testDir: $(Agent.BuildDirectory)
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
registerROCmPackages: true
|
||||
- ${{ if eq(parameters.unifiedBuild, False) }}:
|
||||
- ${{ each job in parameters.jobMatrix.testJobs }}:
|
||||
- job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
|
||||
dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
|
||||
condition:
|
||||
and(succeeded(),
|
||||
eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
|
||||
not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
|
||||
eq(${{ parameters.aggregatePipeline }}, False)
|
||||
)
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool: ${{ job.target }}_test_pool
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
registerROCmPackages: true
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
|
||||
parameters:
|
||||
preTargetFilter: ${{ parameters.componentName }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
|
||||
parameters:
|
||||
checkoutRef: ${{ parameters.checkoutRef }}
|
||||
dependencyList: ${{ parameters.rocmTestDependencies }}
|
||||
gpuTarget: ${{ job.target }}
|
||||
os: ${{ job.os }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
|
||||
parameters:
|
||||
componentName: ${{ parameters.componentName }}
|
||||
testExecutable: $(Agent.BuildDirectory)/rocm/share/roctracer/run_tests.sh
|
||||
testParameters: ''
|
||||
testDir: $(Agent.BuildDirectory)
|
||||
testPublishResults: false
|
||||
os: ${{ job.os }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
pipModules: ${{ parameters.pipModules }}
|
||||
environment: test
|
||||
gpuTarget: ${{ job.target }}
|
||||
registerROCmPackages: true
|
||||
|
||||
63
.azuredevops/dependencies/catch2.yml
Normal file
63
.azuredevops/dependencies/catch2.yml
Normal file
@@ -0,0 +1,63 @@
|
||||
parameters:
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: catch2Version
|
||||
type: string
|
||||
default: ''
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- ninja-build
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt}
|
||||
- { os: almalinux8, packageManager: dnf}
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: catch2_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: Bash@3
|
||||
displayName: Clone catch2 ${{ parameters.catch2Version }}
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/catchorg/Catch2.git -b ${{ parameters.catch2Version }}
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/Catch2/build
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/Catch2
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
63
.azuredevops/dependencies/cli11.yml
Normal file
63
.azuredevops/dependencies/cli11.yml
Normal file
@@ -0,0 +1,63 @@
|
||||
parameters:
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: cli11Version
|
||||
type: string
|
||||
default: ''
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- ninja-build
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt}
|
||||
- { os: almalinux8, packageManager: dnf}
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: cli11_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: Bash@3
|
||||
displayName: Clone cli11 ${{ parameters.cli11Version }}
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/CLIUtils/CLI11.git -b ${{ parameters.cli11Version }}
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/CLI11/build
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/CLI11
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
67
.azuredevops/dependencies/fmtlib.yml
Normal file
67
.azuredevops/dependencies/fmtlib.yml
Normal file
@@ -0,0 +1,67 @@
|
||||
parameters:
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: fmtlibVersion
|
||||
type: string
|
||||
default: ''
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- ninja-build
|
||||
- libfmt-dev
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt}
|
||||
- { os: almalinux8, packageManager: dnf}
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: fmtlib_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: Bash@3
|
||||
displayName: Clone fmtlib ${{ parameters.fmtlibVersion }}
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/fmtlib/fmt.git -b ${{ parameters.fmtlibVersion }}
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/fmt/build
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/fmt
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DFMT_SYSTEM_HEADERS=ON
|
||||
-DFMT_INSTALL=ON
|
||||
-DFMT_TEST=OFF
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
64
.azuredevops/dependencies/libdivide.yml
Normal file
64
.azuredevops/dependencies/libdivide.yml
Normal file
@@ -0,0 +1,64 @@
|
||||
parameters:
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: libdivideVersion
|
||||
type: string
|
||||
default: ''
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- ninja-build
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt}
|
||||
- { os: almalinux8, packageManager: dnf}
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: libdivide_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: Bash@3
|
||||
displayName: Clone libdivide ${{ parameters.libdivideVersion }}
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/ridiculousfish/libdivide.git -b ${{ parameters.libdivideVersion }}
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/libdivide/build
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/libdivide
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DLIBDIVIDE_BUILD_TESTS=OFF
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
71
.azuredevops/dependencies/spdlog.yml
Normal file
71
.azuredevops/dependencies/spdlog.yml
Normal file
@@ -0,0 +1,71 @@
|
||||
parameters:
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: spdlogVersion
|
||||
type: string
|
||||
default: ''
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- ninja-build
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt}
|
||||
- { os: almalinux8, packageManager: dnf}
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: spdlog_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-vendor.yml
|
||||
parameters:
|
||||
dependencyList:
|
||||
- fmtlib
|
||||
- task: Bash@3
|
||||
displayName: Clone spdlog ${{ parameters.spdlogVersion }}
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/gabime/spdlog.git -b ${{ parameters.spdlogVersion }}
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/spdlog/build
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/spdlog
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/vendor
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DSPDLOG_USE_STD_FORMAT=OFF
|
||||
-DSPDLOG_FMT_EXTERNAL_HO=ON
|
||||
-DSPDLOG_INSTALL=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
66
.azuredevops/dependencies/yamlcpp.yml
Normal file
66
.azuredevops/dependencies/yamlcpp.yml
Normal file
@@ -0,0 +1,66 @@
|
||||
parameters:
|
||||
- name: checkoutRepo
|
||||
type: string
|
||||
default: 'self'
|
||||
- name: checkoutRef
|
||||
type: string
|
||||
default: ''
|
||||
- name: yamlcppVersion
|
||||
type: string
|
||||
default: ''
|
||||
- name: aptPackages
|
||||
type: object
|
||||
default:
|
||||
- cmake
|
||||
- git
|
||||
- ninja-build
|
||||
|
||||
- name: jobMatrix
|
||||
type: object
|
||||
default:
|
||||
buildJobs:
|
||||
- { os: ubuntu2204, packageManager: apt}
|
||||
- { os: almalinux8, packageManager: dnf}
|
||||
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobMatrix.buildJobs }}:
|
||||
- job: yamlcpp_${{ job.os }}
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
pool:
|
||||
vmImage: 'ubuntu-22.04'
|
||||
${{ if eq(job.os, 'almalinux8') }}:
|
||||
container:
|
||||
image: rocmexternalcicd.azurecr.io/manylinux228:latest
|
||||
endpoint: ContainerService3
|
||||
workspace:
|
||||
clean: all
|
||||
steps:
|
||||
- checkout: none
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
|
||||
parameters:
|
||||
aptPackages: ${{ parameters.aptPackages }}
|
||||
packageManager: ${{ job.packageManager }}
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: Bash@3
|
||||
displayName: Clone yaml-cpp ${{ parameters.yamlcppVersion }}
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: git clone https://github.com/jbeder/yaml-cpp.git -b ${{ parameters.yamlcppVersion }}
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
cmakeBuildDir: $(Agent.BuildDirectory)/yaml-cpp/build
|
||||
cmakeSourceDir: $(Agent.BuildDirectory)/yaml-cpp
|
||||
useAmdclang: false
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DYAML_CPP_BUILD_TOOLS=OFF
|
||||
-DYAML_BUILD_SHARED_LIBS=OFF
|
||||
-DYAML_CPP_INSTALL=ON
|
||||
-GNinja
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
@@ -397,6 +397,7 @@ jobs:
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: 'Download Pipeline Wheel Files'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
itemPattern: '**/*.whl'
|
||||
targetPath: $(Agent.BuildDirectory)
|
||||
|
||||
@@ -93,7 +93,7 @@ schedules:
|
||||
jobs:
|
||||
- ${{ each job in parameters.jobList }}:
|
||||
- job: nightly_${{ job.os }}_${{ job.target }}
|
||||
timeoutInMinutes: 90
|
||||
timeoutInMinutes: 120
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
@@ -226,6 +226,7 @@ jobs:
|
||||
cat Dockerfile
|
||||
- task: Docker@2
|
||||
displayName: Build and upload Docker image
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
containerRegistry: ContainerService3
|
||||
repository: 'nightly-${{ job.os }}-${{ job.target }}'
|
||||
|
||||
23
.azuredevops/tag-builds/catch2.yml
Normal file
23
.azuredevops/tag-builds/catch2.yml
Normal file
@@ -0,0 +1,23 @@
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
|
||||
parameters:
|
||||
- name: catch2Version
|
||||
type: string
|
||||
default: "v3.7.0"
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/catch2.yml
|
||||
parameters:
|
||||
catch2Version: ${{ parameters.catch2Version }}
|
||||
23
.azuredevops/tag-builds/cli11.yml
Normal file
23
.azuredevops/tag-builds/cli11.yml
Normal file
@@ -0,0 +1,23 @@
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
|
||||
parameters:
|
||||
- name: cli11Version
|
||||
type: string
|
||||
default: "main"
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/cli11.yml
|
||||
parameters:
|
||||
cli11Version: ${{ parameters.cli11Version }}
|
||||
23
.azuredevops/tag-builds/fmtlib.yml
Normal file
23
.azuredevops/tag-builds/fmtlib.yml
Normal file
@@ -0,0 +1,23 @@
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
|
||||
parameters:
|
||||
- name: fmtlibVersion
|
||||
type: string
|
||||
default: "11.1.3"
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/fmtlib.yml
|
||||
parameters:
|
||||
fmtlibVersion: ${{ parameters.fmtlibVersion }}
|
||||
23
.azuredevops/tag-builds/libdivide.yml
Normal file
23
.azuredevops/tag-builds/libdivide.yml
Normal file
@@ -0,0 +1,23 @@
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
|
||||
parameters:
|
||||
- name: libdivideVersion
|
||||
type: string
|
||||
default: master
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/libdivide.yml
|
||||
parameters:
|
||||
libdivideVersion: ${{ parameters.libdivideVersion }}
|
||||
23
.azuredevops/tag-builds/spdlog.yml
Normal file
23
.azuredevops/tag-builds/spdlog.yml
Normal file
@@ -0,0 +1,23 @@
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
|
||||
parameters:
|
||||
- name: spdlogVersion
|
||||
type: string
|
||||
default: "v1.15.1"
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/spdlog.yml
|
||||
parameters:
|
||||
spdlogVersion: ${{ parameters.spdlogVersion }}
|
||||
24
.azuredevops/tag-builds/yaml-cpp.yml
Normal file
24
.azuredevops/tag-builds/yaml-cpp.yml
Normal file
@@ -0,0 +1,24 @@
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml
|
||||
|
||||
parameters:
|
||||
- name: yamlcppVersion
|
||||
type: string
|
||||
default: "0.8.0"
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
|
||||
trigger: none
|
||||
pr: none
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_DEPENDENCIES_PATH }}/yamlcpp.yml
|
||||
parameters:
|
||||
yamlcppVersion: ${{ parameters.yamlcppVersion }}
|
||||
|
||||
@@ -24,8 +24,12 @@ parameters:
|
||||
steps:
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: Download ${{ parameters.componentName }}
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
itemPattern: '**/*${{ parameters.componentName }}*${{ parameters.fileFilter }}*'
|
||||
${{ if eq(parameters.componentName, 'clr') }}:
|
||||
itemPattern: '**/*${{ parameters.componentName }}*${{ parameters.fileFilter }}*amd*' # filter out nvidia clr artifacts
|
||||
${{ else }}:
|
||||
itemPattern: '**/*${{ parameters.componentName }}*${{ parameters.fileFilter }}*'
|
||||
targetPath: '$(Pipeline.Workspace)/d'
|
||||
allowPartiallySucceededBuilds: true
|
||||
${{ if parameters.aggregatePipeline }}:
|
||||
|
||||
@@ -20,7 +20,7 @@ steps:
|
||||
retryCountOnTaskFailure: 3
|
||||
fetchFilter: blob:none
|
||||
${{ if ne(parameters.sparseCheckoutDir, '') }}:
|
||||
sparseCheckoutDirectories: ${{ parameters.sparseCheckoutDir }}
|
||||
sparseCheckoutDirectories: ${{ parameters.sparseCheckoutDir }} shared
|
||||
path: sparse
|
||||
- ${{ if ne(parameters.sparseCheckoutDir, '') }}:
|
||||
- task: Bash@3
|
||||
|
||||
@@ -10,6 +10,7 @@ steps:
|
||||
- ${{ if eq(parameters.registerROCmPackages, true) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'Register AMDGPU & ROCm repos (apt)'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
@@ -20,7 +21,8 @@ steps:
|
||||
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
|
||||
sudo apt update
|
||||
- task: Bash@3
|
||||
displayName: 'sudo apt-get update'
|
||||
displayName: 'APT update and install packages'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
@@ -28,15 +30,6 @@ steps:
|
||||
echo "deb http://archive.ubuntu.com/ubuntu/ jammy-updates main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
|
||||
echo "deb http://archive.ubuntu.com/ubuntu/ jammy-backports main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
|
||||
echo "deb http://archive.ubuntu.com/ubuntu/ jammy-security main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/default.list
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get --yes update
|
||||
- task: Bash@3
|
||||
displayName: 'sudo apt-get fix'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-broken install
|
||||
- ${{ if gt(length(parameters.aptPackages), 0) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'sudo apt-get install ...'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-missing install ${{ join(' ', parameters.aptPackages) }}
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get --yes update && \
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-broken install && \
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get --yes --fix-missing install ${{ join(' ', parameters.aptPackages) }}
|
||||
|
||||
@@ -5,51 +5,28 @@ parameters:
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
displayName: Get aqlprofile package name
|
||||
inputs:
|
||||
targetType: inline
|
||||
${{ if eq(parameters.os, 'ubuntu2204') }}:
|
||||
script: |
|
||||
export packageName=$(curl -s https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/ | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb")
|
||||
echo "##vso[task.setvariable variable=packageName;isreadonly=true]$packageName"
|
||||
${{ if eq(parameters.os, 'almalinux8') }}:
|
||||
script: |
|
||||
export packageName=$(curl -s https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/ | grep -oP "hsa-amd-aqlprofile-[^\"]+\.rpm" | head -n1)
|
||||
echo "##vso[task.setvariable variable=packageName;isreadonly=true]$packageName"
|
||||
- task: Bash@3
|
||||
displayName: 'Download aqlprofile'
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
${{ if eq(parameters.os, 'ubuntu2204') }}:
|
||||
script: wget -nv https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/$(packageName)
|
||||
${{ if eq(parameters.os, 'almalinux8') }}:
|
||||
script: wget -nv https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/$(packageName)
|
||||
- task: Bash@3
|
||||
displayName: 'Extract aqlprofile'
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
${{ if eq(parameters.os, 'ubuntu2204') }}:
|
||||
script: |
|
||||
mkdir hsa-amd-aqlprofile
|
||||
dpkg-deb -R $(packageName) hsa-amd-aqlprofile
|
||||
${{ if eq(parameters.os, 'almalinux8') }}:
|
||||
script: |
|
||||
mkdir hsa-amd-aqlprofile
|
||||
sudo dnf -y install rpm-build cpio
|
||||
rpm2cpio $(packageName) | (cd hsa-amd-aqlprofile && cpio -idmv)
|
||||
- task: Bash@3
|
||||
displayName: 'Copy aqlprofile files'
|
||||
displayName: Download and install aqlprofile
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
workingDirectory: $(Agent.BuildDirectory)
|
||||
script: |
|
||||
mkdir -p $(Agent.BuildDirectory)/rocm
|
||||
cp -R hsa-amd-aqlprofile/opt/rocm-*/* $(Agent.BuildDirectory)/rocm
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
- task: Bash@3
|
||||
displayName: 'Clean up aqlprofile'
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: rm -rf hsa-amd-aqlprofile $(packageName)
|
||||
workingDirectory: '$(Pipeline.Workspace)'
|
||||
set -e
|
||||
if [ "${{ parameters.os }}" = "ubuntu2204" ]; then
|
||||
packageName=$(curl -s https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/ | grep -oP "href=\"\K[^\"]*$(lsb_release -rs)[^\"]*\.deb") && \
|
||||
wget -nv https://repo.radeon.com/rocm/apt/$(REPO_RADEON_VERSION)/pool/main/h/hsa-amd-aqlprofile/$packageName && \
|
||||
mkdir -p hsa-amd-aqlprofile && \
|
||||
dpkg-deb -R $packageName hsa-amd-aqlprofile
|
||||
elif [ "${{ parameters.os }}" = "almalinux8" ]; then
|
||||
sudo dnf -y install rpm-build cpio && \
|
||||
packageName=$(curl -s https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/ | grep -oP "hsa-amd-aqlprofile-[^\"]+\.rpm" | head -n1) && \
|
||||
wget -nv https://repo.radeon.com/rocm/rhel8/$(REPO_RADEON_VERSION)/main/$packageName && \
|
||||
mkdir -p hsa-amd-aqlprofile && \
|
||||
rpm2cpio $packageName | (cd hsa-amd-aqlprofile && cpio -idmv)
|
||||
else
|
||||
echo "Unsupported OS: ${{ parameters.os }}"
|
||||
exit 1
|
||||
fi && \
|
||||
mkdir -p $(Agent.BuildDirectory)/rocm && \
|
||||
cp -R hsa-amd-aqlprofile/opt/rocm-*/* $(Agent.BuildDirectory)/rocm && \
|
||||
rm -rf hsa-amd-aqlprofile $packageName
|
||||
|
||||
@@ -1,10 +1,15 @@
|
||||
parameters:
|
||||
- name: cmakeVersion
|
||||
type: string
|
||||
default: '3.31.0'
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
displayName: Install CMake 3.31
|
||||
displayName: Install CMake ${{ parameters.cmakeVersion }}
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
CMAKE_VERSION=3.31.0
|
||||
CMAKE_VERSION=${{ parameters.cmakeVersion }}
|
||||
CMAKE_ROOT="$(Pipeline.Workspace)/cmake"
|
||||
|
||||
echo "Downloading CMake $CMAKE_VERSION..."
|
||||
@@ -54,13 +54,16 @@ parameters:
|
||||
libfftw3-dev: fftw-devel
|
||||
libfmt-dev: fmt-devel
|
||||
libgmp-dev: gmp-devel
|
||||
liblapack-dev: lapack-devel
|
||||
liblzma-dev: xz-devel
|
||||
libmpfr-dev: mpfr-devel
|
||||
libmsgpack-dev: msgpack-devel
|
||||
libncurses5-dev: ncurses-devel
|
||||
libnuma-dev: numactl-devel
|
||||
libopenblas-dev: openblas-devel
|
||||
libopenmpi-dev: openmpi-devel
|
||||
libpci-dev: libpciaccess-devel
|
||||
libsimde-dev: simde-devel
|
||||
libssl-dev: openssl-devel
|
||||
# note: libstdc++-devel is in the base packages list
|
||||
libsystemd-dev: systemd-devel
|
||||
@@ -87,6 +90,7 @@ steps:
|
||||
- ${{ if eq(parameters.registerROCmPackages, true) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'Register AMDGPU & ROCm repos (dnf)'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
@@ -107,12 +111,13 @@ steps:
|
||||
sudo dnf makecache
|
||||
- task: Bash@3
|
||||
displayName: 'Install base dnf packages'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
sudo dnf config-manager --set-enabled powertools
|
||||
# rpm fusion free repo for some dependencies
|
||||
sudo dnf -y install https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm
|
||||
sudo dnf config-manager --set-enabled powertools && \
|
||||
sudo dnf -y install https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm && \
|
||||
sudo dnf -y install ${{ join(' ', parameters.basePackages) }}
|
||||
- task: Bash@3
|
||||
displayName: 'Check gcc environment'
|
||||
@@ -126,6 +131,7 @@ steps:
|
||||
g++ -print-file-name=libstdc++.so
|
||||
- task: Bash@3
|
||||
displayName: 'Set python 3.11 as default'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
@@ -140,18 +146,20 @@ steps:
|
||||
- ${{ if eq(pkg, 'ninja-build') }}:
|
||||
- task: Bash@3
|
||||
displayName: 'Install ninja 1.11.1'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
curl -LO https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-linux.zip
|
||||
sudo dnf -y install unzip
|
||||
unzip ninja-linux.zip
|
||||
sudo mv ninja /usr/local/bin/ninja
|
||||
sudo chmod +x /usr/local/bin/ninja
|
||||
sudo dnf -y install unzip && \
|
||||
curl -LO https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-linux.zip && \
|
||||
unzip ninja-linux.zip && \
|
||||
sudo mv ninja /usr/local/bin/ninja && \
|
||||
sudo chmod +x /usr/local/bin/ninja && \
|
||||
echo "##vso[task.prependpath]/usr/local/bin"
|
||||
- ${{ if ne(parameters.aptToDnfMap[pkg], '') }}:
|
||||
- task: Bash@3
|
||||
displayName: 'dnf install ${{ parameters.aptToDnfMap[pkg] }}'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: |
|
||||
|
||||
@@ -27,6 +27,7 @@ steps:
|
||||
- ${{ if gt(length(parameters.pipModules), 0) }}:
|
||||
- task: Bash@3
|
||||
displayName: 'pip install ...'
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
targetType: inline
|
||||
script: python3 -m pip install -v --force-reinstall ${{ join(' ', parameters.pipModules) }}
|
||||
|
||||
@@ -35,8 +35,8 @@ parameters:
|
||||
developBranch: develop
|
||||
hasGpuTarget: true
|
||||
amdsmi:
|
||||
pipelineId: 99
|
||||
developBranch: amd-staging
|
||||
pipelineId: 376
|
||||
developBranch: develop
|
||||
hasGpuTarget: false
|
||||
aomp-extras:
|
||||
pipelineId: 111
|
||||
@@ -46,9 +46,13 @@ parameters:
|
||||
pipelineId: 115
|
||||
developBranch: aomp-dev
|
||||
hasGpuTarget: false
|
||||
aqlprofile:
|
||||
pipelineId: 365
|
||||
developBranch: develop
|
||||
hasGpuTarget: false
|
||||
clr:
|
||||
pipelineId: 145
|
||||
developBranch: amd-staging
|
||||
pipelineId: 335
|
||||
developBranch: develop
|
||||
hasGpuTarget: false
|
||||
composable_kernel:
|
||||
pipelineId: 86
|
||||
@@ -59,12 +63,12 @@ parameters:
|
||||
developBranch: rocm
|
||||
hasGpuTarget: false
|
||||
HIP:
|
||||
pipelineId: 93
|
||||
developBranch: amd-staging
|
||||
pipelineId: 335
|
||||
developBranch: develop
|
||||
hasGpuTarget: false
|
||||
hip-tests:
|
||||
pipelineId: 233
|
||||
developBranch: amd-staging
|
||||
pipelineId: 362
|
||||
developBranch: develop
|
||||
hasGpuTarget: false
|
||||
hipBLAS:
|
||||
pipelineId: 317
|
||||
@@ -111,7 +115,7 @@ parameters:
|
||||
developBranch: develop
|
||||
hasGpuTarget: true
|
||||
hipTensor:
|
||||
pipelineId: 105
|
||||
pipelineId: 374
|
||||
developBranch: develop
|
||||
hasGpuTarget: true
|
||||
llvm-project:
|
||||
@@ -126,13 +130,17 @@ parameters:
|
||||
pipelineId: 80
|
||||
developBranch: develop
|
||||
hasGpuTarget: true
|
||||
origami:
|
||||
pipelineId: 364
|
||||
developBranch: develop
|
||||
hasGpuTarget: true
|
||||
rccl:
|
||||
pipelineId: 107
|
||||
developBranch: develop
|
||||
hasGpuTarget: true
|
||||
rdc:
|
||||
pipelineId: 100
|
||||
developBranch: amd-staging
|
||||
pipelineId: 360
|
||||
developBranch: develop
|
||||
hasGpuTarget: false
|
||||
rocAL:
|
||||
pipelineId: 151
|
||||
@@ -171,16 +179,16 @@ parameters:
|
||||
developBranch: develop
|
||||
hasGpuTarget: false
|
||||
rocm-core:
|
||||
pipelineId: 103
|
||||
developBranch: master
|
||||
pipelineId: 349
|
||||
developBranch: develop
|
||||
hasGpuTarget: false
|
||||
rocm-examples:
|
||||
pipelineId: 216
|
||||
developBranch: amd-staging
|
||||
hasGpuTarget: true
|
||||
rocminfo:
|
||||
pipelineId: 91
|
||||
developBranch: amd-staging
|
||||
pipelineId: 356
|
||||
developBranch: develop
|
||||
hasGpuTarget: false
|
||||
rocMLIR:
|
||||
pipelineId: 229
|
||||
@@ -195,19 +203,19 @@ parameters:
|
||||
developBranch: master
|
||||
hasGpuTarget: false
|
||||
rocm_smi_lib:
|
||||
pipelineId: 96
|
||||
developBranch: amd-staging
|
||||
pipelineId: 358
|
||||
developBranch: develop
|
||||
hasGpuTarget: false
|
||||
rocPRIM:
|
||||
pipelineId: 273
|
||||
developBranch: develop
|
||||
hasGpuTarget: true
|
||||
rocprofiler:
|
||||
pipelineId: 143
|
||||
developBranch: amd-staging
|
||||
pipelineId: 329
|
||||
developBranch: develop
|
||||
hasGpuTarget: true
|
||||
rocprofiler-compute:
|
||||
pipelineId: 257
|
||||
pipelineId: 344
|
||||
developBranch: develop
|
||||
hasGpuTarget: true
|
||||
rocprofiler-register:
|
||||
@@ -215,20 +223,20 @@ parameters:
|
||||
developBranch: develop
|
||||
hasGpuTarget: false
|
||||
rocprofiler-sdk:
|
||||
pipelineId: 246
|
||||
developBranch: amd-staging
|
||||
pipelineId: 347
|
||||
developBranch: develop
|
||||
hasGpuTarget: true
|
||||
rocprofiler-systems:
|
||||
pipelineId: 255
|
||||
developBranch: amd-staging
|
||||
pipelineId: 345
|
||||
developBranch: develop
|
||||
hasGpuTarget: true
|
||||
rocPyDecode:
|
||||
pipelineId: 239
|
||||
developBranch: develop
|
||||
hasGpuTarget: true
|
||||
ROCR-Runtime:
|
||||
pipelineId: 10
|
||||
developBranch: amd-staging
|
||||
pipelineId: 354
|
||||
developBranch: develop
|
||||
hasGpuTarget: false
|
||||
rocRAND:
|
||||
pipelineId: 274
|
||||
@@ -251,11 +259,11 @@ parameters:
|
||||
developBranch: develop
|
||||
hasGpuTarget: true
|
||||
roctracer:
|
||||
pipelineId: 141
|
||||
developBranch: amd-staging
|
||||
pipelineId: 331
|
||||
developBranch: develop
|
||||
hasGpuTarget: true
|
||||
rocWMMA:
|
||||
pipelineId: 109
|
||||
pipelineId: 370
|
||||
developBranch: develop
|
||||
hasGpuTarget: true
|
||||
rpp:
|
||||
|
||||
@@ -8,15 +8,20 @@ parameters:
|
||||
type: object
|
||||
default:
|
||||
boost: 250
|
||||
catch2: 343
|
||||
fmtlib: 341
|
||||
grpc: 72
|
||||
gtest: 73
|
||||
half560: 68
|
||||
lapack: 69
|
||||
libdivide: 342
|
||||
spdlog: 340
|
||||
|
||||
steps:
|
||||
- ${{ each dependency in parameters.dependencyList }}:
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: Download ${{ dependency }}
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
project: ROCm-CI
|
||||
buildType: specific
|
||||
@@ -28,7 +33,7 @@ steps:
|
||||
inputs:
|
||||
archiveFilePatterns: '$(Pipeline.Workspace)/d/**/*.tar.gz'
|
||||
destinationFolder: $(Agent.BuildDirectory)/vendor
|
||||
cleanDestinationFolder: true
|
||||
cleanDestinationFolder: false
|
||||
overwriteExistingFiles: true
|
||||
- task: DeleteFiles@1
|
||||
displayName: Clean up ${{ dependency }}
|
||||
|
||||
@@ -33,6 +33,7 @@ parameters:
|
||||
steps:
|
||||
- task: DownloadPipelineArtifact@2
|
||||
displayName: Download ${{ parameters.preTargetFilter}}*${{ parameters.os }}_${{ parameters.gpuTarget}}*${{ parameters.postTargetFilter}}
|
||||
retryCountOnTaskFailure: 3
|
||||
inputs:
|
||||
${{ if eq(parameters.buildType, 'specific') }}:
|
||||
buildType: specific
|
||||
|
||||
@@ -7,6 +7,7 @@ steps:
|
||||
- task: Bash@3
|
||||
name: downloadCKBuild
|
||||
displayName: Download specific CK build
|
||||
retryCountOnTaskFailure: 3
|
||||
env:
|
||||
CXX: $(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
CC: $(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
@@ -69,20 +70,29 @@ steps:
|
||||
|
||||
RETRIES=0
|
||||
MAX_RETRIES=5
|
||||
until wget -nv $ARTIFACT_URL -O $(System.ArtifactsDirectory)/ck.zip; do
|
||||
RETRIES=$((RETRIES+1))
|
||||
if [[ $RETRIES -ge $MAX_RETRIES ]]; then
|
||||
echo "Failed to download CK artifact after $MAX_RETRIES attempts."
|
||||
exit 1
|
||||
SUCCESS=false
|
||||
while [ $RETRIES -lt $MAX_RETRIES ]; do
|
||||
wget -nv $ARTIFACT_URL -O $(System.ArtifactsDirectory)/ck.zip && \
|
||||
unzip $(System.ArtifactsDirectory)/ck.zip -d $(System.ArtifactsDirectory) && \
|
||||
mkdir -p $(Agent.BuildDirectory)/rocm && \
|
||||
tar -zxvf $(System.ArtifactsDirectory)/composable_kernel*/*.tar.gz -C $(Agent.BuildDirectory)/rocm && \
|
||||
rm -r $(System.ArtifactsDirectory)/ck.zip $(System.ArtifactsDirectory)/composable_kernel*
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
SUCCESS=true
|
||||
echo "Successfully downloaded CK."
|
||||
break
|
||||
else
|
||||
RETRIES=$((RETRIES + 1))
|
||||
echo "Failed to download CK on attempt $RETRIES/$MAX_RETRIES, retrying..."
|
||||
sleep 1
|
||||
fi
|
||||
echo "Download failed, retrying ($RETRIES/$MAX_RETRIES)..."
|
||||
sleep 5
|
||||
done
|
||||
|
||||
unzip $(System.ArtifactsDirectory)/ck.zip -d $(System.ArtifactsDirectory)
|
||||
mkdir -p $(Agent.BuildDirectory)/rocm
|
||||
tar -zxvf $(System.ArtifactsDirectory)/composable_kernel*/*.tar.gz -C $(Agent.BuildDirectory)/rocm
|
||||
rm -r $(System.ArtifactsDirectory)/ck.zip $(System.ArtifactsDirectory)/composable_kernel*
|
||||
if [ "$SUCCESS" = false ]; then
|
||||
echo "ERROR: failed to download CK after $MAX_RETRIES attempts."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ $EXIT_CODE -ne 0 ]]; then
|
||||
BUILD_COMMIT=$(curl -s $AZ_API/build/builds/$CK_BUILD_ID | jq '.sourceVersion' | tr -d '"')
|
||||
|
||||
@@ -13,7 +13,7 @@ parameters:
|
||||
default: ctest
|
||||
- name: testParameters
|
||||
type: string
|
||||
default: --output-on-failure --force-new-ctest-process --output-junit test_output.xml
|
||||
default: --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml
|
||||
- name: extraTestParameters
|
||||
type: string
|
||||
default: ''
|
||||
|
||||
@@ -28,13 +28,13 @@ variables:
|
||||
- name: GFX90A_TEST_POOL
|
||||
value: gfx90a_test_pool
|
||||
- name: LATEST_RELEASE_VERSION
|
||||
value: 6.4.2
|
||||
value: 6.4.3
|
||||
- name: REPO_RADEON_VERSION
|
||||
value: 6.4.2
|
||||
value: 6.4.3
|
||||
- name: NEXT_RELEASE_VERSION
|
||||
value: 7.0.0
|
||||
- name: LATEST_RELEASE_TAG
|
||||
value: rocm-6.4.2
|
||||
value: rocm-6.4.3
|
||||
- name: DOCKER_SKIP_GFX
|
||||
value: gfx90a
|
||||
- name: COMPOSABLE_KERNEL_PIPELINE_ID
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,6 +1,7 @@
|
||||
.venv
|
||||
.vscode
|
||||
build
|
||||
__pycache__
|
||||
|
||||
# documentation artifacts
|
||||
_build/
|
||||
|
||||
116
.wordlist.txt
116
.wordlist.txt
@@ -5,6 +5,7 @@ ACEs
|
||||
ACS
|
||||
AccVGPR
|
||||
AccVGPRs
|
||||
AITER
|
||||
ALU
|
||||
AllReduce
|
||||
AMD
|
||||
@@ -26,6 +27,7 @@ ASICs
|
||||
ASan
|
||||
ASAN
|
||||
ASm
|
||||
Async
|
||||
ATI
|
||||
atomicRMW
|
||||
AddressSanitizer
|
||||
@@ -33,6 +35,7 @@ AlexNet
|
||||
Andrej
|
||||
Arb
|
||||
Autocast
|
||||
autograd
|
||||
BARs
|
||||
BatchNorm
|
||||
BLAS
|
||||
@@ -42,6 +45,7 @@ Blit
|
||||
Blockwise
|
||||
Bluefield
|
||||
Bootloader
|
||||
Broadcom
|
||||
CAS
|
||||
CCD
|
||||
CDNA
|
||||
@@ -61,6 +65,7 @@ CPU
|
||||
CPUs
|
||||
Cron
|
||||
CSC
|
||||
CSDATA
|
||||
CSE
|
||||
CSV
|
||||
CSn
|
||||
@@ -70,9 +75,11 @@ CU
|
||||
CUDA
|
||||
CUs
|
||||
CXX
|
||||
CX
|
||||
Cavium
|
||||
CentOS
|
||||
ChatGPT
|
||||
Cholesky
|
||||
CoRR
|
||||
Codespaces
|
||||
Commitizen
|
||||
@@ -80,10 +87,13 @@ CommonMark
|
||||
Concretized
|
||||
Conda
|
||||
ConnectX
|
||||
CountOnes
|
||||
CuPy
|
||||
customizable
|
||||
da
|
||||
Dashboarding
|
||||
Dataloading
|
||||
dataflows
|
||||
DBRX
|
||||
DDR
|
||||
DF
|
||||
@@ -96,6 +106,7 @@ DIMM
|
||||
DKMS
|
||||
DL
|
||||
DMA
|
||||
DOMContentLoaded
|
||||
DNN
|
||||
DNNL
|
||||
DPM
|
||||
@@ -114,15 +125,24 @@ Dependabot
|
||||
Deprecations
|
||||
DevCap
|
||||
DirectX
|
||||
Disaggregated
|
||||
disaggregated
|
||||
Dockerfile
|
||||
Dockerized
|
||||
Doxygen
|
||||
dropless
|
||||
ELMo
|
||||
ENDPGM
|
||||
EPYC
|
||||
ESXi
|
||||
EP
|
||||
EoS
|
||||
etcd
|
||||
equalto
|
||||
fas
|
||||
FBGEMM
|
||||
FiLM
|
||||
FIFOs
|
||||
FFT
|
||||
FFTs
|
||||
FFmpeg
|
||||
@@ -135,15 +155,19 @@ Filesystem
|
||||
FindDb
|
||||
Flang
|
||||
FlashAttention
|
||||
FlashInfer’s
|
||||
FlashInfer
|
||||
FluxBenchmark
|
||||
Fortran
|
||||
Fuyu
|
||||
GALB
|
||||
GAT
|
||||
GATNE
|
||||
GCC
|
||||
GCD
|
||||
GCDs
|
||||
GCN
|
||||
GCNN
|
||||
GDB
|
||||
GDDR
|
||||
GDR
|
||||
@@ -153,6 +177,7 @@ GEMMs
|
||||
GFLOPS
|
||||
GFortran
|
||||
GFXIP
|
||||
GGUF
|
||||
Gemma
|
||||
GiB
|
||||
GIM
|
||||
@@ -161,15 +186,19 @@ Glibc
|
||||
GLXT
|
||||
Gloo
|
||||
GMI
|
||||
GNN
|
||||
GNNs
|
||||
GPG
|
||||
GPR
|
||||
GPT
|
||||
GPU
|
||||
GPU's
|
||||
GPUDirect
|
||||
GPUs
|
||||
Graphbolt
|
||||
GraphBolt
|
||||
GraphSage
|
||||
GRBM
|
||||
GRE
|
||||
GenAI
|
||||
GenZ
|
||||
GitHub
|
||||
@@ -194,8 +223,13 @@ HWE
|
||||
HWS
|
||||
Haswell
|
||||
Higgs
|
||||
href
|
||||
Hyperparameters
|
||||
HybridEngine
|
||||
Huggingface
|
||||
Hunyuan
|
||||
HunyuanVideo
|
||||
IB
|
||||
ICD
|
||||
ICT
|
||||
ICV
|
||||
@@ -204,8 +238,11 @@ IDEs
|
||||
IFWI
|
||||
IMDb
|
||||
IncDec
|
||||
instrSize
|
||||
interpolators
|
||||
IOMMU
|
||||
IOP
|
||||
IOPS
|
||||
IOPM
|
||||
IOV
|
||||
IRQ
|
||||
@@ -222,7 +259,9 @@ Intersphinx
|
||||
Intra
|
||||
Ioffe
|
||||
JAX's
|
||||
JAXLIB
|
||||
Jinja
|
||||
js
|
||||
JSON
|
||||
Jupyter
|
||||
KFD
|
||||
@@ -242,12 +281,16 @@ LLM
|
||||
LLMs
|
||||
LLVM
|
||||
LM
|
||||
logsumexp
|
||||
LRU
|
||||
LSAN
|
||||
LSan
|
||||
LTS
|
||||
LSTMs
|
||||
LteAll
|
||||
LanguageCrossEntropy
|
||||
LoRA
|
||||
MECO
|
||||
MEM
|
||||
MERCHANTABILITY
|
||||
MFMA
|
||||
@@ -266,6 +309,7 @@ MNIST
|
||||
MPI
|
||||
MPT
|
||||
MSVC
|
||||
mul
|
||||
MVAPICH
|
||||
MVFFR
|
||||
Makefile
|
||||
@@ -273,6 +317,7 @@ Makefiles
|
||||
Matplotlib
|
||||
Matrox
|
||||
MaxText
|
||||
MBT
|
||||
Megablocks
|
||||
Megatrends
|
||||
Megatron
|
||||
@@ -282,13 +327,18 @@ Meta's
|
||||
Miniconda
|
||||
MirroredStrategy
|
||||
Mixtral
|
||||
MLA
|
||||
MosaicML
|
||||
MoEs
|
||||
Mooncake
|
||||
Mpops
|
||||
Multicore
|
||||
Multithreaded
|
||||
mx
|
||||
MXFP
|
||||
MyEnvironment
|
||||
MyST
|
||||
NANOO
|
||||
NBIO
|
||||
NBIOs
|
||||
NCCL
|
||||
@@ -321,6 +371,7 @@ OFED
|
||||
OMM
|
||||
OMP
|
||||
OMPI
|
||||
OOM
|
||||
OMPT
|
||||
OMPX
|
||||
ONNX
|
||||
@@ -343,9 +394,11 @@ PCC
|
||||
PCI
|
||||
PCIe
|
||||
PEFT
|
||||
perf
|
||||
PEQT
|
||||
PIL
|
||||
PILImage
|
||||
PJRT
|
||||
POR
|
||||
PRNG
|
||||
PRs
|
||||
@@ -360,10 +413,12 @@ PowerEdge
|
||||
PowerShell
|
||||
Pretrained
|
||||
Pretraining
|
||||
Primus
|
||||
Profiler's
|
||||
PyPi
|
||||
Pytest
|
||||
PyTorch
|
||||
QPS
|
||||
Qcycles
|
||||
Qwen
|
||||
RAII
|
||||
@@ -425,7 +480,9 @@ SKU
|
||||
SKUs
|
||||
SLES
|
||||
SLURM
|
||||
Slurm
|
||||
SMEM
|
||||
SMFMA
|
||||
SMI
|
||||
SMT
|
||||
SPI
|
||||
@@ -437,18 +494,24 @@ SWE
|
||||
SerDes
|
||||
ShareGPT
|
||||
Shlens
|
||||
simd
|
||||
Skylake
|
||||
Softmax
|
||||
Spack
|
||||
SplitK
|
||||
Supermicro
|
||||
Szegedy
|
||||
TagRAM
|
||||
TCA
|
||||
TCC
|
||||
TCCs
|
||||
TCI
|
||||
TCIU
|
||||
TCP
|
||||
TCR
|
||||
TVM
|
||||
THREADGROUPS
|
||||
threadgroups
|
||||
TensorRT
|
||||
TensorFloat
|
||||
TF
|
||||
@@ -458,13 +521,12 @@ TPS
|
||||
TPU
|
||||
TPUs
|
||||
TSME
|
||||
Taichi
|
||||
Taichi's
|
||||
Tagram
|
||||
TensileLite
|
||||
TensorBoard
|
||||
TensorFlow
|
||||
TensorParallel
|
||||
TheRock
|
||||
ToC
|
||||
TorchAudio
|
||||
torchaudio
|
||||
@@ -482,6 +544,7 @@ UAC
|
||||
UC
|
||||
UCC
|
||||
UCX
|
||||
ud
|
||||
UE
|
||||
UIF
|
||||
UMC
|
||||
@@ -492,9 +555,11 @@ UltraChat
|
||||
Uncached
|
||||
Unittests
|
||||
Unhandled
|
||||
unwindowed
|
||||
VALU
|
||||
VBIOS
|
||||
VCN
|
||||
verl's
|
||||
VGPR
|
||||
VGPRs
|
||||
VM
|
||||
@@ -507,11 +572,13 @@ Vanhoucke
|
||||
Vulkan
|
||||
WGP
|
||||
WGPs
|
||||
WR
|
||||
WX
|
||||
WikiText
|
||||
Wojna
|
||||
Workgroups
|
||||
Writebacks
|
||||
xcc
|
||||
XCD
|
||||
XCDs
|
||||
XGBoost
|
||||
@@ -524,6 +591,7 @@ Xilinx
|
||||
Xnack
|
||||
Xteam
|
||||
YAML
|
||||
YAMLs
|
||||
YML
|
||||
YModel
|
||||
ZeRO
|
||||
@@ -531,6 +599,7 @@ ZenDNN
|
||||
accuracies
|
||||
activations
|
||||
addr
|
||||
addEventListener
|
||||
ade
|
||||
ai
|
||||
alloc
|
||||
@@ -546,6 +615,7 @@ autogenerated
|
||||
autotune
|
||||
avx
|
||||
awk
|
||||
az
|
||||
backend
|
||||
backends
|
||||
bb
|
||||
@@ -563,6 +633,7 @@ boson
|
||||
bosons
|
||||
br
|
||||
BrainFloat
|
||||
btn
|
||||
buildable
|
||||
bursty
|
||||
bzip
|
||||
@@ -574,17 +645,21 @@ centric
|
||||
changelog
|
||||
checkpointing
|
||||
chiplet
|
||||
classList
|
||||
cmake
|
||||
cmd
|
||||
coalescable
|
||||
codename
|
||||
collater
|
||||
comgr
|
||||
compat
|
||||
completers
|
||||
composable
|
||||
concretization
|
||||
config
|
||||
configs
|
||||
conformant
|
||||
const
|
||||
constructible
|
||||
convolutional
|
||||
convolves
|
||||
@@ -619,12 +694,14 @@ denoised
|
||||
denoises
|
||||
denormalize
|
||||
dequantization
|
||||
dequantized
|
||||
dequantizes
|
||||
deserializers
|
||||
detections
|
||||
dev
|
||||
devicelibs
|
||||
devsel
|
||||
dgl
|
||||
dimensionality
|
||||
disambiguates
|
||||
distro
|
||||
@@ -648,6 +725,7 @@ exascale
|
||||
executables
|
||||
ffmpeg
|
||||
filesystem
|
||||
forEach
|
||||
fortran
|
||||
fp
|
||||
framebuffer
|
||||
@@ -656,13 +734,16 @@ galb
|
||||
gcc
|
||||
gdb
|
||||
gemm
|
||||
getAttribute
|
||||
gfortran
|
||||
gfx
|
||||
githooks
|
||||
github
|
||||
globals
|
||||
gnupg
|
||||
gpu
|
||||
grayscale
|
||||
gx
|
||||
gzip
|
||||
heterogenous
|
||||
hipBLAS
|
||||
@@ -715,6 +796,7 @@ invariants
|
||||
invocating
|
||||
ipo
|
||||
jax
|
||||
json
|
||||
kdb
|
||||
kfd
|
||||
kv
|
||||
@@ -728,6 +810,7 @@ linalg
|
||||
linearized
|
||||
linter
|
||||
linux
|
||||
llm
|
||||
llvm
|
||||
lm
|
||||
localscratch
|
||||
@@ -735,6 +818,8 @@ logits
|
||||
lossy
|
||||
macOS
|
||||
matchers
|
||||
maxtext
|
||||
megablocks
|
||||
megatron
|
||||
microarchitecture
|
||||
migraphx
|
||||
@@ -763,6 +848,7 @@ opencv
|
||||
openmp
|
||||
openssl
|
||||
optimizers
|
||||
ol
|
||||
os
|
||||
oversubscription
|
||||
pageable
|
||||
@@ -770,10 +856,13 @@ pallas
|
||||
parallelization
|
||||
parallelizing
|
||||
param
|
||||
params
|
||||
parameterization
|
||||
passthrough
|
||||
pe
|
||||
perfcounter
|
||||
performant
|
||||
piecewise
|
||||
perl
|
||||
pragma
|
||||
pre
|
||||
@@ -794,11 +883,14 @@ preprocessing
|
||||
preprocessor
|
||||
prequantized
|
||||
prerequisites
|
||||
pretrain
|
||||
pretraining
|
||||
primus
|
||||
profiler
|
||||
profilers
|
||||
protobuf
|
||||
pseudorandom
|
||||
px
|
||||
py
|
||||
pytorch
|
||||
recommender
|
||||
@@ -806,9 +898,12 @@ recommenders
|
||||
quantile
|
||||
quantizer
|
||||
quasirandom
|
||||
querySelector
|
||||
querySelectorAll
|
||||
queueing
|
||||
qwen
|
||||
radeon
|
||||
rc
|
||||
rccl
|
||||
rdc
|
||||
rdma
|
||||
@@ -824,6 +919,8 @@ req
|
||||
resampling
|
||||
rescaling
|
||||
reusability
|
||||
rhel
|
||||
rl
|
||||
RLHF
|
||||
roadmap
|
||||
roc
|
||||
@@ -868,19 +965,24 @@ scalability
|
||||
scalable
|
||||
scipy
|
||||
seealso
|
||||
selectattr
|
||||
selectedTag
|
||||
sendmsg
|
||||
seqs
|
||||
serializers
|
||||
setAttribute
|
||||
sglang
|
||||
shader
|
||||
sharding
|
||||
sigmoid
|
||||
sles
|
||||
sm
|
||||
smi
|
||||
softmax
|
||||
spack
|
||||
spmm
|
||||
src
|
||||
stanford
|
||||
stochastically
|
||||
strided
|
||||
subcommand
|
||||
@@ -897,8 +999,10 @@ symlink
|
||||
symlinks
|
||||
sys
|
||||
tabindex
|
||||
targetContainer
|
||||
td
|
||||
tensorfloat
|
||||
tf
|
||||
th
|
||||
tokenization
|
||||
tokenize
|
||||
@@ -907,9 +1011,12 @@ tokenizer
|
||||
tokenizes
|
||||
toolchain
|
||||
toolchains
|
||||
topk
|
||||
toolset
|
||||
toolsets
|
||||
torchtitan
|
||||
torchvision
|
||||
tp
|
||||
tqdm
|
||||
tracebacks
|
||||
txt
|
||||
@@ -932,6 +1039,7 @@ USM
|
||||
UTCL
|
||||
UTIL
|
||||
utils
|
||||
UX
|
||||
vL
|
||||
variational
|
||||
vdi
|
||||
@@ -961,6 +1069,8 @@ writebacks
|
||||
wrreq
|
||||
wzo
|
||||
xargs
|
||||
xdit
|
||||
xDiT
|
||||
xGMI
|
||||
xPacked
|
||||
xz
|
||||
|
||||
2775
CHANGELOG.md
2775
CHANGELOG.md
File diff suppressed because it is too large
Load Diff
827
RELEASE.md
827
RELEASE.md
File diff suppressed because it is too large
Load Diff
47
default.xml
47
default.xml
@@ -1,32 +1,17 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<manifest>
|
||||
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
|
||||
<default revision="refs/tags/rocm-6.4.2"
|
||||
<default revision="refs/tags/rocm-7.1.1"
|
||||
remote="rocm-org"
|
||||
sync-c="true"
|
||||
sync-j="4" />
|
||||
<!--list of projects for ROCm-->
|
||||
<project name="ROCK-Kernel-Driver" />
|
||||
<project name="ROCR-Runtime" />
|
||||
<project name="amdsmi" />
|
||||
<project name="rdc" />
|
||||
<project name="rocm_bandwidth_test" />
|
||||
<project name="rocm_smi_lib" />
|
||||
<project name="rocm-core" />
|
||||
<project name="rocm-examples" />
|
||||
<project name="rocminfo" />
|
||||
<project name="rocprofiler" />
|
||||
<project name="rocprofiler-register" />
|
||||
<project name="rocprofiler-sdk" />
|
||||
<project name="rocprofiler-compute" />
|
||||
<project name="rocprofiler-systems" />
|
||||
<project name="roctracer" />
|
||||
<!--HIP Projects-->
|
||||
<project name="HIP" />
|
||||
<project name="hip-tests" />
|
||||
<project name="HIPIFY" />
|
||||
<project name="clr" />
|
||||
<project name="hipother" />
|
||||
<!-- The following projects are all associated with the AMDGPU LLVM compiler -->
|
||||
<project name="half" />
|
||||
<project name="llvm-project" />
|
||||
@@ -37,36 +22,32 @@
|
||||
<project name="rocr_debug_agent" />
|
||||
<!-- ROCm Libraries -->
|
||||
<project groups="mathlibs" name="AMDMIGraphX" />
|
||||
<project groups="mathlibs" name="MIOpen" />
|
||||
<project groups="mathlibs" name="MIVisionX" />
|
||||
<project groups="mathlibs" name="ROCmValidationSuite" />
|
||||
<project groups="mathlibs" name="Tensile" />
|
||||
<project groups="mathlibs" name="composable_kernel" />
|
||||
<project groups="mathlibs" name="hipBLAS-common" />
|
||||
<project groups="mathlibs" name="hipBLAS" />
|
||||
<project groups="mathlibs" name="hipBLASLt" />
|
||||
<project groups="mathlibs" name="hipCUB" />
|
||||
<project groups="mathlibs" name="hipFFT" />
|
||||
<project groups="mathlibs" name="hipRAND" />
|
||||
<project groups="mathlibs" name="hipSOLVER" />
|
||||
<project groups="mathlibs" name="hipSPARSE" />
|
||||
<project groups="mathlibs" name="hipSPARSELt" />
|
||||
<project groups="mathlibs" name="hipTensor" />
|
||||
<project groups="mathlibs" name="hipfort" />
|
||||
<project groups="mathlibs" name="rccl" />
|
||||
<project groups="mathlibs" name="rocAL" />
|
||||
<project groups="mathlibs" name="rocALUTION" />
|
||||
<project groups="mathlibs" name="rocBLAS" />
|
||||
<project groups="mathlibs" name="rocDecode" />
|
||||
<project groups="mathlibs" name="rocJPEG" />
|
||||
<!-- The following components have been migrated to rocm-libraries:
|
||||
hipBLAS-common hipBLAS hipBLASLt hipCUB
|
||||
hipFFT hipRAND hipSPARSE hipSPARSELt
|
||||
MIOpen rocBLAS rocFFT rocPRIM rocRAND
|
||||
rocSPARSE rocThrust Tensile -->
|
||||
<project groups="mathlibs" name="rocm-libraries" />
|
||||
<!-- The following components have been migrated to rocm-systems:
|
||||
aqlprofile clr hip hip-tests hipother
|
||||
rdc rocm-core rocm_smi_lib rocminfo rocprofiler-compute
|
||||
rocprofiler-register rocprofiler-sdk rocprofiler-systems
|
||||
rocprofiler rocr-runtime roctracer -->
|
||||
<project groups="mathlibs" name="rocm-systems" />
|
||||
<project groups="mathlibs" name="rocPyDecode" />
|
||||
<project groups="mathlibs" name="rocFFT" />
|
||||
<project groups="mathlibs" name="rocPRIM" />
|
||||
<project groups="mathlibs" name="rocRAND" />
|
||||
<project groups="mathlibs" name="rocSHMEM" />
|
||||
<project groups="mathlibs" name="rocSOLVER" />
|
||||
<project groups="mathlibs" name="rocSPARSE" />
|
||||
<project groups="mathlibs" name="rocThrust" />
|
||||
<project groups="mathlibs" name="rocSHMEM" />
|
||||
<project groups="mathlibs" name="rocWMMA" />
|
||||
<project groups="mathlibs" name="rocm-cmake" />
|
||||
<project groups="mathlibs" name="rpp" />
|
||||
|
||||
@@ -25,69 +25,69 @@ additional licenses. Please review individual repositories for more information.
|
||||
<!-- spellcheck-disable -->
|
||||
| Component | License |
|
||||
|:---------------------|:-------------------------|
|
||||
| [AMD Compute Language Runtime (CLR)](https://github.com/ROCm/clr) | [MIT](https://github.com/ROCm/clr/blob/amd-staging/LICENSE.txt) |
|
||||
| [AMD Compute Language Runtime (CLR)](https://github.com/ROCm/rocm-systems/tree/develop/projects/clr) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/clr/LICENSE.md) |
|
||||
| [AMD SMI](https://github.com/ROCm/amdsmi) | [MIT](https://github.com/ROCm/amdsmi/blob/amd-staging/LICENSE) |
|
||||
| [aomp](https://github.com/ROCm/aomp/) | [Apache 2.0](https://github.com/ROCm/aomp/blob/aomp-dev/LICENSE) |
|
||||
| [aomp-extras](https://github.com/ROCm/aomp-extras/) | [MIT](https://github.com/ROCm/aomp-extras/blob/aomp-dev/LICENSE) |
|
||||
| [AQLprofile](https://github.com/ROCm/rocm-systems/tree/develop/projects/aqlprofile/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/aqlprofile/LICENSE.md) |
|
||||
| [Code Object Manager (Comgr)](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/comgr) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/comgr/LICENSE.txt) |
|
||||
| [Composable Kernel](https://github.com/ROCm/composable_kernel) | [MIT](https://github.com/ROCm/composable_kernel/blob/develop/LICENSE) |
|
||||
| [half](https://github.com/ROCm/half/) | [MIT](https://github.com/ROCm/half/blob/rocm/LICENSE.txt) |
|
||||
| [HIP](https://github.com/ROCm/HIP/) | [MIT](https://github.com/ROCm/HIP/blob/amd-staging/LICENSE.txt) |
|
||||
| [hipamd](https://github.com/ROCm/clr/tree/amd-staging/hipamd) | [MIT](https://github.com/ROCm/clr/blob/amd-staging/hipamd/LICENSE.txt) |
|
||||
| [hipBLAS](https://github.com/ROCm/hipBLAS/) | [MIT](https://github.com/ROCm/hipBLAS/blob/develop/LICENSE.md) |
|
||||
| [hipBLASLt](https://github.com/ROCm/hipBLASLt/) | [MIT](https://github.com/ROCm/hipBLASLt/blob/develop/LICENSE.md) |
|
||||
| [HIP](https://github.com/ROCm/rocm-systems/tree/develop/projects/hip/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/hip/LICENSE.md) |
|
||||
| [hipamd](https://github.com/ROCm/rocm-systems/tree/develop/projects/clr/hipamd/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/clr/hipamd/LICENSE.md) |
|
||||
| [hipBLAS](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipblas/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipblas/LICENSE.md) |
|
||||
| [hipBLASLt](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipblaslt/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipblaslt/LICENSE.md) |
|
||||
| [HIPCC](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/hipcc) | [MIT](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/hipcc/LICENSE.txt) |
|
||||
| [hipCUB](https://github.com/ROCm/hipCUB/) | [Custom](https://github.com/ROCm/hipCUB/blob/develop/LICENSE.txt) |
|
||||
| [hipFFT](https://github.com/ROCm/hipFFT/) | [MIT](https://github.com/ROCm/hipFFT/blob/develop/LICENSE.md) |
|
||||
| [hipCUB](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipcub/) | [Custom](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipcub/LICENSE.txt) |
|
||||
| [hipFFT](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipfft/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipfft/LICENSE.md) |
|
||||
| [hipfort](https://github.com/ROCm/hipfort/) | [MIT](https://github.com/ROCm/hipfort/blob/develop/LICENSE) |
|
||||
| [HIPIFY](https://github.com/ROCm/HIPIFY/) | [MIT](https://github.com/ROCm/HIPIFY/blob/amd-staging/LICENSE.txt) |
|
||||
| [hipRAND](https://github.com/ROCm/hipRAND/) | [MIT](https://github.com/ROCm/hipRAND/blob/develop/LICENSE.txt) |
|
||||
| [hipSOLVER](https://github.com/ROCm/hipSOLVER/) | [MIT](https://github.com/ROCm/hipSOLVER/blob/develop/LICENSE.md) |
|
||||
| [hipSPARSE](https://github.com/ROCm/hipSPARSE/) | [MIT](https://github.com/ROCm/hipSPARSE/blob/develop/LICENSE.md) |
|
||||
| [hipSPARSELt](https://github.com/ROCm/hipSPARSELt/) | [MIT](https://github.com/ROCm/hipSPARSELt/blob/develop/LICENSE.md) |
|
||||
| [hipTensor](https://github.com/ROCm/hipTensor) | [MIT](https://github.com/ROCm/hipTensor/blob/develop/LICENSE) |
|
||||
| hsa-amd-aqlprofile | [AMD Software EULA](https://www.amd.com/en/legal/eula/amd-software-eula.html) |
|
||||
| [hipRAND](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hiprand/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hiprand/LICENSE.md) |
|
||||
| [hipSOLVER](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipsolver/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipsolver/LICENSE.md) |
|
||||
| [hipSPARSE](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipsparse/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipsparse/LICENSE.md) |
|
||||
| [hipSPARSELt](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipsparselt/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hipsparselt/LICENSE.md) |
|
||||
| [hipTensor](https://github.com/ROCm/rocm-libraries/tree/develop/projects/hiptensor/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/hiptensor/LICENSE) |
|
||||
| [llvm-project](https://github.com/ROCm/llvm-project/) | [Apache](https://github.com/ROCm/llvm-project/blob/amd-staging/LICENSE.TXT) |
|
||||
| [llvm-project/flang](https://github.com/ROCm/llvm-project/tree/amd-staging/flang) | [Apache 2.0](https://github.com/ROCm/llvm-project/blob/amd-staging/flang/LICENSE.TXT) |
|
||||
| [MIGraphX](https://github.com/ROCm/AMDMIGraphX/) | [MIT](https://github.com/ROCm/AMDMIGraphX/blob/develop/LICENSE) |
|
||||
| [MIOpen](https://github.com/ROCm/MIOpen/) | [MIT](https://github.com/ROCm/MIOpen/blob/develop/LICENSE.txt) |
|
||||
| [MIOpen](https://github.com/ROCm/rocm-libraries/tree/develop/projects/miopen/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/miopen/LICENSE.md) |
|
||||
| [MIVisionX](https://github.com/ROCm/MIVisionX/) | [MIT](https://github.com/ROCm/MIVisionX/blob/develop/LICENSE.txt) |
|
||||
| [rocAL](https://github.com/ROCm/rocAL) | [MIT](https://github.com/ROCm/rocAL/blob/develop/LICENSE.txt) |
|
||||
| [rocALUTION](https://github.com/ROCm/rocALUTION/) | [MIT](https://github.com/ROCm/rocALUTION/blob/develop/LICENSE.md) |
|
||||
| [rocBLAS](https://github.com/ROCm/rocBLAS/) | [MIT](https://github.com/ROCm/rocBLAS/blob/develop/LICENSE.md) |
|
||||
| [rocBLAS](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocblas/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocblas/LICENSE.md) |
|
||||
| [ROCdbgapi](https://github.com/ROCm/ROCdbgapi/) | [MIT](https://github.com/ROCm/ROCdbgapi/blob/amd-staging/LICENSE.txt) |
|
||||
| [rocDecode](https://github.com/ROCm/rocDecode) | [MIT](https://github.com/ROCm/rocDecode/blob/develop/LICENSE) |
|
||||
| [rocFFT](https://github.com/ROCm/rocFFT/) | [MIT](https://github.com/ROCm/rocFFT/blob/develop/LICENSE.md) |
|
||||
| [rocFFT](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocfft/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocfft/LICENSE.md) |
|
||||
| [ROCgdb](https://github.com/ROCm/ROCgdb/) | [GNU General Public License v3.0](https://github.com/ROCm/ROCgdb/blob/amd-staging/COPYING3) |
|
||||
| [rocJPEG](https://github.com/ROCm/rocJPEG/) | [MIT](https://github.com/ROCm/rocJPEG/blob/develop/LICENSE) |
|
||||
| [ROCK-Kernel-Driver](https://github.com/ROCm/ROCK-Kernel-Driver/) | [GPL 2.0 WITH Linux-syscall-note](https://github.com/ROCm/ROCK-Kernel-Driver/blob/master/COPYING) |
|
||||
| [rocminfo](https://github.com/ROCm/rocminfo/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocminfo/blob/amd-staging/License.txt) |
|
||||
| [rocminfo](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocminfo/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocminfo/License.txt) |
|
||||
| [ROCm Bandwidth Test](https://github.com/ROCm/rocm_bandwidth_test/) | [MIT](https://github.com/ROCm/rocm_bandwidth_test/blob/master/LICENSE.txt) |
|
||||
| [ROCm CMake](https://github.com/ROCm/rocm-cmake/) | [MIT](https://github.com/ROCm/rocm-cmake/blob/develop/LICENSE) |
|
||||
| [ROCm Communication Collectives Library (RCCL)](https://github.com/ROCm/rccl/) | [Custom](https://github.com/ROCm/rccl/blob/develop/LICENSE.txt) |
|
||||
| [ROCm-Core](https://github.com/ROCm/rocm-core) | [MIT](https://github.com/ROCm/rocm-core/blob/master/copyright) |
|
||||
| [ROCm Compute Profiler](https://github.com/ROCm/rocprofiler-compute) | [MIT](https://github.com/ROCm/rocprofiler-compute/blob/amd-staging/LICENSE) |
|
||||
| [ROCm Data Center (RDC)](https://github.com/ROCm/rdc/) | [MIT](https://github.com/ROCm/rdc/blob/amd-staging/LICENSE) |
|
||||
| [ROCm-Core](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocm-core/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocm-core/LICENSE.md) |
|
||||
| [ROCm Compute Profiler](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler-compute/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocprofiler-compute/LICENSE.md) |
|
||||
| [ROCm Data Center (RDC)](https://github.com/ROCm/rocm-systems/tree/develop/projects/rdc/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rdc/LICENSE.md) |
|
||||
| [ROCm-Device-Libs](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/device-libs) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/device-libs/LICENSE.TXT) |
|
||||
| [ROCm-OpenCL-Runtime](https://github.com/ROCm/clr/tree/amd-staging/opencl) | [MIT](https://github.com/ROCm/clr/blob/amd-staging/opencl/LICENSE.txt) |
|
||||
| [ROCm-OpenCL-Runtime](https://github.com/ROCm/rocm-systems/tree/develop/projects/clr/opencl/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/clr/opencl/LICENSE.md) |
|
||||
| [ROCm Performance Primitives (RPP)](https://github.com/ROCm/rpp) | [MIT](https://github.com/ROCm/rpp/blob/develop/LICENSE) |
|
||||
| [ROCm SMI Lib](https://github.com/ROCm/rocm_smi_lib/) | [MIT](https://github.com/ROCm/rocm_smi_lib/blob/amd-staging/License.txt) |
|
||||
| [ROCm Systems Profiler](https://github.com/ROCm/rocprofiler-systems) | [MIT](https://github.com/ROCm/rocprofiler-systems/blob/amd-staging/LICENSE) |
|
||||
| [ROCm SMI Lib](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocm-smi-lib/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocm-smi-lib/LICENSE.md) |
|
||||
| [ROCm Systems Profiler](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler-systems/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocprofiler-systems/LICENSE.md) |
|
||||
| [ROCm Validation Suite](https://github.com/ROCm/ROCmValidationSuite/) | [MIT](https://github.com/ROCm/ROCmValidationSuite/blob/master/LICENSE) |
|
||||
| [rocPRIM](https://github.com/ROCm/rocPRIM/) | [MIT](https://github.com/ROCm/rocPRIM/blob/develop/LICENSE.txt) |
|
||||
| [ROCProfiler](https://github.com/ROCm/rocprofiler/) | [MIT](https://github.com/ROCm/rocprofiler/blob/amd-staging/LICENSE) |
|
||||
| [ROCprofiler-SDK](https://github.com/ROCm/rocprofiler-sdk) | [MIT](https://github.com/ROCm/rocprofiler-sdk/blob/amd-mainline/LICENSE) |
|
||||
| [rocPRIM](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocprim/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocprim/LICENSE.md) |
|
||||
| [ROCProfiler](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocprofiler/LICENSE.md) |
|
||||
| [ROCprofiler-SDK](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler-sdk/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocprofiler-sdk/LICENSE.md) |
|
||||
| [rocPyDecode](https://github.com/ROCm/rocPyDecode) | [MIT](https://github.com/ROCm/rocPyDecode/blob/develop/LICENSE.txt) |
|
||||
| [rocRAND](https://github.com/ROCm/rocRAND/) | [MIT](https://github.com/ROCm/rocRAND/blob/develop/LICENSE.txt) |
|
||||
| [rocRAND](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocrand/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocrand/LICENSE.md) |
|
||||
| [ROCr Debug Agent](https://github.com/ROCm/rocr_debug_agent/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocr_debug_agent/blob/amd-staging/LICENSE.txt) |
|
||||
| [ROCR-Runtime](https://github.com/ROCm/ROCR-Runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/ROCR-Runtime/blob/amd-staging/LICENSE.txt) |
|
||||
| [ROCR-Runtime](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocr-runtime/) | [The University of Illinois/NCSA](https://github.com/ROCm/rocm-systems/blob/develop/projects/rocr-runtime/LICENSE.txt) |
|
||||
| [rocSHMEM](https://github.com/ROCm/rocSHMEM/) | [MIT](https://github.com/ROCm/rocSHMEM/blob/develop/LICENSE.md) |
|
||||
| [rocSOLVER](https://github.com/ROCm/rocSOLVER/) | [BSD-2-Clause](https://github.com/ROCm/rocSOLVER/blob/develop/LICENSE.md) |
|
||||
| [rocSPARSE](https://github.com/ROCm/rocSPARSE/) | [MIT](https://github.com/ROCm/rocSPARSE/blob/develop/LICENSE.md) |
|
||||
| [rocThrust](https://github.com/ROCm/rocThrust/) | [Apache 2.0](https://github.com/ROCm/rocThrust/blob/develop/LICENSE) |
|
||||
| [ROCTracer](https://github.com/ROCm/roctracer/) | [MIT](https://github.com/ROCm/roctracer/blob/amd-master/LICENSE) |
|
||||
| [rocWMMA](https://github.com/ROCm/rocWMMA/) | [MIT](https://github.com/ROCm/rocWMMA/blob/develop/LICENSE.md) |
|
||||
| [Tensile](https://github.com/ROCm/Tensile/) | [MIT](https://github.com/ROCm/Tensile/blob/develop/LICENSE.md) |
|
||||
| [rocSOLVER](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocsolver/) | [BSD-2-Clause](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocsolver/LICENSE.md) |
|
||||
| [rocSPARSE](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocsparse/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocsparse/LICENSE.md) |
|
||||
| [rocThrust](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocthrust/) | [Apache 2.0](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocthrust/LICENSE) |
|
||||
| [ROCTracer](https://github.com/ROCm/rocm-systems/tree/develop/projects/roctracer/) | [MIT](https://github.com/ROCm/rocm-systems/blob/develop/projects/roctracer/LICENSE.md) |
|
||||
| [rocWMMA](https://github.com/ROCm/rocm-libraries/tree/develop/projects/rocwmma/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocwmma/LICENSE.md) |
|
||||
| [Tensile](https://github.com/ROCm/rocm-libraries/tree/develop/shared/tensile/) | [MIT](https://github.com/ROCm/rocm-libraries/blob/develop/shared/tensile/LICENSE.md) |
|
||||
| [TransferBench](https://github.com/ROCm/TransferBench) | [MIT](https://github.com/ROCm/TransferBench/blob/develop/LICENSE.md) |
|
||||
|
||||
Open sourced ROCm components are released via public GitHub
|
||||
@@ -132,12 +132,10 @@ companies.
|
||||
### Package licensing
|
||||
|
||||
:::{attention}
|
||||
AQL Profiler and AOCC CPU optimization are both provided in binary form, each
|
||||
subject to the license agreement enclosed in the directory for the binary available
|
||||
in `/opt/rocm/share/doc/hsa-amd-aqlprofile/EULA`. By using, installing,
|
||||
copying or distributing AQL Profiler and/or AOCC CPU Optimizations, you agree to
|
||||
ROCprof Trace Decoder and AOCC CPU optimizations are provided in binary form, subject to the license agreement enclosed on [GitHub](https://github.com/ROCm/rocprof-trace-decoder/blob/amd-mainline/LICENSE) for ROCprof Trace Decoder, and [Developer Central](https://www.amd.com/en/developer/aocc.html) for AOCC. By using, installing,
|
||||
copying or distributing ROCprof Trace Decoder or AOCC CPU Optimizations, you agree to
|
||||
the terms and conditions of this license agreement. If you do not agree to the
|
||||
terms of this agreement, do not install, copy or use the AQL Profiler and/or the
|
||||
terms of this agreement, do not install, copy or use ROCprof Trace Decoder or the
|
||||
AOCC CPU Optimizations.
|
||||
:::
|
||||
|
||||
|
||||
@@ -1,131 +1,136 @@
|
||||
ROCm Version,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
|
||||
,,,,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,"SLES 15 SP7, SP6","SLES 15 SP7, SP6",SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,,,,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
|
||||
,"Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
|
||||
,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
|
||||
,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,,,,,,,,,,,,
|
||||
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
|
||||
,RDNA4,RDNA4,RDNA4,,,,,,,,,,,,,,,
|
||||
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
|
||||
,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
|
||||
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
|
||||
:doc:`verl <../compatibility/ml-compatibility/verl-compatibility>` [#verl_compat]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.3.0.post0,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>`,N/A,N/A,N/A,N/A,85f95ae,85f95ae,85f95ae,85f95ae,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat]_,N/A,N/A,N/A,2.4.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,
|
||||
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>`,N/A,N/A,N/A,N/A,0.7.0,0.7.0,0.7.0,0.7.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Taichi <../compatibility/ml-compatibility/taichi-compatibility>` [#taichi_compat]_,N/A,N/A,N/A,N/A,N/A,1.8.0b1,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.2,1.2,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
Thrust,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
CUB,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
KMD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`KMD versions <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.12.0,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
|
||||
:doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
|
||||
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.3.1,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.10,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.22.3,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
|
||||
:doc:`rocSHMEM <rocshmem:index>`,2.0.1,2.0.1,2.0.0,2.0.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.1,0.12.1,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.18,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
|
||||
:doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.6.0,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.12.0,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.3,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.3,3.2.3,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,4.4.1,4.4.1,4.4.0,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.32,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.28.2,3.28.2,3.28.0,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.7.0,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.43.0,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.4.1,3.4.1,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
SUPPORT LIBS,,,,,,,,,,,,,,,,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43483,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,25.5.1,25.5.1,25.4.2,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.7.0,7.5.0,7.5.0,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
PERFORMANCE TOOLS,,,,,,,,,,,,,,,,,,
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.1,3.1.1,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.2,1.0.2,1.0.1,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60403,2.0.60402,2.0.60401,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60403,4.1.60402,4.1.60401,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
DEVELOPMENT TOOLS,,,,,,,,,,,,,,,,,,
|
||||
:doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,19.0.0,19.0.0,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.2,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.4,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
:doc:`HIP <hip:index>`,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.15.0,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
|
||||
ROCm Version,7.1.1,7.1.0,7.0.2,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>` [#os-compatibility-past-60]_,Ubuntu 24.04.3,Ubuntu 24.04.3,Ubuntu 24.04.3,Ubuntu 24.04.3,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
|
||||
,,,,,,,,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 10.1, 10.0, 9.7, 9.6, 9.4","RHEL 10.0, 9.6, 9.4","RHEL 10.0, 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,SLES 15 SP7,SLES 15 SP7,SLES 15 SP7,SLES 15 SP7,"SLES 15 SP7, SP6","SLES 15 SP7, SP6",SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,,,,,,,,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
|
||||
,"Oracle Linux 10, 9, 8","Oracle Linux 10, 9, 8","Oracle Linux 10, 9, 8","Oracle Linux 9, 8","Oracle Linux 9, 8","Oracle Linux 9, 8","Oracle Linux 9, 8","Oracle Linux 9, 8",Oracle Linux 8.10,Oracle Linux 8.10,Oracle Linux 8.10,Oracle Linux 8.10,Oracle Linux 8.9,Oracle Linux 8.9,Oracle Linux 8.9,Oracle Linux 8.9,Oracle Linux 8.9,Oracle Linux 8.9,Oracle Linux 8.9,,,
|
||||
,"Debian 13, 12","Debian 13, 12","Debian 13, 12",Debian 12,Debian 12,Debian 12,Debian 12,Debian 12,Debian 12,Debian 12,Debian 12,,,,,,,,,,,
|
||||
,,,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,Azure Linux 3.0,,,,,,,,,,,,
|
||||
,Rocky Linux 9,Rocky Linux 9,Rocky Linux 9,Rocky Linux 9,,,,,,,,,,,,,,,,,,
|
||||
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA4,CDNA4,CDNA4,CDNA4,,,,,,,,,,,,,,,,,,
|
||||
,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
|
||||
,RDNA4,RDNA4,RDNA4,RDNA4,RDNA4,RDNA4,RDNA4,,,,,,,,,,,,,,,
|
||||
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>` [#gpu-compatibility-past-60]_,gfx950,gfx950,gfx950,gfx950,,,,,,,,,,,,,,,,,,
|
||||
,gfx1201,gfx1201,gfx1201,gfx1201,gfx1201,gfx1201,gfx1201,,,,,,,,,,,,,,,
|
||||
,gfx1200,gfx1200,gfx1200,gfx1200,gfx1200,gfx1200,gfx1200,,,,,,,,,,,,,,,
|
||||
,gfx1101,gfx1101,gfx1101,gfx1101,gfx1101,gfx1101,gfx1101,,,,,,,,,,,,,,,
|
||||
,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
|
||||
,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942, gfx942, gfx942, gfx942, gfx942, gfx942, gfx942
|
||||
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
|
||||
,,,,,,,,,,,,,,,,,,,,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.9, 2.8, 2.7","2.8, 2.7, 2.6","2.8, 2.7, 2.6","2.7, 2.6, 2.5","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.20.0, 2.19.1, 2.18.1","2.20.0, 2.19.1, 2.18.1","2.19.1, 2.18.1, 2.17.1 [#tf-mi350-past-60]_","2.19.1, 2.18.1, 2.17.1 [#tf-mi350-past-60]_","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.7.1,0.7.1,0.6.0,0.6.0,0.4.35,0.4.35,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
|
||||
:doc:`verl <../compatibility/ml-compatibility/verl-compatibility>` [#verl_compat-past-60]_,N/A,N/A,N/A,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.3.0.post0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>` [#stanford-megatron-lm_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,85f95ae,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat-past-60]_,N/A,N/A,N/A,2.4.0,2.4.0,N/A,N/A,2.4.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>` [#megablocks_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.7.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Ray <../compatibility/ml-compatibility/ray-compatibility>` [#ray_compat-past-60]_,N/A,N/A,N/A,2.51.1,N/A,N/A,2.48.0.post0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat-past-60]_,N/A,N/A,N/A,b6652,b6356,b6356,b6356,b5997,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`FlashInfer <../compatibility/ml-compatibility/flashinfer-compatibility>` [#flashinfer_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,v0.2.5,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.23.1,1.22.0,1.22.0,1.22.0,1.20.0,1.20.0,1.20.0,1.20.0,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
|
||||
,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.4.0,>=1.4.0,>=1.4.0,>=1.4.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.17.0,>=1.17.0,>=1.17.0,>=1.17.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
|
||||
,,,,,,,,,,,,,,,,,,,,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
|
||||
Thrust,2.8.5,2.8.5,2.6.0,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
CUB,2.8.5,2.8.5,2.6.0,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
,,,,,,,,,,,,,,,,,,,,,,
|
||||
DRIVER & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
|
||||
:doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.20.1, 30.20.0 [#mi325x_KVM-past-60]_, 30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x","30.20.0 [#mi325x_KVM-past-60]_, 30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x","30.10.2, 30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x","30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x, 6.2.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
|
||||
,,,,,,,,,,,,,,,,,,,,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.14.0,2.14.0,2.13.0,2.13.0,2.12.0,2.12.0,2.12.0,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.5.1,3.5.1,3.5.0,3.5.0,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.4.0,3.4.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
|
||||
:doc:`rocAL <rocal:index>`,2.4.0,2.4.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,1.4.0,1.4.0,1.0.0,1.0.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
|
||||
:doc:`rocJPEG <rocjpeg:index>`,1.2.0,1.2.0,1.1.0,1.1.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.7.0,0.7.0,0.6.0,0.6.0,0.3.1,0.3.1,0.3.1,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`RPP <rpp:index>`,2.1.0,2.1.0,2.0.0,2.0.0,1.9.10,1.9.10,1.9.10,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
|
||||
,,,,,,,,,,,,,,,,,,,,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
|
||||
:doc:`RCCL <rccl:index>`,2.27.7,2.27.7,2.26.6,2.26.6,2.22.3,2.22.3,2.22.3,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
|
||||
:doc:`rocSHMEM <rocshmem:index>`,3.1.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.0,2.0.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
,,,,,,,,,,,,,,,,,,,,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,3.1.0,3.1.0,3.0.2,3.0.0,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,1.1.0,1.1.0,1.0.0,1.0.0,0.12.1,0.12.1,0.12.1,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.21,1.0.21,1.0.20,1.0.20,1.0.18,1.0.18,1.0.18,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
|
||||
:doc:`hipfort <hipfort:index>`,0.7.1,0.7.1,0.7.0,0.7.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,3.1.0,3.1.0,3.0.0,3.0.0,2.12.0,2.12.0,2.12.0,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,3.1.0,3.1.0,3.0.0,3.0.0,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,4.1.0,4.1.0,4.0.1,4.0.1,3.2.0,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.5,0.2.5,0.2.4,0.2.4,0.2.3,0.2.3,0.2.3,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,4.0.1,4.0.1,4.0.0,4.0.0,3.2.3,3.2.3,3.2.3,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,5.1.1,5.1.0,5.0.2,5.0.0,4.4.1,4.4.1,4.4.0,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.35,1.0.35,1.0.34,1.0.34,1.0.32,1.0.32,1.0.32,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,4.1.0,4.1.0,4.0.0,4.0.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.31.0,3.31.0,3.30.1,3.30.0,3.28.2,3.28.2,3.28.0,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,4.1.0,4.1.0,4.0.2,4.0.2,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,2.1.0,2.0.0,2.0.0,2.0.0,1.7.0,1.7.0,1.7.0,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.44.0,4.44.0,4.44.0,4.44.0,4.43.0,4.43.0,4.43.0,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
|
||||
,,,,,,,,,,,,,,,,,,,,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
|
||||
:doc:`hipCUB <hipcub:index>`,4.1.0,4.1.0,4.0.0,4.0.0,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,2.0.0,2.0.0,2.0.0,2.0.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,4.1.0,4.1.0,4.0.1,4.0.0,3.4.1,3.4.1,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`rocThrust <rocthrust:index>`,4.1.0,4.1.0,4.0.0,4.0.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
,,,,,,,,,,,,,,,,,,,,,,
|
||||
SUPPORT LIBS,,,,,,,,,,,,,,,,,,,,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,7.1.52802,7.1.25424,7.0.51831,7.0.51830,6.4.43483,6.4.43483,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,7.1.1,7.1.0,7.0.2,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
|
||||
,,,,,,,,,,,,,,,,,,,,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,26.2.0,26.1.0,26.0.2,26.0.0,25.5.1,25.5.1,25.4.2,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,1.2.0,1.2.0,1.1.0,1.1.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.8.0,7.8.0,7.8.0,7.8.0,7.7.0,7.5.0,7.5.0,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.3.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
|
||||
,,,,,,,,,,,,,,,,,,,,,,
|
||||
PERFORMANCE TOOLS,,,,,,,,,,,,,,,,,,,,,,
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,2.6.0,2.6.0,2.6.0,2.6.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.3.1,3.3.0,3.2.3,3.2.3,3.1.1,3.1.1,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.2.1,1.2.0,1.1.1,1.1.0,1.0.2,1.0.2,1.0.1,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.70101,2.0.70100,2.0.70002,2.0.70000,2.0.60403,2.0.60402,2.0.60401,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,1.0.0,1.0.0,1.0.0,1.0.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.70101,4.1.70100,4.1.70002,4.1.70000,4.1.60403,4.1.60402,4.1.60401,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
|
||||
,,,,,,,,,,,,,,,,,,,,,,
|
||||
DEVELOPMENT TOOLS,,,,,,,,,,,,,,,,,,,,,,
|
||||
:doc:`HIPIFY <hipify:index>`,20.0.0,20.0.0,20.0.0,20.0.0,19.0.0,19.0.0,19.0.0,19.0.0,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.4,0.77.4,0.77.4,0.77.3,0.77.2,0.77.2,0.77.2,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,16.3.0,16.3.0,16.3.0,16.3.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.1.0,2.1.0,2.1.0,2.1.0,2.0.4,2.0.4,2.0.4,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
|
||||
,,,,,,,,,,,,,,,,,,,,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,20.0.025444,20.0.025425,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,20.0.025444,20.0.025425,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,20.0.025444,20.0.025425,20.0.0.25385,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
,,,,,,,,,,,,,,,,,,,,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,,,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,7.1.52802,7.1.25424,7.0.51831,7.0.51830,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
:doc:`HIP <hip:index>`,7.1.52802,7.1.25424,7.0.51831,7.0.51830,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.18.0,1.18.0,1.18.0,1.18.0,1.15.0,1.15.0,1.15.0,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
|
||||
|
||||
|
@@ -10,10 +10,9 @@ Use this matrix to view the ROCm compatibility and system requirements across su
|
||||
|
||||
You can also refer to the :ref:`past versions of ROCm compatibility matrix<past-rocm-compatibility-matrix>`.
|
||||
|
||||
Accelerators and GPUs listed in the following table support compute workloads (no display
|
||||
information or graphics). If you’re using ROCm with AMD Radeon or Radeon Pro GPUs for graphics
|
||||
workloads, see the `Use ROCm on Radeon GPU documentation
|
||||
<https://rocm.docs.amd.com/projects/radeon/en/latest/docs/compatibility.html>`_ to verify
|
||||
GPUs listed in the following table support compute workloads (no display
|
||||
information or graphics). If you’re using ROCm with AMD Radeon GPUs or Ryzen APUs for graphics
|
||||
workloads, see the :doc:`Use ROCm on Radeon and Ryzen <radeon:index>` to verify
|
||||
compatibility and system requirements.
|
||||
|
||||
.. |br| raw:: html
|
||||
@@ -23,28 +22,31 @@ compatibility and system requirements.
|
||||
.. container:: format-big-table
|
||||
|
||||
.. csv-table::
|
||||
:header: "ROCm Version", "6.4.3", "6.4.2", "6.3.0"
|
||||
:header: "ROCm Version", "7.1.1", "7.1.0", "6.4.0"
|
||||
:stub-columns: 1
|
||||
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>` [#os-compatibility]_,Ubuntu 24.04.3,Ubuntu 24.04.3,Ubuntu 24.04.2
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5
|
||||
,"RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.5, 9.4"
|
||||
,"RHEL 10.1, 10.0, 9.7, |br| 9.6, 9.4","RHEL 10.0, 9.6, 9.4","RHEL 9.5, 9.4"
|
||||
,RHEL 8.10,RHEL 8.10,RHEL 8.10
|
||||
,"SLES 15 SP7, SP6","SLES 15 SP7, SP6","SLES 15 SP6, SP5"
|
||||
,"Oracle Linux 9, 8 [#mi300x]_","Oracle Linux 9, 8 [#mi300x]_",Oracle Linux 8.10 [#mi300x]_
|
||||
,Debian 12 [#single-node]_,Debian 12 [#single-node]_,
|
||||
,Azure Linux 3.0 [#mi300x]_,Azure Linux 3.0 [#mi300x]_,
|
||||
,SLES 15 SP7,SLES 15 SP7,SLES 15 SP6
|
||||
,"Oracle Linux 10, 9, 8","Oracle Linux 10, 9, 8","Oracle Linux 9, 8"
|
||||
,"Debian 13, 12","Debian 13, 12",Debian 12
|
||||
,,,Azure Linux 3.0
|
||||
,Rocky Linux 9,Rocky Linux 9,
|
||||
,.. _architecture-support-compatibility-matrix:,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA4,CDNA4,
|
||||
,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA
|
||||
,RDNA4,RDNA4,
|
||||
,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix:,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1201 [#RDNA-OS]_,gfx1201 [#RDNA-OS]_,
|
||||
,gfx1200 [#RDNA-OS]_,gfx1200 [#RDNA-OS]_,
|
||||
,gfx1101 [#RDNA-OS]_ [#7700XT-OS]_,gfx1101 [#RDNA-OS]_ [#7700XT-OS]_,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>` [#gpu-compatibility]_,gfx950,gfx950,
|
||||
,gfx1201,gfx1201,
|
||||
,gfx1200,gfx1200,
|
||||
,gfx1101,gfx1101,
|
||||
,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030
|
||||
,gfx942,gfx942,gfx942
|
||||
@@ -52,151 +54,122 @@ compatibility and system requirements.
|
||||
,gfx908,gfx908,gfx908
|
||||
,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix:,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.31
|
||||
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>`,N/A,N/A,85f95ae
|
||||
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>`,N/A,N/A,0.7.0
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.17.3
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.9, 2.8, 2.7","2.8, 2.7, 2.6","2.6, 2.5, 2.4, 2.3"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.20.0, 2.19.1, 2.18.1","2.20.0, 2.19.1, 2.18.1","2.18.1, 2.17.1, 2.16.2"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.7.1,0.7.1,0.4.35
|
||||
:doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat]_,N/A,N/A,2.4.0
|
||||
:doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat]_,N/A,N/A,b5997
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.23.1,1.22.0,1.20.0
|
||||
,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix:,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.4.0,>=1.4.0,>=1.3.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.17.0,>=1.17.0,>=1.15.0
|
||||
,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix:,,
|
||||
Thrust,2.5.0,2.5.0,2.3.2
|
||||
CUB,2.5.0,2.5.0,2.3.2
|
||||
Thrust,2.8.5,2.8.5,2.5.0
|
||||
CUB,2.8.5,2.8.5,2.5.0
|
||||
,,,
|
||||
KMD & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,,
|
||||
:doc:`KMD versions <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
|
||||
DRIVER & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,,
|
||||
:doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.20.1, 30.20.0 [#mi325x_KVM]_, |br| 30.10.2, 30.10.1 [#driver_patch]_, |br| 30.10, 6.4.x","30.20.0 [#mi325x_KVM]_, 30.10.2, |br| 30.10.1 [#driver_patch]_, 30.10, 6.4.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
|
||||
,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.11.0
|
||||
:doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.1.0
|
||||
:doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.1.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.8.0
|
||||
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.6.0
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.2.0
|
||||
:doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.1
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.14.0,2.14.0,2.12.0
|
||||
:doc:`MIOpen <miopen:index>`,3.5.1,3.5.1,3.4.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.4.0,3.4.0,3.2.0
|
||||
:doc:`rocAL <rocal:index>`,2.4.0,2.4.0,2.2.0
|
||||
:doc:`rocDecode <rocdecode:index>`,1.4.0,1.4.0,0.10.0
|
||||
:doc:`rocJPEG <rocjpeg:index>`,1.2.0,1.2.0,0.8.0
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.7.0,0.7.0,0.3.1
|
||||
:doc:`RPP <rpp:index>`,2.1.0,2.1.0,1.9.10
|
||||
,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix:,,
|
||||
:doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.21.5
|
||||
:doc:`rocSHMEM <rocshmem:index>`,2.0.1,2.0.1,N/A
|
||||
:doc:`RCCL <rccl:index>`,2.27.7,2.27.7,2.22.3
|
||||
:doc:`rocSHMEM <rocshmem:index>`,3.1.0,3.0.0,2.0.0
|
||||
,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix:,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.3.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.1,0.10.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.17
|
||||
:doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.5.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.11.0
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.3.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.1.2
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.2
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.3,3.2.1
|
||||
:doc:`rocBLAS <rocblas:index>`,4.4.1,4.4.1,4.3.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.31
|
||||
:doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.2.0
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.28.2,3.28.2,3.27.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.6.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.42.0
|
||||
:doc:`hipBLAS <hipblas:index>`,3.1.0,3.1.0,2.4.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,1.1.0,1.1.0,0.12.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.21,1.0.21,1.0.18
|
||||
:doc:`hipfort <hipfort:index>`,0.7.1,0.7.1,0.6.0
|
||||
:doc:`hipRAND <hiprand:index>`,3.1.0,3.1.0,2.12.0
|
||||
:doc:`hipSOLVER <hipsolver:index>`,3.1.0,3.1.0,2.4.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,4.1.0,4.1.0,3.2.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.5,0.2.5,0.2.3
|
||||
:doc:`rocALUTION <rocalution:index>`,4.0.1,4.0.1,3.2.2
|
||||
:doc:`rocBLAS <rocblas:index>`,5.1.1,5.1.0,4.4.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.35,1.0.35,1.0.32
|
||||
:doc:`rocRAND <rocrand:index>`,4.1.0,4.1.0,3.3.0
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.31.0,3.31.0,3.28.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,4.1.0,4.1.0,3.4.0
|
||||
:doc:`rocWMMA <rocwmma:index>`,2.1.0,2.0.0,1.7.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.44.0,4.44.0,4.43.0
|
||||
,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix:,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.4.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.4.1,3.4.1,3.3.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0
|
||||
:doc:`hipCUB <hipcub:index>`,4.1.0,4.1.0,3.4.0
|
||||
:doc:`hipTensor <hiptensor:index>`,2.0.0,2.0.0,1.5.0
|
||||
:doc:`rocPRIM <rocprim:index>`,4.1.0,4.1.0,3.4.0
|
||||
:doc:`rocThrust <rocthrust:index>`,4.1.0,4.1.0,3.3.0
|
||||
,,,
|
||||
SUPPORT LIBS,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43483,6.3.42131
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.3,6.4.2,6.3.0
|
||||
`hipother <https://github.com/ROCm/hipother>`_,7.1.52802,7.1.25424,6.4.43482
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,7.1.1,7.1.0,6.4.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_
|
||||
,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix:,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,25.5.1,25.5.1,24.7.1
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0
|
||||
:doc:`AMD SMI <amdsmi:index>`,26.2.0,26.1.0,25.3.0
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,1.2.0,1.2.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.7.0,7.5.0,7.4.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.8.0,7.8.0,7.5.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.3.0,1.2.0,1.1.0
|
||||
,,,
|
||||
PERFORMANCE TOOLS,,,
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.1,3.1.1,3.0.0
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.2,1.0.2,0.1.0
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60403,2.0.60402,2.0.60300
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.5.0
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60403,4.1.60402,4.1.60300
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,2.6.0,2.6.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.3.1,3.3.0,3.1.0
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.2.1,1.2.0,1.0.0
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.70101,2.0.70100,2.0.60400
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,1.0.0,1.0.0,0.6.0
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.70101,4.1.70100,4.1.60400
|
||||
,,,
|
||||
DEVELOPMENT TOOLS,,,
|
||||
:doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,18.0.0.24455
|
||||
:doc:`HIPIFY <hipify:index>`,20.0.0,20.0.0,19.0.0
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.3
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.4,0.77.4,0.77.2
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,16.3.0,16.3.0,15.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.5.0,0.5.0,0.4.0
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.1.0,2.1.0,2.0.4
|
||||
,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix:,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1
|
||||
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25224,19.0.0.25224,18.0.0.24455
|
||||
:doc:`llvm-project <llvm-project:index>`,19.0.0.25224,19.0.0.25224,18.0.0.24491
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25224,19.0.0.25224,18.0.0.24491
|
||||
`Flang <https://github.com/ROCm/flang>`_,20.0.025444,20.0.025425,19.0.0.25133
|
||||
:doc:`llvm-project <llvm-project:index>`,20.0.025444,20.0.025425,19.0.0.25133
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,20.0.025444,20.0.025425,19.0.0.25133
|
||||
,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix:,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43484,6.4.43484,6.3.42131
|
||||
:doc:`HIP <hip:index>`,6.4.43484,6.4.43484,6.3.42131
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,7.1.52802,7.1.25424,6.4.43482
|
||||
:doc:`HIP <hip:index>`,7.1.52802,7.1.25424,6.4.43482
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.14.0
|
||||
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.18.0,1.18.0,1.15.0
|
||||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#mi300x] Oracle Linux and Azure Linux are supported only on AMD Instinct MI300X.
|
||||
.. [#single-node] Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
|
||||
.. [#RDNA-OS] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#7700XT-OS] Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
|
||||
.. [#kfd_support] As of ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The tested user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
|
||||
.. [#os-compatibility] Some operating systems are supported on limited GPUs. For detailed information, see the latest :ref:`supported_distributions`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-operating-systems>`__, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-operating-systems>`__, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-operating-systems>`__.
|
||||
.. [#gpu-compatibility] Some GPUs have limited operating system support. For detailed information, see the latest :ref:`supported_GPUs`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-gpus>`__, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-gpus>`__, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-gpus>`__.
|
||||
.. [#dgl_compat] DGL is only supported on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0.
|
||||
.. [#llama-cpp_compat] llama.cpp is only supported on ROCm 7.0.0 and ROCm 6.4.x.
|
||||
.. [#mi325x_KVM] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0.
|
||||
.. [#driver_patch] AMD GPU Driver (amdgpu) 30.10.1 is a quality release that resolves an issue identified in the 30.10 release. There are no other significant changes or feature additions in ROCm 7.0.1 from ROCm 7.0.0. AMD GPU Driver (amdgpu) 30.10.1 is compatible with ROCm 7.0.1 and ROCm 7.0.0.
|
||||
.. [#kfd_support] As of ROCm 6.4.0, forward and backward compatibility between the AMD GPU Driver (amdgpu) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The supported user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and AMD GPU Driver support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
|
||||
.. [#ROCT-rocr] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
|
||||
|
||||
|
||||
.. _OS-kernel-versions:
|
||||
|
||||
Operating systems, kernel and Glibc versions
|
||||
*********************************************
|
||||
|
||||
Use this lookup table to confirm which operating system and kernel versions are supported with ROCm.
|
||||
|
||||
.. csv-table::
|
||||
:header: "OS", "Version", "Kernel", "Glibc"
|
||||
:widths: 40, 20, 30, 20
|
||||
:stub-columns: 1
|
||||
|
||||
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 24.04.2, "6.8 GA, 6.11 HWE", 2.39
|
||||
,,
|
||||
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 22.04.5, "5.15 GA, 6.8 HWE", 2.35
|
||||
,,
|
||||
`Red Hat Enterprise Linux (RHEL 9) <https://access.redhat.com/articles/3078#RHEL9>`_, 9.6, 5.14+, 2.34
|
||||
,9.5, 5.14+, 2.34
|
||||
,9.4, 5.14+, 2.34
|
||||
,9.3, 5.14+, 2.34
|
||||
,,
|
||||
`Red Hat Enterprise Linux (RHEL 8) <https://access.redhat.com/articles/3078#RHEL8>`_, 8.10, 4.18.0+, 2.28
|
||||
,8.9, 4.18.0, 2.28
|
||||
,,
|
||||
`SUSE Linux Enterprise Server (SLES) <https://www.suse.com/support/kb/doc/?id=000019587#SLE15SP4>`_, 15 SP7, 6.11.0+, 2.38
|
||||
,15 SP6, "6.5.0+, 6.4.0", 2.38
|
||||
,15 SP5, 5.14.21, 2.31
|
||||
,,
|
||||
`Oracle Linux <https://blogs.oracle.com/scoter/post/oracle-linux-and-unbreakable-enterprise-kernel-uek-releases>`_, 9, 5.15.0 (UEK), 2.35
|
||||
,8, 5.15.0 (UEK), 2.28
|
||||
,,
|
||||
`Debian <https://www.debian.org/download>`_,12, 6.1, 2.36
|
||||
,,
|
||||
`Azure Linux <https://techcommunity.microsoft.com/blog/linuxandopensourceblog/azure-linux-3-0-now-in-preview-on-azure-kubernetes-service-v1-31/4287229>`_,3.0, 6.6.60, 2.38
|
||||
,,
|
||||
For detailed information on operating system supported on ROCm 7.1.1 and associated Kernel and Glibc version, see the latest :ref:`supported_distributions`. For version specific information, see `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-operating-systems>`__, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-operating-systems>`__.
|
||||
|
||||
.. note::
|
||||
|
||||
@@ -228,22 +201,18 @@ Expand for full historical view of:
|
||||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#mi300x-past-60] Oracle Linux and Azure Linux are supported only on AMD Instinct MI300X.
|
||||
.. [#single-node-past-60] Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
|
||||
.. [#RDNA-OS-past-60] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#7700XT-OS-past-60] Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
|
||||
.. [#mi300_624-past-60] **For ROCm 6.2.4** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_622-past-60] **For ROCm 6.2.2** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_621-past-60] **For ROCm 6.2.1** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_620-past-60] **For ROCm 6.2.0** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_612-past-60] **For ROCm 6.1.2** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
|
||||
.. [#mi300_611-past-60] **For ROCm 6.1.1** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4 and Oracle Linux.
|
||||
.. [#mi300_610-past-60] **For ROCm 6.1.0** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
|
||||
.. [#mi300_602-past-60] **For ROCm 6.0.2** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
|
||||
.. [#mi300_600-past-60] **For ROCm 6.0.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
|
||||
.. [#verl_compat] verl is only supported on ROCm 6.2.0.
|
||||
.. [#dgl_compat] DGL is only supported on ROCm 6.4.0.
|
||||
.. [#taichi_compat] Taichi is only supported on ROCm 6.3.2.
|
||||
.. [#kfd_support-past-60] As of ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The tested user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
|
||||
.. [#os-compatibility-past-60] Some operating systems are supported on limited GPUs. For detailed information, see the latest :ref:`supported_distributions`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-operating-systems>`__, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-operating-systems>`__, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-operating-systems>`__.
|
||||
.. [#gpu-compatibility-past-60] Some GPUs have limited operating system support. For detailed information, see the latest :ref:`supported_GPUs`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-gpus>`__, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-gpus>`__, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-gpus>`__.
|
||||
.. [#tf-mi350-past-60] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead.
|
||||
.. [#verl_compat-past-60] verl is only supported on ROCm 7.0.0 and 6.2.0.
|
||||
.. [#stanford-megatron-lm_compat-past-60] Stanford Megatron-LM is only supported on ROCm 6.3.0.
|
||||
.. [#dgl_compat-past-60] DGL is only supported on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0.
|
||||
.. [#megablocks_compat-past-60] Megablocks is only supported on ROCm 6.3.0.
|
||||
.. [#ray_compat-past-60] Ray is only supported on ROCm 7.0.0 and 6.4.1.
|
||||
.. [#llama-cpp_compat-past-60] llama.cpp is only supported on ROCm 7.0.0 and 6.4.x.
|
||||
.. [#flashinfer_compat-past-60] FlashInfer is only supported on ROCm 6.4.1.
|
||||
.. [#mi325x_KVM-past-60] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0.
|
||||
.. [#driver_patch-past-60] AMD GPU Driver (amdgpu) 30.10.1 is a quality release that resolves an issue identified in the 30.10 release. There are no other significant changes or feature additions in ROCm 7.0.1 from ROCm 7.0.0. AMD GPU Driver (amdgpu) 30.10.1 is compatible with ROCm 7.0.1 and ROCm 7.0.0.
|
||||
.. [#kfd_support-past-60] As of ROCm 6.4.0, forward and backward compatibility between the AMD GPU Driver (amdgpu) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The supported user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and AMD GPU Driver support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
|
||||
.. [#ROCT-rocr-past-60] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
.. meta::
|
||||
:description: Deep Graph Library (DGL) compatibility
|
||||
:keywords: GPU, DGL compatibility
|
||||
:keywords: GPU, CPU, deep graph library, DGL, deep learning, framework compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
@@ -10,215 +10,274 @@
|
||||
DGL compatibility
|
||||
********************************************************************************
|
||||
|
||||
Deep Graph Library `(DGL) <https://www.dgl.ai/>`_ is an easy-to-use, high-performance and scalable
|
||||
Deep Graph Library (`DGL <https://www.dgl.ai/>`__) is an easy-to-use, high-performance, and scalable
|
||||
Python package for deep learning on graphs. DGL is framework agnostic, meaning
|
||||
if a deep graph model is a component in an end-to-end application, the rest of
|
||||
that if a deep graph model is a component in an end-to-end application, the rest of
|
||||
the logic is implemented using PyTorch.
|
||||
|
||||
* ROCm support for DGL is hosted in the `https://github.com/ROCm/dgl <https://github.com/ROCm/dgl>`_ repository.
|
||||
* Due to independent compatibility considerations, this location differs from the `https://github.com/dmlc/dgl <https://github.com/dmlc/dgl>`_ upstream repository.
|
||||
* Use the prebuilt :ref:`Docker images <dgl-docker-compat>` with DGL, PyTorch, and ROCm preinstalled.
|
||||
* See the :doc:`ROCm DGL installation guide <rocm-install-on-linux:install/3rd-party/dgl-install>`
|
||||
to install and get started.
|
||||
DGL provides a high-performance graph object that can reside on either CPUs or GPUs.
|
||||
It bundles structural data features for better control and provides a variety of functions
|
||||
for computing with graph objects, including efficient and customizable message passing
|
||||
primitives for Graph Neural Networks.
|
||||
|
||||
|
||||
Supported devices
|
||||
Support overview
|
||||
================================================================================
|
||||
|
||||
- **Officially Supported**: TF32 with AMD Instinct MI300X (through hipblaslt)
|
||||
- **Partially Supported**: TF32 with AMD Instinct MI250X
|
||||
- The ROCm-supported version of DGL is maintained in the official `https://github.com/ROCm/dgl
|
||||
<https://github.com/ROCm/dgl>`__ repository, which differs from the
|
||||
`https://github.com/dmlc/dgl <https://github.com/dmlc/dgl>`__ upstream repository.
|
||||
|
||||
- To get started and install DGL on ROCm, use the prebuilt :ref:`Docker images <dgl-docker-compat>`,
|
||||
which include ROCm, DGL, and all required dependencies.
|
||||
|
||||
.. _dgl-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
DGL can be used for Graph Learning, and building popular graph models like
|
||||
GAT, GCN and GraphSage. Using these we can support a variety of use-cases such as:
|
||||
|
||||
- Recommender systems
|
||||
- Network Optimization and Analysis
|
||||
- 1D (Temporal) and 2D (Image) Classification
|
||||
- Drug Discovery
|
||||
|
||||
Multiple use cases of DGL have been tested and verified.
|
||||
However, a recommended example follows a drug discovery pipeline using the ``SE3Transformer``.
|
||||
Refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`_,
|
||||
where you can search for DGL examples and best practices to optimize your training workflows on AMD GPUs.
|
||||
|
||||
Coverage includes:
|
||||
|
||||
- Single-GPU training/inference
|
||||
- Multi-GPU training
|
||||
- See the :doc:`ROCm DGL installation guide <rocm-install-on-linux:install/3rd-party/dgl-install>`
|
||||
for installation and setup instructions.
|
||||
|
||||
- You can also consult the upstream `Installation guide <https://www.dgl.ai/pages/start.html>`__
|
||||
for additional context.
|
||||
|
||||
.. _dgl-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
Compatibility matrix
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes `DGL images <https://hub.docker.com/r/rocm/dgl>`_
|
||||
with ROCm and Pytorch backends on Docker Hub. The following Docker image tags and associated
|
||||
inventories were tested on `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`_.
|
||||
AMD validates and publishes `DGL images <https://hub.docker.com/r/rocm/dgl/tags>`__
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and associated
|
||||
inventories represent the latest available DGL version from the official Docker Hub.
|
||||
Click the |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table:: DGL Docker image components
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Docker
|
||||
* - Docker image
|
||||
- ROCm
|
||||
- DGL
|
||||
- PyTorch
|
||||
- Ubuntu
|
||||
- Python
|
||||
- GPU
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-8ce2c3bcfaa137ab94a75f9e2ea711894748980f57417739138402a542dd5564"><i class="fab fa-docker fa-lg"></i></a>
|
||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu24.04_py3.12_pytorch_2.8.0/images/sha256-943698ddf54c22a7bcad2e5b4ff467752e29e4ba6d0c926789ae7b242cbd92dd"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
|
||||
|
||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`_
|
||||
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`_
|
||||
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
|
||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
||||
- `2.8.0 <https://github.com/pytorch/pytorch/releases/tag/v2.8.0>`__
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
|
||||
- MI300X, MI250X
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-cf1683283b8eeda867b690229c8091c5bbf1edb9f52e8fb3da437c49a612ebe4"><i class="fab fa-docker fa-lg"></i></a>
|
||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu24.04_py3.12_pytorch_2.6.0/images/sha256-b2ec286a035eb7d0a6aab069561914d21a3cac462281e9c024501ba5ccedfbf7"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
|
||||
|
||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`_
|
||||
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
|
||||
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
|
||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
||||
- `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
|
||||
- MI300X, MI250X
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-4834f178c3614e2d09e89e32041db8984c456d45dfd20286e377ca8635686554"><i class="fab fa-docker fa-lg"></i></a>
|
||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu22.04_py3.10_pytorch_2.7.1/images/sha256-d27aee16df922ccf0bcd9107bfcb6d20d34235445d456c637e33ca6f19d11a51"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
|
||||
|
||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`_
|
||||
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
|
||||
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
|
||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
||||
- `2.7.1 <https://github.com/pytorch/pytorch/releases/tag/v2.7.1>`__
|
||||
- 22.04
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
|
||||
- MI300X, MI250X
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-88740a2c8ab4084b42b10c3c6ba984cab33dd3a044f479c6d7618e2b2cb05e69"><i class="fab fa-docker fa-lg"></i></a>
|
||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm6.4.3_ubuntu24.04_py3.12_pytorch_2.6.0/images/sha256-f3ba6a3c9ec9f6c1cde28449dc9780e0c4c16c4140f4b23f158565fbfd422d6b"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
|
||||
|
||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`_
|
||||
- `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`_
|
||||
- `6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__
|
||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
||||
- `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
|
||||
- MI300X, MI250X
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-8ce2c3bcfaa137ab94a75f9e2ea711894748980f57417739138402a542dd5564"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
|
||||
|
||||
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
|
||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
||||
- `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
|
||||
- MI300X, MI250X
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-cf1683283b8eeda867b690229c8091c5bbf1edb9f52e8fb3da437c49a612ebe4"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
|
||||
|
||||
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
|
||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
||||
- `2.4.1 <https://github.com/pytorch/pytorch/releases/tag/v2.4.1>`__
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
|
||||
- MI300X, MI250X
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-4834f178c3614e2d09e89e32041db8984c456d45dfd20286e377ca8635686554"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
|
||||
|
||||
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
|
||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
||||
- `2.4.1 <https://github.com/pytorch/pytorch/releases/tag/v2.4.1>`__
|
||||
- 22.04
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
|
||||
- MI300X, MI250X
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-88740a2c8ab4084b42b10c3c6ba984cab33dd3a044f479c6d7618e2b2cb05e69"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
|
||||
|
||||
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
|
||||
- `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
|
||||
- `2.3.0 <https://github.com/pytorch/pytorch/releases/tag/v2.3.0>`__
|
||||
- 22.04
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
|
||||
- MI300X, MI250X
|
||||
|
||||
|
||||
.. _dgl-key-rocm-libraries:
|
||||
|
||||
Key ROCm libraries for DGL
|
||||
================================================================================
|
||||
|
||||
DGL on ROCm depends on specific libraries that affect its features and performance.
|
||||
Using the DGL Docker container or building it with the provided docker file or a ROCm base image is recommended.
|
||||
Using the DGL Docker container or building it with the provided Docker file or a ROCm base image is recommended.
|
||||
If you prefer to build it yourself, ensure the following dependencies are installed:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - ROCm library
|
||||
- Version
|
||||
- ROCm 7.0.0 Version
|
||||
- ROCm 6.4.x Version
|
||||
- Purpose
|
||||
* - `Composable Kernel <https://github.com/ROCm/composable_kernel>`_
|
||||
- :version-ref:`"Composable Kernel" rocm_version`
|
||||
- 1.1.0
|
||||
- 1.1.0
|
||||
- Enables faster execution of core operations like matrix multiplication
|
||||
(GEMM), convolutions and transformations.
|
||||
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`_
|
||||
- :version-ref:`hipBLAS rocm_version`
|
||||
- 3.0.0
|
||||
- 2.4.0
|
||||
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
|
||||
matrix and vector operations.
|
||||
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
|
||||
- :version-ref:`hipBLASLt rocm_version`
|
||||
- 1.0.0
|
||||
- 0.12.0
|
||||
- hipBLASLt is an extension of the hipBLAS library, providing additional
|
||||
features like epilogues fused into the matrix multiplication kernel or
|
||||
use of integer tensor cores.
|
||||
* - `hipCUB <https://github.com/ROCm/hipCUB>`_
|
||||
- :version-ref:`hipCUB rocm_version`
|
||||
- 4.0.0
|
||||
- 3.4.0
|
||||
- Provides a C++ template library for parallel algorithms for reduction,
|
||||
scan, sort and select.
|
||||
* - `hipFFT <https://github.com/ROCm/hipFFT>`_
|
||||
- :version-ref:`hipFFT rocm_version`
|
||||
- 1.0.20
|
||||
- 1.0.18
|
||||
- Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
|
||||
* - `hipRAND <https://github.com/ROCm/hipRAND>`_
|
||||
- :version-ref:`hipRAND rocm_version`
|
||||
- 3.0.0
|
||||
- 2.12.0
|
||||
- Provides fast random number generation for GPUs.
|
||||
* - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
|
||||
- :version-ref:`hipSOLVER rocm_version`
|
||||
- 3.0.0
|
||||
- 2.4.0
|
||||
- Provides GPU-accelerated solvers for linear systems, eigenvalues, and
|
||||
singular value decompositions (SVD).
|
||||
* - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_
|
||||
- :version-ref:`hipSPARSE rocm_version`
|
||||
- 4.0.1
|
||||
- 3.2.0
|
||||
- Accelerates operations on sparse matrices, such as sparse matrix-vector
|
||||
or matrix-matrix products.
|
||||
* - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`_
|
||||
- :version-ref:`hipSPARSELt rocm_version`
|
||||
- 0.2.4
|
||||
- 0.2.3
|
||||
- Accelerates operations on sparse matrices, such as sparse matrix-vector
|
||||
or matrix-matrix products.
|
||||
* - `hipTensor <https://github.com/ROCm/hipTensor>`_
|
||||
- :version-ref:`hipTensor rocm_version`
|
||||
- 2.0.0
|
||||
- 1.5.0
|
||||
- Optimizes for high-performance tensor operations, such as contractions.
|
||||
* - `MIOpen <https://github.com/ROCm/MIOpen>`_
|
||||
- :version-ref:`MIOpen rocm_version`
|
||||
- 3.5.0
|
||||
- 3.4.0
|
||||
- Optimizes deep learning primitives such as convolutions, pooling,
|
||||
normalization, and activation functions.
|
||||
* - `MIGraphX <https://github.com/ROCm/AMDMIGraphX>`_
|
||||
- :version-ref:`MIGraphX rocm_version`
|
||||
- 2.13.0
|
||||
- 2.12.0
|
||||
- Adds graph-level optimizations, ONNX models and mixed precision support
|
||||
and enable Ahead-of-Time (AOT) Compilation.
|
||||
* - `MIVisionX <https://github.com/ROCm/MIVisionX>`_
|
||||
- :version-ref:`MIVisionX rocm_version`
|
||||
- 3.3.0
|
||||
- 3.2.0
|
||||
- Optimizes acceleration for computer vision and AI workloads like
|
||||
preprocessing, augmentation, and inferencing.
|
||||
* - `rocAL <https://github.com/ROCm/rocAL>`_
|
||||
- :version-ref:`rocAL rocm_version`
|
||||
- 3.3.0
|
||||
- 2.2.0
|
||||
- Accelerates the data pipeline by offloading intensive preprocessing and
|
||||
augmentation tasks. rocAL is part of MIVisionX.
|
||||
* - `RCCL <https://github.com/ROCm/rccl>`_
|
||||
- :version-ref:`RCCL rocm_version`
|
||||
- 2.26.6
|
||||
- 2.22.3
|
||||
- Optimizes for multi-GPU communication for operations like AllReduce and
|
||||
Broadcast.
|
||||
* - `rocDecode <https://github.com/ROCm/rocDecode>`_
|
||||
- :version-ref:`rocDecode rocm_version`
|
||||
- 1.0.0
|
||||
- 0.10.0
|
||||
- Provides hardware-accelerated data decoding capabilities, particularly
|
||||
for image, video, and other dataset formats.
|
||||
* - `rocJPEG <https://github.com/ROCm/rocJPEG>`_
|
||||
- :version-ref:`rocJPEG rocm_version`
|
||||
- 1.1.0
|
||||
- 0.8.0
|
||||
- Provides hardware-accelerated JPEG image decoding and encoding.
|
||||
* - `RPP <https://github.com/ROCm/RPP>`_
|
||||
- :version-ref:`RPP rocm_version`
|
||||
- 2.0.0
|
||||
- 1.9.10
|
||||
- Speeds up data augmentation, transformation, and other preprocessing steps.
|
||||
* - `rocThrust <https://github.com/ROCm/rocThrust>`_
|
||||
- :version-ref:`rocThrust rocm_version`
|
||||
- 4.0.0
|
||||
- 3.3.0
|
||||
- Provides a C++ template library for parallel algorithms like sorting,
|
||||
reduction, and scanning.
|
||||
* - `rocWMMA <https://github.com/ROCm/rocWMMA>`_
|
||||
- :version-ref:`rocWMMA rocm_version`
|
||||
- 2.0.0
|
||||
- 1.7.0
|
||||
- Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
|
||||
multiplication (GEMM) and accumulation operations with mixed precision
|
||||
support.
|
||||
|
||||
.. _dgl-supported-features-latest:
|
||||
|
||||
Supported features
|
||||
Supported features with ROCm 7.0.0
|
||||
================================================================================
|
||||
|
||||
Many functions and methods available in DGL Upstream are also supported in DGL ROCm.
|
||||
Many functions and methods available upstream are also supported in DGL on ROCm.
|
||||
Instead of listing them all, support is grouped into the following categories to provide a general overview.
|
||||
|
||||
* DGL Base
|
||||
* DGL Backend
|
||||
* DGL Data
|
||||
* DGL Dataloading
|
||||
* DGL DGLGraph
|
||||
* DGL Graph
|
||||
* DGL Function
|
||||
* DGL Ops
|
||||
* DGL Sampling
|
||||
@@ -230,26 +289,76 @@ Instead of listing them all, support is grouped into the following categories to
|
||||
* DGL NN
|
||||
* DGL Optim
|
||||
* DGL Sparse
|
||||
* GraphBolt
|
||||
|
||||
.. _dgl-unsupported-features-latest:
|
||||
|
||||
Unsupported features
|
||||
Unsupported features with ROCm 7.0.0
|
||||
================================================================================
|
||||
|
||||
* Graphbolt
|
||||
* Partial TF32 Support (MI250x only)
|
||||
* Kineto/ ROCTracer integration
|
||||
* TF32 Support (only supported for PyTorch 2.7 and above)
|
||||
* Kineto/ROCTracer integration
|
||||
|
||||
.. _dgl-unsupported-functions:
|
||||
|
||||
Unsupported functions
|
||||
Unsupported functions with ROCm 7.0.0
|
||||
================================================================================
|
||||
|
||||
* ``more_nnz``
|
||||
* ``bfs``
|
||||
* ``format``
|
||||
* ``multiprocess_sparse_adam_state_dict``
|
||||
* ``record_stream_ndarray``
|
||||
* ``half_spmm``
|
||||
* ``segment_mm``
|
||||
* ``gather_mm_idx_b``
|
||||
* ``pgexplainer``
|
||||
* ``sample_labors_prob``
|
||||
* ``sample_labors_noprob``
|
||||
* ``sparse_admin``
|
||||
|
||||
.. _dgl-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
DGL can be used for Graph Learning, and building popular graph models like
|
||||
GAT, GCN, and GraphSage. Using these models, a variety of use cases are supported:
|
||||
|
||||
- Recommender systems
|
||||
- Network Optimization and Analysis
|
||||
- 1D (Temporal) and 2D (Image) Classification
|
||||
- Drug Discovery
|
||||
|
||||
For use cases and recommendations, refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__,
|
||||
where you can search for DGL examples and best practices to optimize your workloads on AMD GPUs.
|
||||
|
||||
* Although multiple use cases of DGL have been tested and verified, a few have been
|
||||
outlined in the `DGL in the Real World: Running GNNs on Real Use Cases
|
||||
<https://rocm.blogs.amd.com/artificial-intelligence/dgl_blog2/README.html>`__ blog
|
||||
post, which walks through four real-world graph neural network (GNN) workloads
|
||||
implemented with the Deep Graph Library on ROCm. It covers tasks ranging from
|
||||
heterogeneous e-commerce graphs and multiplex networks (GATNE) to molecular graph
|
||||
regression (GNN-FiLM) and EEG-based neurological diagnosis (EEG-GCNN). For each use
|
||||
case, the authors detail: the dataset and task, how DGL is used, and their experience
|
||||
porting to ROCm. It is shown that DGL codebases often run without modification, with
|
||||
seamless integration of graph operations, message passing, sampling, and convolution.
|
||||
|
||||
* The `Graph Neural Networks (GNNs) at Scale: DGL with ROCm on AMD Hardware
|
||||
<https://rocm.blogs.amd.com/artificial-intelligence/why-graph-neural/README.html>`__
|
||||
blog post introduces the Deep Graph Library (DGL) and its enablement on the AMD ROCm platform,
|
||||
bringing high-performance graph neural network (GNN) training to AMD GPUs. DGL bridges
|
||||
the gap between dense tensor frameworks and the irregular nature of graph data through a
|
||||
graph-first, message-passing abstraction. Its design ensures scalability, flexibility, and
|
||||
interoperability across frameworks like PyTorch and TensorFlow. AMD’s ROCm integration
|
||||
enables DGL to run efficiently on HIP-based GPUs, supported by prebuilt Docker containers
|
||||
and open-source repositories. This marks a major step in AMD's mission to advance open,
|
||||
scalable AI ecosystems beyond traditional architectures.
|
||||
|
||||
You can pre-process datasets and begin training on AMD GPUs through:
|
||||
|
||||
* Single-GPU training/inference
|
||||
* Multi-GPU training
|
||||
|
||||
|
||||
Previous versions
|
||||
===============================================================================
|
||||
See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/dgl-history` to find documentation for previous releases
|
||||
of the ``ROCm/dgl`` Docker image.
|
||||
@@ -0,0 +1,98 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: FlashInfer compatibility
|
||||
:keywords: GPU, LLM, FlashInfer, deep learning, framework compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
********************************************************************************
|
||||
FlashInfer compatibility
|
||||
********************************************************************************
|
||||
|
||||
`FlashInfer <https://docs.flashinfer.ai/index.html>`__ is a library and kernel generator
|
||||
for Large Language Models (LLMs) that provides a high-performance implementation of graphics
|
||||
processing units (GPUs) kernels. FlashInfer focuses on LLM serving and inference, as well
|
||||
as advanced performance across diverse scenarios.
|
||||
|
||||
FlashInfer features highly efficient attention kernels, load-balanced scheduling, and memory-optimized
|
||||
techniques, while supporting customized attention variants. It’s compatible with ``torch.compile``, and
|
||||
offers high-performance LLM-specific operators, with easy integration through PyTorch, and C++ APIs.
|
||||
|
||||
.. note::
|
||||
|
||||
The ROCm port of FlashInfer is under active development, and some features are not yet available.
|
||||
For the latest feature compatibility matrix, refer to the ``README`` of the
|
||||
`https://github.com/ROCm/flashinfer <https://github.com/ROCm/flashinfer>`__ repository.
|
||||
|
||||
Support overview
|
||||
================================================================================
|
||||
|
||||
- The ROCm-supported version of FlashInfer is maintained in the official `https://github.com/ROCm/flashinfer
|
||||
<https://github.com/ROCm/flashinfer>`__ repository, which differs from the
|
||||
`https://github.com/flashinfer-ai/flashinfer <https://github.com/flashinfer-ai/flashinfer>`__
|
||||
upstream repository.
|
||||
|
||||
- To get started and install FlashInfer on ROCm, use the prebuilt :ref:`Docker images <flashinfer-docker-compat>`,
|
||||
which include ROCm, FlashInfer, and all required dependencies.
|
||||
|
||||
- See the :doc:`ROCm FlashInfer installation guide <rocm-install-on-linux:install/3rd-party/flashinfer-install>`
|
||||
for installation and setup instructions.
|
||||
|
||||
- You can also consult the upstream `Installation guide <https://docs.flashinfer.ai/installation.html>`__
|
||||
for additional context.
|
||||
|
||||
.. _flashinfer-docker-compat:
|
||||
|
||||
Compatibility matrix
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes `FlashInfer images <https://hub.docker.com/r/rocm/flashinfer/tags>`__
|
||||
with ROCm backends on Docker Hub. The following Docker image tag and associated
|
||||
inventories represent the latest available FlashInfer version from the official Docker Hub.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Docker image
|
||||
- ROCm
|
||||
- FlashInfer
|
||||
- PyTorch
|
||||
- Ubuntu
|
||||
- Python
|
||||
- GPU
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/flashinfer/flashinfer-0.2.5_rocm6.4_ubuntu24.04_py3.12_pytorch2.7/images/sha256-558914838821c88c557fb6d42cfbc1bdb67d79d19759f37c764a9ee801f93313"><i class="fab fa-docker fa-lg"></i> rocm/flashinfer</a>
|
||||
- `6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`__
|
||||
- `v0.2.5 <https://github.com/flashinfer-ai/flashinfer/releases/tag/v0.2.5>`__
|
||||
- `2.7.1 <https://github.com/ROCm/pytorch/releases/tag/v2.7.1>`__
|
||||
- 24.04
|
||||
- `3.12 <https://www.python.org/downloads/release/python-3129/>`__
|
||||
- MI300X
|
||||
|
||||
.. _flashinfer-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
The release of FlashInfer on ROCm provides the decode functionality for LLM inferencing.
|
||||
In the decode phase, tokens are generated sequentially, with the model predicting each new
|
||||
token based on the previously generated tokens and the input context.
|
||||
|
||||
FlashInfer on ROCm brings over upstream features such as load balancing, sparse and dense
|
||||
attention optimizations, and batching support, enabling efficient execution on AMD Instinct™ MI300X GPUs.
|
||||
|
||||
Because large LLMs often require substantial KV caches or long context windows, FlashInfer on ROCm
|
||||
also implements cascade attention from upstream to reduce memory usage.
|
||||
|
||||
For currently supported use cases and recommendations, refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__,
|
||||
where you can search for examples and best practices to optimize your workloads on AMD GPUs.
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
.. meta::
|
||||
:description: JAX compatibility
|
||||
:keywords: GPU, JAX compatibility
|
||||
:keywords: GPU, JAX, deep learning, framework compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
@@ -10,42 +10,58 @@
|
||||
JAX compatibility
|
||||
*******************************************************************************
|
||||
|
||||
JAX provides a NumPy-like API, which combines automatic differentiation and the
|
||||
Accelerated Linear Algebra (XLA) compiler to achieve high-performance machine
|
||||
learning at scale.
|
||||
`JAX <https://docs.jax.dev/en/latest/notebooks/thinking_in_jax.html>`__ is a library
|
||||
for array-oriented numerical computation (similar to NumPy), with automatic differentiation
|
||||
and just-in-time (JIT) compilation to enable high-performance machine learning research.
|
||||
|
||||
JAX uses composable transformations of Python and NumPy through just-in-time
|
||||
(JIT) compilation, automatic vectorization, and parallelization. To learn about
|
||||
JAX, including profiling and optimizations, see the official `JAX documentation
|
||||
<https://jax.readthedocs.io/en/latest/notebooks/quickstart.html>`_.
|
||||
JAX provides an API that combines automatic differentiation and the
|
||||
Accelerated Linear Algebra (XLA) compiler to achieve high-performance machine
|
||||
learning at scale. JAX uses composable transformations of Python and NumPy through
|
||||
JIT compilation, automatic vectorization, and parallelization.
|
||||
|
||||
ROCm support for JAX is upstreamed, and users can build the official source code
|
||||
with ROCm support:
|
||||
Support overview
|
||||
================================================================================
|
||||
|
||||
- ROCm JAX release:
|
||||
- The ROCm-supported version of JAX is maintained in the official `https://github.com/ROCm/rocm-jax
|
||||
<https://github.com/ROCm/rocm-jax>`__ repository, which differs from the
|
||||
`https://github.com/jax-ml/jax <https://github.com/jax-ml/jax>`__ upstream repository.
|
||||
|
||||
- Offers AMD-validated and community :ref:`Docker images <jax-docker-compat>`
|
||||
with ROCm and JAX preinstalled.
|
||||
- To get started and install JAX on ROCm, use the prebuilt :ref:`Docker images <jax-docker-compat>`,
|
||||
which include ROCm, JAX, and all required dependencies.
|
||||
|
||||
- ROCm JAX repository: `ROCm/jax <https://github.com/ROCm/jax>`_
|
||||
- See the :doc:`ROCm JAX installation guide <rocm-install-on-linux:install/3rd-party/jax-install>`
|
||||
for installation and setup instructions.
|
||||
|
||||
- See the :doc:`ROCm JAX installation guide <rocm-install-on-linux:install/3rd-party/jax-install>`
|
||||
to get started.
|
||||
- You can also consult the upstream `Installation guide <https://jax.readthedocs.io/en/latest/installation.html#amd-gpu-linux>`__
|
||||
for additional context.
|
||||
|
||||
- Official JAX release:
|
||||
Version support
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
- Official JAX repository: `jax-ml/jax <https://github.com/jax-ml/jax>`_
|
||||
AMD releases official `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax/tags>`_
|
||||
quarterly alongside new ROCm releases. These images undergo full AMD testing.
|
||||
`Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community/tags>`_
|
||||
follow upstream JAX releases and use the latest available ROCm version.
|
||||
|
||||
- See the `AMD GPU (Linux) installation section
|
||||
<https://jax.readthedocs.io/en/latest/installation.html#amd-gpu-linux>`_ in
|
||||
the JAX documentation.
|
||||
JAX Plugin-PJRT with JAX/JAXLIB compatibility
|
||||
================================================================================
|
||||
|
||||
.. note::
|
||||
Portable JIT Runtime (PJRT) is an open, stable interface for device runtime and
|
||||
compiler. The following table details the ROCm version compatibility matrix
|
||||
between JAX Plugin–PJRT and JAX/JAXLIB.
|
||||
|
||||
AMD releases official `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax>`_
|
||||
quarterly alongside new ROCm releases. These images undergo full AMD testing.
|
||||
`Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
|
||||
follow upstream JAX releases and use the latest available ROCm version.
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - JAX Plugin-PJRT
|
||||
- JAX/JAXLIB
|
||||
- ROCm
|
||||
* - 0.7.1
|
||||
- 0.7.1
|
||||
- 7.1.1, 7.1.0
|
||||
* - 0.6.0
|
||||
- 0.6.2, 0.6.0
|
||||
- 7.0.2, 7.0.1, 7.0.0
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
@@ -71,7 +87,7 @@ Use cases and recommendations
|
||||
* The `Distributed fine-tuning with JAX on AMD GPUs <https://rocm.blogs.amd.com/artificial-intelligence/distributed-sft-jax/README.html>`_
|
||||
outlines the process of fine-tuning a Bidirectional Encoder Representations
|
||||
from Transformers (BERT)-based large language model (LLM) using JAX for a text
|
||||
classification task. The blog post discuss techniques for parallelizing the
|
||||
classification task. The blog post discusses techniques for parallelizing the
|
||||
fine-tuning across multiple AMD GPUs and assess the model's performance on a
|
||||
holdout dataset. During the fine-tuning, a BERT-base-cased transformer model
|
||||
and the General Language Understanding Evaluation (GLUE) benchmark dataset was
|
||||
@@ -79,7 +95,7 @@ Use cases and recommendations
|
||||
|
||||
* The `MI300X workload optimization guide <https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/workload.html>`_
|
||||
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
|
||||
accelerator using ROCm. The page is aimed at helping users achieve optimal
|
||||
GPU using ROCm. The page is aimed at helping users achieve optimal
|
||||
performance for deep learning and other high-performance computing tasks on
|
||||
the MI300X GPU.
|
||||
|
||||
@@ -90,75 +106,15 @@ For more use cases and recommendations, see `ROCm JAX blog posts <https://rocm.b
|
||||
Docker image compatibility
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
AMD validates and publishes `JAX images <https://hub.docker.com/r/rocm/jax/tags>`__
|
||||
with ROCm backends on Docker Hub.
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
For ``jax-community`` images, see `rocm/jax-community
|
||||
<https://hub.docker.com/r/rocm/jax-community/tags>`__ on Docker Hub.
|
||||
|
||||
AMD validates and publishes ready-made `ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and
|
||||
associated inventories represent the latest JAX version from the official Docker Hub and are validated for
|
||||
`ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`_. Click the |docker-icon|
|
||||
icon to view the image on Docker Hub.
|
||||
|
||||
.. list-table:: JAX Docker image components
|
||||
:header-rows: 1
|
||||
|
||||
* - Docker image
|
||||
- JAX
|
||||
- Linux
|
||||
- Python
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.4.2-jax0.4.35-py3.12/images/sha256-8918fa806a172c1a10eb2f57131eb31b5d7c8fa1656b8729fe7d3d736112de83"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
|
||||
|
||||
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
|
||||
- Ubuntu 24.04
|
||||
- `3.12.10 <https://www.python.org/downloads/release/python-31210/>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax/rocm6.4.2-jax0.4.35-py3.10/images/sha256-a394be13c67b7fc602216abee51233afd4b6cb7adaa57ca97e688fba82f9ad79"><i class="fab fa-docker fa-lg"></i> rocm/jax</a>
|
||||
|
||||
- `0.4.35 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.4.35>`_
|
||||
- Ubuntu 22.04
|
||||
- `3.10.17 <https://www.python.org/downloads/release/python-31017/>`_
|
||||
|
||||
AMD publishes `Community ROCm JAX Docker images <https://hub.docker.com/r/rocm/jax-community>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and
|
||||
associated inventories are tested for `ROCm 6.3.2 <https://repo.radeon.com/rocm/apt/6.3.2/>`_.
|
||||
|
||||
.. list-table:: JAX community Docker image components
|
||||
:header-rows: 1
|
||||
|
||||
* - Docker image
|
||||
- JAX
|
||||
- Linux
|
||||
- Python
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.12.8/images/sha256-25dfaa0183e274bd0a3554a309af3249c6f16a1793226cb5373f418e39d3146a"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
|
||||
|
||||
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
|
||||
- Ubuntu 22.04
|
||||
- `3.12.8 <https://www.python.org/downloads/release/python-3128/>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.11.11/images/sha256-ff9baeca9067d13e6c279c911e5a9e5beed0817d24fafd424367cc3d5bd381d7"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
|
||||
|
||||
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
|
||||
- Ubuntu 22.04
|
||||
- `3.11.11 <https://www.python.org/downloads/release/python-31111/>`_
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/jax-community/rocm6.3.2-jax0.5.0-py3.10.16/images/sha256-8bab484be1713655f74da51a191ed824bb9d03db1104fd63530a1ac3c37cf7b1"><i class="fab fa-docker fa-lg"></i> rocm/jax-community</a>
|
||||
|
||||
- `0.5.0 <https://github.com/ROCm/jax/releases/tag/rocm-jax-v0.5.0>`_
|
||||
- Ubuntu 22.04
|
||||
- `3.10.16 <https://www.python.org/downloads/release/python-31016/>`_
|
||||
To find the right image tag, see the :ref:`JAX on ROCm installation
|
||||
documentation <rocm-install-on-linux:jax-docker-support>` for a list of
|
||||
available ``rocm/jax`` images.
|
||||
|
||||
.. _key_rocm_libraries:
|
||||
|
||||
@@ -294,7 +250,7 @@ The ROCm supported data types in JAX are collected in the following table.
|
||||
|
||||
.. note::
|
||||
|
||||
JAX data type support is effected by the :ref:`key_rocm_libraries` and it's
|
||||
JAX data type support is affected by the :ref:`key_rocm_libraries` and it's
|
||||
collected on :doc:`ROCm data types and precision support <rocm:reference/precision-support>`
|
||||
page.
|
||||
|
||||
@@ -310,5 +266,54 @@ For a complete and up-to-date list of JAX public modules (for example, ``jax.num
|
||||
Since version 0.1.56, JAX has full support for ROCm, and the
|
||||
:ref:`Known issues and important notes <jax_comp_known_issues>` section
|
||||
contains details about limitations specific to the ROCm backend. The list of
|
||||
JAX API modules is maintained by the JAX project and is subject to change.
|
||||
JAX API modules are maintained by the JAX project and is subject to change.
|
||||
Refer to the official Jax documentation for the most up-to-date information.
|
||||
|
||||
Key features and enhancements for ROCm 7.0
|
||||
===============================================================================
|
||||
|
||||
- Upgraded XLA backend: Integrates a newer XLA version, enabling better
|
||||
optimizations, broader operator support, and potential performance gains.
|
||||
|
||||
- RNN support: Native RNN support (including LSTMs via ``jax.experimental.rnn``)
|
||||
now available on ROCm, aiding sequence model development.
|
||||
|
||||
- Comprehensive linear algebra capabilities: Offers robust ``jax.linalg``
|
||||
operations, essential for scientific and machine learning tasks.
|
||||
|
||||
- Expanded AMD GPU architecture support: Provides ongoing support for gfx1101
|
||||
GPUs and introduces support for gfx950 and gfx12xx GPUs.
|
||||
|
||||
- Mixed FP8 precision support: Enables ``lax.dot_general`` operations with mixed FP8
|
||||
types, offering pathways for memory and compute efficiency.
|
||||
|
||||
- Streamlined PyPi packaging: Provides reliable PyPi wheels for JAX on ROCm,
|
||||
simplifying the installation process.
|
||||
|
||||
- Pallas experimental kernel development: Continued Pallas framework
|
||||
enhancements for custom GPU kernels, including new intrinsics (specific
|
||||
kernel behaviors under review).
|
||||
|
||||
- Improved build system and CI: Enhanced ROCm build system and CI for greater
|
||||
reliability and maintainability.
|
||||
|
||||
- Enhanced distributed computing setup: Improved JAX setup in multi-GPU
|
||||
distributed environments.
|
||||
|
||||
.. _jax_comp_known_issues:
|
||||
|
||||
Known issues and notes for ROCm 7.0
|
||||
===============================================================================
|
||||
|
||||
- ``nn.dot_product_attention``: Certain configurations of ``jax.nn.dot_product_attention``
|
||||
may cause segmentation faults, though the majority of use cases work correctly.
|
||||
|
||||
- SVD with dynamic shapes: SVD on inputs with dynamic/symbolic shapes might result in an error.
|
||||
SVD with static shapes is unaffected.
|
||||
|
||||
- QR decomposition with symbolic shapes: QR decomposition operations may fail when using
|
||||
symbolic/dynamic shapes in shape polymorphic contexts.
|
||||
|
||||
- Pallas kernels: Specific advanced Pallas kernels may exhibit variations in
|
||||
numerical output or resource usage. These are actively reviewed as part of
|
||||
Pallas's experimental development.
|
||||
|
||||
275
docs/compatibility/ml-compatibility/llama-cpp-compatibility.rst
Normal file
275
docs/compatibility/ml-compatibility/llama-cpp-compatibility.rst
Normal file
@@ -0,0 +1,275 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: llama.cpp compatibility
|
||||
:keywords: GPU, GGML, llama.cpp, deep learning, framework compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
********************************************************************************
|
||||
llama.cpp compatibility
|
||||
********************************************************************************
|
||||
|
||||
`llama.cpp <https://github.com/ggml-org/llama.cpp>`__ is an open-source framework
|
||||
for Large Language Model (LLM) inference that runs on both central processing units
|
||||
(CPUs) and graphics processing units (GPUs). It is written in plain C/C++, providing
|
||||
a simple, dependency-free setup.
|
||||
|
||||
The framework supports multiple quantization options, from 1.5-bit to 8-bit integers,
|
||||
to accelerate inference and reduce memory usage. Originally built as a CPU-first library,
|
||||
llama.cpp is easy to integrate with other programming environments and is widely
|
||||
adopted across diverse platforms, including consumer devices.
|
||||
|
||||
Support overview
|
||||
================================================================================
|
||||
|
||||
- The ROCm-supported version of llama.cpp is maintained in the official `https://github.com/ROCm/llama.cpp
|
||||
<https://github.com/ROCm/llama.cpp>`__ repository, which differs from the
|
||||
`https://github.com/ggml-org/llama.cpp <https://github.com/ggml-org/llama.cpp>`__ upstream repository.
|
||||
|
||||
- To get started and install llama.cpp on ROCm, use the prebuilt :ref:`Docker images <llama-cpp-docker-compat>`,
|
||||
which include ROCm, llama.cpp, and all required dependencies.
|
||||
|
||||
- See the :doc:`ROCm llama.cpp installation guide <rocm-install-on-linux:install/3rd-party/llama-cpp-install>`
|
||||
for installation and setup instructions.
|
||||
|
||||
- You can also consult the upstream `Installation guide <https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md>`__
|
||||
for additional context.
|
||||
|
||||
.. _llama-cpp-docker-compat:
|
||||
|
||||
Compatibility matrix
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes `llama.cpp images <https://hub.docker.com/r/rocm/llama.cpp/tags>`__
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and associated
|
||||
inventories represent the latest available llama.cpp versions from the official Docker Hub.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. important::
|
||||
|
||||
Tag endings of ``_full``, ``_server``, and ``_light`` serve different purposes for entrypoints as follows:
|
||||
|
||||
- Full: This image includes both the main executable file and the tools to convert ``LLaMA`` models into ``ggml`` and convert into 4-bit quantization.
|
||||
- Server: This image only includes the server executable file.
|
||||
- Light: This image only includes the main executable file.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Full Docker
|
||||
- Server Docker
|
||||
- Light Docker
|
||||
- llama.cpp
|
||||
- ROCm
|
||||
- Ubuntu
|
||||
- GPU
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_full/images/sha256-a94f0c7a598cc6504ff9e8371c016d7a2f93e69bf54a36c870f9522567201f10g"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_server/images/sha256-be175932c3c96e882dfbc7e20e0e834f58c89c2925f48b222837ee929dfc47ee"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_light/images/sha256-d8ba0c70603da502c879b1f8010b439c8e7fa9f6cbdac8bbbbbba97cb41ebc9e"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- `b6652 <https://github.com/ROCm/llama.cpp/tree/release/b6652>`__
|
||||
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
|
||||
- 24.04
|
||||
- MI325X, MI300X, MI210
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_full/images/sha256-37582168984f25dce636cc7288298e06d94472ea35f65346b3541e6422b678ee"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_server/images/sha256-7e70578e6c3530c6591cc2c26da24a9ee68a20d318e12241de93c83224f83720"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_light/images/sha256-9a5231acf88b4a229677bc2c636ea3fe78a7a80f558bd80910b919855de93ad5"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- `b6652 <https://github.com/ROCm/llama.cpp/tree/release/b6652>`__
|
||||
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
|
||||
- 22.04
|
||||
- MI325X, MI300X, MI210
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu24.04_full/images/sha256-5960fc850024a8a76451f9eaadd89b7e59981ae9f393b407310c1ddf18892577"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu24.04_server/images/sha256-1b79775d9f546065a6aaf9ca426e1dd4ed4de0b8f6ee83687758cc05af6538e6"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu24.04_light/images/sha256-8f863c4c2857ae42bebd64e4f1a0a1e7cc3ec4503f243e32b4a4dcad070ec361"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
|
||||
- `6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__
|
||||
- 24.04
|
||||
- MI325X, MI300X, MI210
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu22.04_full/images/sha256-888879b3ee208f9247076d7984524b8d1701ac72611689e89854a1588bec9867"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu22.04_server/images/sha256-90e4ff99a66743e33fd00728cd71a768588e5f5ef355aaa196669fe65ac70672"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.3_ubuntu22.04_light/images/sha256-bd447a049939cb99054f8fbf3f2352870fe906a75e2dc3339c845c08b9c53f9b"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
|
||||
- `6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__
|
||||
- 22.04
|
||||
- MI325X, MI300X, MI210
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu24.04_full/images/sha256-5b3a1bc4889c1fcade434b937fbf9cc1c22ff7dc0317c130339b0c9238bc88c4"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu24.04_server/images/sha256-5228ff99d0f627a9032d668f4381b2e80dc1e301adc3e0821f26d8354b175271"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu24.04_light/images/sha256-b12723b332a826a89b7252dddf868cbe4d1a869562fc4aa4032f59e1a683b968"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
|
||||
- `6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__
|
||||
- 24.04
|
||||
- MI325X, MI300X, MI210
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu22.04_full/images/sha256-cd6e21a6a73f59b35dd5309b09dd77654a94d783bf13a55c14eb8dbf8e9c2615"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu22.04_server/images/sha256-c2b4689ab2c47e6626e8fea22d7a63eb03d47c0fde9f5ef8c9f158d15c423e58"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.2_ubuntu22.04_light/images/sha256-1acc28f29ed87db9cbda629cb29e1989b8219884afe05f9105522be929e94da4"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
|
||||
- `6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__
|
||||
- 22.04
|
||||
- MI325X, MI300X, MI210
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu24.04_full/images/sha256-2f8ae8a44510d96d52dea6cb398b224f7edeb7802df7ec488c6f63d206b3cdc9"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu24.04_server/images/sha256-fece497ff9f4a28b12f645de52766941da8ead8471aa1ea84b61d4b4568e51f2"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu24.04_light/images/sha256-3e14352fa6f8c6128b23cf9342531c20dbfb522550b626e09d83b260a1947022"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
|
||||
- `6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`__
|
||||
- 24.04
|
||||
- MI325X, MI300X, MI210
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu22.04_full/images/sha256-80763062ef0bec15038c35fd01267f1fc99a5dd171d4b48583cc668b15efad69"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu22.04_server/images/sha256-db2a6c957555ed83b819bbc54aea884a93192da0fb512dae63d32e0dc4e8ab8f"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm6.4.1_ubuntu22.04_light/images/sha256-c6dbb07cc655fb079d5216e4b77451cb64a9daa0585d23b6fb8b32cb22021197"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
|
||||
- `6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`__
|
||||
- 22.04
|
||||
- MI325X, MI300X, MI210
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b5997_rocm6.4.0_ubuntu24.04_full/images/sha256-f78f6c81ab2f8e957469415fe2370a1334fe969c381d1fe46050c85effaee9d5"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b5997_rocm6.4.0_ubuntu24.04_server/images/sha256-275ad9e18f292c26a00a2de840c37917e98737a88a3520bdc35fd3fc5c9a6a9b"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b5997_rocm6.4.0_ubuntu24.04_light/images/sha256-cc324e6faeedf0e400011f07b49d2dc41a16bae257b2b7befa0f4e2e97231320"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
|
||||
- `b5997 <https://github.com/ROCm/llama.cpp/tree/release/b5997>`__
|
||||
- `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
|
||||
- 24.04
|
||||
- MI300X, MI210
|
||||
|
||||
.. _llama-cpp-key-rocm-libraries:
|
||||
|
||||
Key ROCm libraries for llama.cpp
|
||||
================================================================================
|
||||
|
||||
llama.cpp functionality on ROCm is determined by its underlying library
|
||||
dependencies. These ROCm components affect the capabilities, performance, and
|
||||
feature set available to developers. Ensure you have the required libraries for
|
||||
your corresponding ROCm version.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - ROCm library
|
||||
- ROCm 7.0.0 version
|
||||
- ROCm 6.4.x version
|
||||
- Purpose
|
||||
- Usage
|
||||
* - `hipBLAS <https://github.com/ROCm/hipBLAS>`__
|
||||
- 3.0.0
|
||||
- 2.4.0
|
||||
- Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
|
||||
matrix and vector operations.
|
||||
- Supports operations such as matrix multiplication, matrix-vector
|
||||
products, and tensor contractions. Utilized in both dense and batched
|
||||
linear algebra operations.
|
||||
* - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`__
|
||||
- 1.0.0
|
||||
- 0.12.0
|
||||
- hipBLASLt is an extension of the hipBLAS library, providing additional
|
||||
features like epilogues fused into the matrix multiplication kernel or
|
||||
use of integer tensor cores.
|
||||
- By setting the flag ``ROCBLAS_USE_HIPBLASLT``, you can dispatch hipblasLt
|
||||
kernels where possible.
|
||||
* - `rocWMMA <https://github.com/ROCm/rocWMMA>`__
|
||||
- 2.0.0
|
||||
- 1.7.0
|
||||
- Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
|
||||
multiplication (GEMM) and accumulation operations with mixed precision
|
||||
support.
|
||||
- Can be used to enhance the flash attention performance on AMD compute, by enabling
|
||||
the flag during compile time.
|
||||
|
||||
.. _llama-cpp-uses-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
llama.cpp can be applied in a variety of scenarios, particularly when you need to meet one or more of the following requirements:
|
||||
|
||||
- Plain C/C++ implementation with no external dependencies
|
||||
- Support for 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory usage
|
||||
- Custom HIP (Heterogeneous-compute Interface for Portability) kernels for running large language models (LLMs) on AMD GPUs (graphics processing units)
|
||||
- CPU (central processing unit) + GPU (graphics processing unit) hybrid inference for partially accelerating models larger than the total available VRAM (video random-access memory)
|
||||
|
||||
llama.cpp is also used in a range of real-world applications, including:
|
||||
|
||||
- Games such as `Lucy's Labyrinth <https://github.com/MorganRO8/Lucys_Labyrinth>`__:
|
||||
A simple maze game where AI-controlled agents attempt to trick the player.
|
||||
- Tools such as `Styled Lines <https://marketplace.unity.com/packages/tools/ai-ml-integration/style-text-webgl-ios-stand-alone-llm-llama-cpp-wrapper-292902>`__:
|
||||
A proprietary, asynchronous inference wrapper for Unity3D game development, including pre-built mobile and web platform wrappers and a model example.
|
||||
- Various other AI applications use llama.cpp as their inference engine;
|
||||
for a detailed list, see the `user interfaces (UIs) section <https://github.com/ggml-org/llama.cpp?tab=readme-ov-file#description>`__.
|
||||
|
||||
For more use cases and recommendations, refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__,
|
||||
where you can search for llama.cpp examples and best practices to optimize your workloads on AMD GPUs.
|
||||
|
||||
- The `Llama.cpp Meets Instinct: A New Era of Open-Source AI Acceleration <https://rocm.blogs.amd.com/ecosystems-and-partners/llama-cpp/README.html>`__
|
||||
blog post outlines how the open-source llama.cpp framework enables efficient LLM inference—including interactive inference with ``llama-cli``,
|
||||
server deployment with ``llama-server``, GGUF model preparation and quantization, performance benchmarking, and optimizations tailored for
|
||||
AMD Instinct GPUs within the ROCm ecosystem.
|
||||
|
||||
|
||||
Previous versions
|
||||
===============================================================================
|
||||
See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/llama-cpp-history` to find documentation for previous releases
|
||||
of the ``ROCm/llama.cpp`` Docker image.
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
.. meta::
|
||||
:description: Megablocks compatibility
|
||||
:keywords: GPU, megablocks, compatibility
|
||||
:keywords: GPU, megablocks, deep learning, framework compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
@@ -10,64 +10,41 @@
|
||||
Megablocks compatibility
|
||||
********************************************************************************
|
||||
|
||||
Megablocks is a light-weight library for mixture-of-experts (MoE) training.
|
||||
`Megablocks <https://github.com/databricks/megablocks>`__ is a lightweight library
|
||||
for mixture-of-experts `(MoE) <https://huggingface.co/blog/moe>`__ training.
|
||||
The core of the system is efficient "dropless-MoE" and standard MoE layers.
|
||||
Megablocks is integrated with `https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`_,
|
||||
Megablocks is integrated with `https://github.com/stanford-futuredata/Megatron-LM
|
||||
<https://github.com/stanford-futuredata/Megatron-LM>`__,
|
||||
where data and pipeline parallel training of MoEs is supported.
|
||||
|
||||
* ROCm support for Megablocks is hosted in the official `https://github.com/ROCm/megablocks <https://github.com/ROCm/megablocks>`_ repository.
|
||||
* Due to independent compatibility considerations, this location differs from the `https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`_ upstream repository.
|
||||
* Use the prebuilt :ref:`Docker image <megablocks-docker-compat>` with ROCm, PyTorch, and Megablocks preinstalled.
|
||||
* See the :doc:`ROCm Megablocks installation guide <rocm-install-on-linux:install/3rd-party/megablocks-install>` to install and get started.
|
||||
|
||||
.. note::
|
||||
|
||||
Megablocks is supported on ROCm 6.3.0.
|
||||
|
||||
Supported devices
|
||||
Support overview
|
||||
================================================================================
|
||||
|
||||
- **Officially Supported**: AMD Instinct MI300X
|
||||
- **Partially Supported** (functionality or performance limitations): AMD Instinct MI250X, MI210X
|
||||
- The ROCm-supported version of Megablocks is maintained in the official `https://github.com/ROCm/megablocks
|
||||
<https://github.com/ROCm/megablocks>`__ repository, which differs from the
|
||||
`https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`__ upstream repository.
|
||||
|
||||
Supported models and features
|
||||
================================================================================
|
||||
- To get started and install Megablocks on ROCm, use the prebuilt :ref:`Docker image <megablocks-docker-compat>`,
|
||||
which includes ROCm, Megablocks, and all required dependencies.
|
||||
|
||||
This section summarizes the Megablocks features supported by ROCm.
|
||||
|
||||
* Distributed Pre-training
|
||||
* Activation Checkpointing and Recomputation
|
||||
* Distributed Optimizer
|
||||
* Mixture-of-Experts
|
||||
* dropless-Mixture-of-Experts
|
||||
|
||||
|
||||
.. _megablocks-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
The `ROCm Megablocks blog posts <https://rocm.blogs.amd.com/artificial-intelligence/megablocks/README.html>`_
|
||||
guide how to leverage the ROCm platform for pre-training using the Megablocks framework.
|
||||
It features how to pre-process datasets and how to begin pre-training on AMD GPUs through:
|
||||
|
||||
* Single-GPU pre-training
|
||||
* Multi-GPU pre-training
|
||||
- See the :doc:`ROCm Megablocks installation guide <rocm-install-on-linux:install/3rd-party/megablocks-install>`
|
||||
for installation and setup instructions.
|
||||
|
||||
- You can also consult the upstream `Installation guide <https://github.com/databricks/megablocks>`__
|
||||
for additional context.
|
||||
|
||||
.. _megablocks-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
Compatibility matrix
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes `ROCm Megablocks images <https://hub.docker.com/r/rocm/megablocks/tags>`_
|
||||
with ROCm and Pytorch backends on Docker Hub. The following Docker image tags and associated
|
||||
inventories represent the latest Megatron-LM version from the official Docker Hub.
|
||||
The Docker images have been validated for `ROCm 6.3.0 <https://repo.radeon.com/rocm/apt/6.3/>`_.
|
||||
AMD validates and publishes `Megablocks images <https://hub.docker.com/r/rocm/megablocks/tags>`__
|
||||
with ROCm backends on Docker Hub. The following Docker image tag and associated
|
||||
inventories represent the latest available Megablocks version from the official Docker Hub.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table::
|
||||
@@ -80,6 +57,7 @@ Click |docker-icon| to view the image on Docker Hub.
|
||||
- PyTorch
|
||||
- Ubuntu
|
||||
- Python
|
||||
- GPU
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
@@ -89,5 +67,38 @@ Click |docker-icon| to view the image on Docker Hub.
|
||||
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
- MI300X
|
||||
|
||||
Supported models and features with ROCm 6.3.0
|
||||
================================================================================
|
||||
|
||||
This section summarizes the Megablocks features supported by ROCm.
|
||||
|
||||
* Distributed Pre-training
|
||||
* Activation Checkpointing and Recomputation
|
||||
* Distributed Optimizer
|
||||
* Mixture-of-Experts
|
||||
* dropless-Mixture-of-Experts
|
||||
|
||||
.. _megablocks-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
* The `Efficient MoE training on AMD ROCm: How-to use Megablocks on AMD GPUs
|
||||
<https://rocm.blogs.amd.com/artificial-intelligence/megablocks/README.html>`__
|
||||
blog post guides how to leverage the ROCm platform for pre-training using the
|
||||
Megablocks framework. It introduces a streamlined approach for training Mixture-of-Experts
|
||||
(MoE) models using the Megablocks library on AMD hardware. Focusing on GPT-2, it
|
||||
demonstrates how block-sparse computations can enhance scalability and efficiency in MoE
|
||||
training. The guide provides step-by-step instructions for setting up the environment,
|
||||
including cloning the repository, building the Docker image, and running the training container.
|
||||
Additionally, it offers insights into utilizing the ``oscar-1GB.json`` dataset for pre-training
|
||||
language models. By leveraging Megablocks and the ROCm platform, you can optimize your MoE
|
||||
training workflows for large-scale transformer models.
|
||||
|
||||
It features how to pre-process datasets and how to begin pre-training on AMD GPUs through:
|
||||
|
||||
* Single-GPU pre-training
|
||||
* Multi-GPU pre-training
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
.. meta::
|
||||
:description: PyTorch compatibility
|
||||
:keywords: GPU, PyTorch compatibility
|
||||
:keywords: GPU, PyTorch, deep learning, framework compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
@@ -15,40 +15,42 @@ deep learning. PyTorch on ROCm provides mixed-precision and large-scale training
|
||||
using `MIOpen <https://github.com/ROCm/MIOpen>`__ and
|
||||
`RCCL <https://github.com/ROCm/rccl>`__ libraries.
|
||||
|
||||
ROCm support for PyTorch is upstreamed into the official PyTorch repository. Due
|
||||
to independent compatibility considerations, this results in two distinct
|
||||
release cycles for PyTorch on ROCm:
|
||||
PyTorch provides two high-level features:
|
||||
|
||||
- ROCm PyTorch release:
|
||||
- Tensor computation (like NumPy) with strong GPU acceleration
|
||||
|
||||
- Provides the latest version of ROCm but might not necessarily support the
|
||||
latest stable PyTorch version.
|
||||
- Deep neural networks built on a tape-based autograd system (rapid computation
|
||||
of multiple partial derivatives or gradients)
|
||||
|
||||
- Offers :ref:`Docker images <pytorch-docker-compat>` with ROCm and PyTorch
|
||||
preinstalled.
|
||||
Support overview
|
||||
================================================================================
|
||||
|
||||
- ROCm PyTorch repository: `<https://github.com/ROCm/pytorch>`__
|
||||
ROCm support for PyTorch is upstreamed into the official PyTorch repository.
|
||||
ROCm development is aligned with the stable release of PyTorch, while upstream
|
||||
PyTorch testing uses the stable release of ROCm to maintain consistency:
|
||||
|
||||
- See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>`
|
||||
to get started.
|
||||
- The ROCm-supported version of PyTorch is maintained in the official `https://github.com/ROCm/pytorch
|
||||
<https://github.com/ROCm/pytorch>`__ repository, which differs from the
|
||||
`https://github.com/pytorch/pytorch <https://github.com/pytorch/pytorch>`__ upstream repository.
|
||||
|
||||
- Official PyTorch release:
|
||||
- To get started and install PyTorch on ROCm, use the prebuilt :ref:`Docker images <pytorch-docker-compat>`,
|
||||
which include ROCm, PyTorch, and all required dependencies.
|
||||
|
||||
- Provides the latest stable version of PyTorch but might not necessarily
|
||||
support the latest ROCm version.
|
||||
- See the :doc:`ROCm PyTorch installation guide <rocm-install-on-linux:install/3rd-party/pytorch-install>`
|
||||
for installation and setup instructions.
|
||||
|
||||
- Official PyTorch repository: `<https://github.com/pytorch/pytorch>`__
|
||||
|
||||
- See the `Nightly and latest stable version installation guide <https://pytorch.org/get-started/locally/>`__
|
||||
or `Previous versions <https://pytorch.org/get-started/previous-versions/>`__
|
||||
to get started.
|
||||
- You can also consult the upstream `Installation guide <https://pytorch.org/get-started/locally/>`__ or
|
||||
`Previous versions <https://pytorch.org/get-started/previous-versions/>`__ for additional context.
|
||||
|
||||
PyTorch includes tooling that generates HIP source code from the CUDA backend.
|
||||
This approach allows PyTorch to support ROCm without requiring manual code
|
||||
modifications. For more information, see :doc:`HIPIFY <hipify:index>`.
|
||||
|
||||
ROCm development is aligned with the stable release of PyTorch, while upstream
|
||||
PyTorch testing uses the stable release of ROCm to maintain consistency.
|
||||
Version support
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
AMD releases official `ROCm PyTorch Docker images <https://hub.docker.com/r/rocm/pytorch/tags>`_
|
||||
quarterly alongside new ROCm releases. These images undergo full AMD testing.
|
||||
|
||||
.. _pytorch-recommendations:
|
||||
|
||||
@@ -73,12 +75,12 @@ Use cases and recommendations
|
||||
|
||||
* The :doc:`Instinct MI300X workload optimization guide </how-to/rocm-for-ai/inference-optimization/workload>`
|
||||
provides detailed guidance on optimizing workloads for the AMD Instinct MI300X
|
||||
accelerator using ROCm. This guide helps users achieve optimal performance for
|
||||
GPU using ROCm. This guide helps users achieve optimal performance for
|
||||
deep learning and other high-performance computing tasks on the MI300X
|
||||
accelerator.
|
||||
GPU.
|
||||
|
||||
* The :doc:`Inception with PyTorch documentation </conceptual/ai-pytorch-inception>`
|
||||
describes how PyTorch integrates with ROCm for AI workloads It outlines the
|
||||
describes how PyTorch integrates with ROCm for AI workloads. It outlines the
|
||||
use of PyTorch on the ROCm platform and focuses on efficiently leveraging AMD
|
||||
GPU hardware for training and inference tasks in AI applications.
|
||||
|
||||
@@ -89,141 +91,12 @@ For more use cases and recommendations, see `ROCm PyTorch blog posts <https://ro
|
||||
Docker image compatibility
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
AMD validates and publishes `PyTorch images <https://hub.docker.com/r/rocm/pytorch/tags>`__
|
||||
with ROCm backends on Docker Hub.
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes `PyTorch images <https://hub.docker.com/r/rocm/pytorch>`__
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and associated
|
||||
inventories were tested on `ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table:: PyTorch Docker image components
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Docker
|
||||
- PyTorch
|
||||
- Ubuntu
|
||||
- Python
|
||||
- Apex
|
||||
- torchvision
|
||||
- TensorBoard
|
||||
- MAGMA
|
||||
- UCX
|
||||
- OMPI
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-6a287591500b4048a9556c1ecc92bc411fd3d552f6c8233bc399f18eb803e8d6"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`__
|
||||
- 24.04
|
||||
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`__
|
||||
- `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`__
|
||||
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`__
|
||||
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
|
||||
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.6.0/images/sha256-06b967629ba6657709f04169832cd769a11e6b491e8b1394c361d42d7a0c8b43"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`__
|
||||
- 22.04
|
||||
- `3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `1.6.0 <https://github.com/ROCm/apex/tree/release/1.6.0>`__
|
||||
- `0.21.0 <https://github.com/pytorch/vision/tree/v0.21.0>`__
|
||||
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`__
|
||||
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
|
||||
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.5.1/images/sha256-62022414217ef6de33ac5b1341e57db8a48e8573fa2ace12d48aa5edd4b99ef0"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
|
||||
- 24.04
|
||||
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
|
||||
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
|
||||
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`__
|
||||
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.10.0>`__
|
||||
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.11_pytorch_release_2.5.1/images/sha256-469a7f74fc149aff31797e011ee41978f6a190adc69fa423b3c6a718a77bd985"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
|
||||
- 22.04
|
||||
- `3.11 <https://www.python.org/downloads/release/python-31113/>`__
|
||||
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
|
||||
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
|
||||
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`__
|
||||
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
|
||||
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.5.1/images/sha256-37f41a1cd94019688669a1b20d33ea74156e0c129ef6b8270076ef214a6a1a2c"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.5.1 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
|
||||
- 22.04
|
||||
- `3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `1.5.0 <https://github.com/ROCm/apex/tree/release/1.5.0>`__
|
||||
- `0.20.1 <https://github.com/pytorch/vision/tree/v0.20.1>`__
|
||||
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`__
|
||||
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
|
||||
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-60824ba83dc1b9d94164925af1f81c0235c105dd555091ec04c57e05177ead1b"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
|
||||
- 24.04
|
||||
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`__
|
||||
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`__
|
||||
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`__
|
||||
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
|
||||
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-fe944fe083312f901be6891ab4d3ffebf2eaf2cf4f5f0f435ef0b76ec714fabd"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
|
||||
- 22.04
|
||||
- `3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `1.4.0 <https://github.com/ROCm/apex/tree/release/1.4.0>`__
|
||||
- `0.19.0 <https://github.com/pytorch/vision/tree/v0.19.0>`__
|
||||
- `2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`__
|
||||
- `1.12.1~rc2-1 <https://github.com/openucx/ucx/tree/v1.12.1>`__
|
||||
- `4.1.2-2ubuntu1 <https://github.com/open-mpi/ompi/tree/v4.1.2>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/pytorch/rocm6.4.2_ubuntu24.04_py3.12_pytorch_release_2.3.0/images/sha256-1d59251c47170c5b8960d1172a4dbe52f5793d8966edd778f168eaf32d56661a"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`__
|
||||
- 24.04
|
||||
- `3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `1.3.0 <https://github.com/ROCm/apex/tree/release/1.3.0>`__
|
||||
- `0.18.0 <https://github.com/pytorch/vision/tree/v0.18.0>`__
|
||||
- `2.13.0 <https://github.com/tensorflow/tensorboard/tree/2.13>`__
|
||||
- `master <https://bitbucket.org/icl/magma/src/master/>`__
|
||||
- `1.16.0+ds-5ubuntu1 <https://github.com/openucx/ucx/tree/v1.16.0>`__
|
||||
- `4.1.6-7ubuntu2 <https://github.com/open-mpi/ompi/tree/v4.1.6>`__
|
||||
To find the right image tag, see the :ref:`PyTorch on ROCm installation
|
||||
documentation <rocm-install-on-linux:pytorch-docker-support>` for a list of
|
||||
available ``rocm/pytorch`` images.
|
||||
|
||||
Key ROCm libraries for PyTorch
|
||||
================================================================================
|
||||
@@ -366,7 +239,8 @@ feature set available to developers.
|
||||
Supported modules and data types
|
||||
================================================================================
|
||||
|
||||
The following section outlines the supported data types, modules, and domain libraries available in PyTorch on ROCm.
|
||||
The following section outlines the supported data types, modules, and domain
|
||||
libraries available in PyTorch on ROCm.
|
||||
|
||||
Supported data types
|
||||
--------------------------------------------------------------------------------
|
||||
@@ -465,7 +339,7 @@ with ROCm.
|
||||
* - Library
|
||||
- Description
|
||||
|
||||
* - `torchaudio <https://docs.pytorch.org/audio/stable/index.html>`_
|
||||
* - `torchaudio <https://docs.pytorch.org/audio/stable/index.html>`_
|
||||
- Audio and signal processing library for PyTorch. Provides utilities for
|
||||
audio I/O, signal and data processing functions, datasets, model
|
||||
implementations, and application components for audio and speech
|
||||
@@ -475,7 +349,7 @@ with ROCm.
|
||||
you need to explicitly move audio data (waveform tensor) to GPU using
|
||||
``.to('cuda')``.
|
||||
|
||||
* - `torchtune <https://docs.pytorch.org/torchtune/stable/index.html>`_
|
||||
* - `torchtune <https://meta-pytorch.org/torchtune/stable/index.html>`_
|
||||
- PyTorch-native library designed for fine-tuning large language models
|
||||
(LLMs). Provides supports the full fine-tuning workflow and offers
|
||||
compatibility with popular production inference systems.
|
||||
@@ -487,21 +361,12 @@ with ROCm.
|
||||
popular datasets, model architectures, and common image transformations
|
||||
for computer vision applications.
|
||||
|
||||
* - `torchtext <https://docs.pytorch.org/text/stable/index.html>`_
|
||||
- Text processing library for PyTorch. Provides data processing utilities
|
||||
and popular datasets for natural language processing, including
|
||||
tokenization, vocabulary management, and text embeddings.
|
||||
|
||||
**Note:** ``torchtext`` does not implement ROCm-specific kernels.
|
||||
ROCm acceleration is provided through the underlying PyTorch framework
|
||||
and ROCm library integration. Only official release exists.
|
||||
|
||||
* - `torchdata <https://docs.pytorch.org/data/beta/index.html>`_
|
||||
* - `torchdata <https://meta-pytorch.org/data/beta/index.html#torchdata>`_
|
||||
- Beta library of common modular data loading primitives for easily
|
||||
constructing flexible and performant data pipelines, with features still
|
||||
in prototype stage.
|
||||
|
||||
* - `torchrec <https://docs.pytorch.org/torchrec/>`_
|
||||
* - `torchrec <https://meta-pytorch.org/torchrec/>`_
|
||||
- PyTorch domain library for common sparsity and parallelism primitives
|
||||
needed for large-scale recommender systems, enabling authors to train
|
||||
models with large embedding tables shared across many GPUs.
|
||||
@@ -533,3 +398,101 @@ with ROCm.
|
||||
dispatching.
|
||||
|
||||
**Note:** Only official release exists.
|
||||
|
||||
Key features and enhancements for PyTorch 2.9 with ROCm 7.1.1
|
||||
================================================================================
|
||||
- Scaled Dot Product Attention (SDPA) upgraded to use AOTriton version 0.11b.
|
||||
|
||||
- Default hipBLASLt support enabled for gfx908 architecture on ROCm 6.3 and later.
|
||||
|
||||
- MIOpen now supports channels last memory format for 3D convolutions and batch normalization.
|
||||
|
||||
- NHWC convolution operations in MIOpen optimized by eliminating unnecessary transpose operations.
|
||||
|
||||
- Improved tensor.item() performance by removing redundant synchronization.
|
||||
|
||||
- Enhanced performance for element-wise operations and reduction kernels.
|
||||
|
||||
- Added support for grouped GEMM operations through fbgemm_gpu generative AI components.
|
||||
|
||||
- Resolved device error in Inductor when using CUDA graph trees with HIP.
|
||||
|
||||
- Corrected logsumexp scaling in AOTriton-based SDPA implementation.
|
||||
|
||||
- Added stream graph capture status validation in memory copy synchronization functions.
|
||||
|
||||
Key features and enhancements for PyTorch 2.8 with ROCm 7.1
|
||||
================================================================================
|
||||
|
||||
- MIOpen deep learning optimizations: Further optimized NHWC BatchNorm feature.
|
||||
|
||||
- Added float8 support for the DeepSpeed extension, allowing for decreased
|
||||
memory footprint and increased throughput in training and inference workloads.
|
||||
|
||||
- ``torch.nn.functional.scaled_dot_product_attention`` now calling optimized
|
||||
flash attention kernel automatically.
|
||||
|
||||
Key features and enhancements for PyTorch 2.7/2.8 with ROCm 7.0
|
||||
================================================================================
|
||||
|
||||
- Enhanced TunableOp framework: Introduces ``tensorfloat32`` support for
|
||||
TunableOp operations, improved offline tuning for ScaledGEMM operations,
|
||||
submatrix offline tuning capabilities, and better logging for BLAS operations
|
||||
without bias vectors.
|
||||
|
||||
- Expanded GPU architecture support: Provides optimized support for newer GPU
|
||||
architectures, including gfx1200 and gfx1201 with preferred hipBLASLt backend
|
||||
selection, along with improvements for gfx950 and gfx1100 Series GPUs.
|
||||
|
||||
- Advanced Triton Integration: AOTriton 0.10b introduces official support for
|
||||
gfx950 and gfx1201, along with experimental support for gfx1101, gfx1151,
|
||||
gfx1150, and gfx1200.
|
||||
|
||||
- Improved element-wise kernel performance: Delivers enhanced vectorized
|
||||
element-wise kernels with better support for heterogeneous tensor types and
|
||||
optimized input vectorization for tensors with mixed data types.
|
||||
|
||||
- MIOpen deep learning optimizations: Enables NHWC BatchNorm by default on
|
||||
ROCm 7.0+, provides ``maxpool`` forward and backward performance improvements
|
||||
targeting ResNet scenarios, and includes updated launch configurations for
|
||||
better performance.
|
||||
|
||||
- Enhanced memory and tensor operations: Features fixes for in-place ``aten``
|
||||
sum operations with specialized templated kernels, improved 3D tensor
|
||||
performance with NHWC format, and better handling of memory-bound matrix
|
||||
multiplication operations.
|
||||
|
||||
- Robust testing and quality improvements: Includes comprehensive test suite
|
||||
updates with improved tolerance handling for Navi3x architectures, generalized
|
||||
ROCm-specific test conditions, and enhanced unit test coverage for Flash
|
||||
Attention and Memory Efficient operations.
|
||||
|
||||
- Composable Kernel (CK) updates: Features updated CK submodule integration with
|
||||
the latest optimizations and performance improvements for core mathematical
|
||||
operations.
|
||||
|
||||
- Development and debugging enhancements: Includes improved source handling for
|
||||
dynamic compilation, better error handling for atomic operations, and enhanced
|
||||
state checking for trace operations.
|
||||
|
||||
- Integrate APEX fused layer normalization, which can have positive impact on
|
||||
text-to-video models.
|
||||
|
||||
- Integrate APEX distributed fused LAMB and distributed fused ADAM, which can
|
||||
have positive impact on BERT-L and Llama2-SFT.
|
||||
|
||||
- FlashAttention v3 has been integrated for AMD GPUs.
|
||||
|
||||
- `Pytorch C++ extensions <https://pytorch.org/tutorials/advanced/cpp_extension.html>`_
|
||||
provide a mechanism for compiling custom operations that can be used during
|
||||
network training or inference. For AMD platforms, ``amdclang++`` has been
|
||||
validated as the supported compiler for building these extensions.
|
||||
|
||||
Known issues and notes for PyTorch 2.7/2.8 with ROCm 7.0 and ROCm 7.1
|
||||
================================================================================
|
||||
|
||||
- The ``matmul.allow_fp16_reduced_precision_reduction`` and
|
||||
``matmul.allow_bf16_reduced_precision_reduction`` options under
|
||||
``torch.backends.cuda`` are not supported. As a result,
|
||||
reduced-precision reductions using FP16 or BF16 accumulation types are not
|
||||
available.
|
||||
|
||||
114
docs/compatibility/ml-compatibility/ray-compatibility.rst
Normal file
114
docs/compatibility/ml-compatibility/ray-compatibility.rst
Normal file
@@ -0,0 +1,114 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: Ray compatibility
|
||||
:keywords: GPU, Ray, deep learning, framework compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
*******************************************************************************
|
||||
Ray compatibility
|
||||
*******************************************************************************
|
||||
|
||||
Ray is a unified framework for scaling AI and Python applications from your laptop
|
||||
to a full cluster, without changing your code. Ray consists of `a core distributed
|
||||
runtime <https://docs.ray.io/en/latest/ray-core/walkthrough.html>`__ and a set of
|
||||
`AI libraries <https://docs.ray.io/en/latest/ray-air/getting-started.html>`__ for
|
||||
simplifying machine learning computations.
|
||||
|
||||
Ray is a general-purpose framework that runs many types of workloads efficiently.
|
||||
Any Python application can be scaled with Ray, without extra infrastructure.
|
||||
|
||||
Support overview
|
||||
================================================================================
|
||||
|
||||
- The ROCm-supported version of Ray is maintained in the official `https://github.com/ROCm/ray
|
||||
<https://github.com/ROCm/ray>`__ repository, which differs from the
|
||||
`https://github.com/ray-project/ray <https://github.com/ray-project/ray>`__ upstream repository.
|
||||
|
||||
- To get started and install Ray on ROCm, use the prebuilt :ref:`Docker image <ray-docker-compat>`,
|
||||
which includes ROCm, Ray, and all required dependencies.
|
||||
|
||||
- See the :doc:`ROCm Ray installation guide <rocm-install-on-linux:install/3rd-party/ray-install>`
|
||||
for installation and setup instructions.
|
||||
|
||||
- You can also consult the upstream `Installation guide <https://docs.ray.io/en/latest/ray-overview/installation.html>`__
|
||||
for additional context.
|
||||
|
||||
.. _ray-docker-compat:
|
||||
|
||||
Compatibility matrix
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes `ROCm Ray Docker images <https://hub.docker.com/r/rocm/ray/tags>`__
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and
|
||||
associated inventories represent the latest Ray version from the official Docker Hub.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Docker image
|
||||
- ROCm
|
||||
- Ray
|
||||
- Pytorch
|
||||
- Ubuntu
|
||||
- Python
|
||||
- GPU
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/ray/ray-2.51.1_rocm7.0.0_ubuntu22.04_py3.12_pytorch2.9.0/images/sha256-a02f6766b4ba406f88fd7e85707ec86c04b569834d869a08043ec9bcbd672168"><i class="fab fa-docker fa-lg"></i> rocm/ray</a>
|
||||
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
|
||||
- `2.51.1 <https://github.com/ROCm/ray/tree/release/2.51.1>`__
|
||||
- 2.9.0a0+git1c57644
|
||||
- 22.04
|
||||
- `3.12.12 <https://www.python.org/downloads/release/python-31212/>`__
|
||||
- MI300X
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/ray/ray-2.48.0.post0_rocm6.4.1_ubuntu24.04_py3.12_pytorch2.6.0/images/sha256-0d166fe6bdced38338c78eedfb96eff92655fb797da3478a62dd636365133cc0"><i class="fab fa-docker fa-lg"></i> rocm/ray</a>
|
||||
- `6.4.1 <https://repo.radeon.com/rocm/apt/6.4.1/>`__
|
||||
- `2.48.0.post0 <https://github.com/ROCm/ray/tree/release/2.48.0.post0>`__
|
||||
- 2.6.0+git684f6f2
|
||||
- 24.04
|
||||
- `3.12.10 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- MI300X, MI210
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
* The `Reinforcement Learning from Human Feedback on AMD GPUs with verl and ROCm
|
||||
Integration <https://rocm.blogs.amd.com/artificial-intelligence/verl-large-scale/README.html>`__
|
||||
blog provides an overview of Volcano Engine Reinforcement Learning (verl)
|
||||
for large language models (LLMs) and discusses its benefits in large-scale
|
||||
reinforcement learning from human feedback (RLHF). It uses Ray as part of a
|
||||
hybrid orchestration engine to schedule and coordinate training and inference
|
||||
tasks in parallel, enabling optimized resource utilization and potential overlap
|
||||
between these phases. This dynamic resource allocation strategy significantly
|
||||
improves overall system efficiency. The blog presents verl’s performance results,
|
||||
focusing on throughput and convergence accuracy achieved on AMD Instinct™ MI300X
|
||||
GPUs. Follow this guide to get started with verl on AMD Instinct GPUs and
|
||||
accelerate your RLHF training with ROCm-optimized performance.
|
||||
|
||||
* The `Exploring Use Cases for Scalable AI: Implementing Ray with ROCm Support for Efficient ML Workflows
|
||||
<https://rocm.blogs.amd.com/artificial-intelligence/rocm-ray/README.html>`__
|
||||
blog post describes key use cases such as training and inference for large language models (LLMs),
|
||||
model serving, hyperparameter tuning, reinforcement learning, and the orchestration of large-scale
|
||||
workloads using Ray in the ROCm environment.
|
||||
|
||||
For more use cases and recommendations, see the AMD GPU tabs in the `Accelerator Support
|
||||
topic <https://docs.ray.io/en/latest/ray-core/scheduling/accelerators.html#accelerator-support>`__
|
||||
of the Ray core documentation and refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__,
|
||||
where you can search for Ray examples and best practices to optimize your workloads on AMD GPUs.
|
||||
|
||||
Previous versions
|
||||
===============================================================================
|
||||
See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/ray-history` to find documentation for previous releases
|
||||
of the ``ROCm/ray`` Docker image.
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
.. meta::
|
||||
:description: Stanford Megatron-LM compatibility
|
||||
:keywords: Stanford, Megatron-LM, compatibility
|
||||
:keywords: Stanford, Megatron-LM, deep learning, framework compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
@@ -10,34 +10,76 @@
|
||||
Stanford Megatron-LM compatibility
|
||||
********************************************************************************
|
||||
|
||||
Stanford Megatron-LM is a large-scale language model training framework developed by NVIDIA `https://github.com/NVIDIA/Megatron-LM <https://github.com/NVIDIA/Megatron-LM>`_. It is
|
||||
designed to train massive transformer-based language models efficiently by model and data parallelism.
|
||||
Stanford Megatron-LM is a large-scale language model training framework developed
|
||||
by NVIDIA at `https://github.com/NVIDIA/Megatron-LM <https://github.com/NVIDIA/Megatron-LM>`_.
|
||||
It is designed to train massive transformer-based language models efficiently by model
|
||||
and data parallelism.
|
||||
|
||||
* ROCm support for Stanford Megatron-LM is hosted in the official `https://github.com/ROCm/Stanford-Megatron-LM <https://github.com/ROCm/Stanford-Megatron-LM>`_ repository.
|
||||
* Due to independent compatibility considerations, this location differs from the `https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`_ upstream repository.
|
||||
* Use the prebuilt :ref:`Docker image <megatron-lm-docker-compat>` with ROCm, PyTorch, and Megatron-LM preinstalled.
|
||||
* See the :doc:`ROCm Stanford Megatron-LM installation guide <rocm-install-on-linux:install/3rd-party/stanford-megatron-lm-install>` to install and get started.
|
||||
It provides efficient tensor, pipeline, and sequence-based model parallelism for
|
||||
pre-training transformer-based language models such as GPT (Decoder Only), BERT
|
||||
(Encoder Only), and T5 (Encoder-Decoder).
|
||||
|
||||
.. note::
|
||||
|
||||
Stanford Megatron-LM is supported on ROCm 6.3.0.
|
||||
|
||||
|
||||
Supported Devices
|
||||
Support overview
|
||||
================================================================================
|
||||
|
||||
- **Officially Supported**: AMD Instinct MI300X
|
||||
- **Partially Supported** (functionality or performance limitations): AMD Instinct MI250X, MI210X
|
||||
- The ROCm-supported version of Stanford Megatron-LM is maintained in the official `https://github.com/ROCm/Stanford-Megatron-LM
|
||||
<https://github.com/ROCm/Stanford-Megatron-LM>`__ repository, which differs from the
|
||||
`https://github.com/stanford-futuredata/Megatron-LM <https://github.com/stanford-futuredata/Megatron-LM>`__ upstream repository.
|
||||
|
||||
- To get started and install Stanford Megatron-LM on ROCm, use the prebuilt :ref:`Docker image <megatron-lm-docker-compat>`,
|
||||
which includes ROCm, Stanford Megatron-LM, and all required dependencies.
|
||||
|
||||
Supported models and features
|
||||
- See the :doc:`ROCm Stanford Megatron-LM installation guide <rocm-install-on-linux:install/3rd-party/stanford-megatron-lm-install>`
|
||||
for installation and setup instructions.
|
||||
|
||||
- You can also consult the upstream `Installation guide <https://github.com/NVIDIA/Megatron-LM>`__
|
||||
for additional context.
|
||||
|
||||
.. _megatron-lm-docker-compat:
|
||||
|
||||
Compatibility matrix
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes `Stanford Megatron-LM images <https://hub.docker.com/r/rocm/stanford-megatron-lm/tags>`_
|
||||
with ROCm and Pytorch backends on Docker Hub. The following Docker image tags and associated
|
||||
inventories represent the latest Stanford Megatron-LM version from the official Docker Hub.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Docker image
|
||||
- ROCm
|
||||
- Stanford Megatron-LM
|
||||
- PyTorch
|
||||
- Ubuntu
|
||||
- Python
|
||||
- GPU
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/stanford-megatron-lm/stanford-megatron-lm85f95ae_rocm6.3.0_ubuntu24.04_py3.12_pytorch2.4.0/images/sha256-070556f078be10888a1421a2cb4f48c29f28b02bfeddae02588d1f7fc02a96a6"><i class="fab fa-docker fa-lg"></i> rocm/stanford-megatron-lm</a>
|
||||
|
||||
- `6.3.0 <https://repo.radeon.com/rocm/apt/6.3/>`_
|
||||
- `85f95ae <https://github.com/stanford-futuredata/Megatron-LM/commit/85f95aef3b648075fe6f291c86714fdcbd9cd1f5>`_
|
||||
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
- MI300X
|
||||
|
||||
Supported models and features with ROCm 6.3.0
|
||||
================================================================================
|
||||
|
||||
This section details models & features that are supported by the ROCm version on Stanford Megatron-LM.
|
||||
|
||||
Models:
|
||||
|
||||
* Bert
|
||||
* BERT
|
||||
* GPT
|
||||
* T5
|
||||
* ICT
|
||||
@@ -54,47 +96,21 @@ Features:
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
See the `Efficient MoE training on AMD ROCm: How-to use Megablocks on AMD GPUs blog <https://rocm.blogs.amd.com/artificial-intelligence/megablocks/README.html>`_ post
|
||||
to leverage the ROCm platform for pre-training by using the Stanford Megatron-LM framework of pre-processing datasets on AMD GPUs.
|
||||
Coverage includes:
|
||||
The following blog post mentions Megablocks, but you can run Stanford Megatron-LM with the same steps to pre-process datasets on AMD GPUs:
|
||||
|
||||
* Single-GPU pre-training
|
||||
* Multi-GPU pre-training
|
||||
* The `Efficient MoE training on AMD ROCm: How-to use Megablocks on AMD GPUs
|
||||
<https://rocm.blogs.amd.com/artificial-intelligence/megablocks/README.html>`__
|
||||
blog post guides how to leverage the ROCm platform for pre-training using the
|
||||
Megablocks framework. It introduces a streamlined approach for training Mixture-of-Experts
|
||||
(MoE) models using the Megablocks library on AMD hardware. Focusing on GPT-2, it
|
||||
demonstrates how block-sparse computations can enhance scalability and efficiency in MoE
|
||||
training. The guide provides step-by-step instructions for setting up the environment,
|
||||
including cloning the repository, building the Docker image, and running the training container.
|
||||
Additionally, it offers insights into utilizing the ``oscar-1GB.json`` dataset for pre-training
|
||||
language models. By leveraging Megablocks and the ROCm platform, you can optimize your MoE
|
||||
training workflows for large-scale transformer models.
|
||||
|
||||
It features how to pre-process datasets and how to begin pre-training on AMD GPUs through:
|
||||
|
||||
.. _megatron-lm-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes `Stanford Megatron-LM images <https://hub.docker.com/r/rocm/megatron-lm>`_
|
||||
with ROCm and Pytorch backends on Docker Hub. The following Docker image tags and associated
|
||||
inventories represent the latest Megatron-LM version from the official Docker Hub.
|
||||
The Docker images have been validated for `ROCm 6.3.0 <https://repo.radeon.com/rocm/apt/6.3/>`_.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Docker image
|
||||
- Stanford Megatron-LM
|
||||
- PyTorch
|
||||
- Ubuntu
|
||||
- Python
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/stanford-megatron-lm/stanford-megatron-lm85f95ae_rocm6.3.0_ubuntu24.04_py3.12_pytorch2.4.0/images/sha256-070556f078be10888a1421a2cb4f48c29f28b02bfeddae02588d1f7fc02a96a6"><i class="fab fa-docker fa-lg"></i></a>
|
||||
|
||||
- `85f95ae <https://github.com/stanford-futuredata/Megatron-LM/commit/85f95aef3b648075fe6f291c86714fdcbd9cd1f5>`_
|
||||
- `2.4.0 <https://github.com/ROCm/pytorch/tree/release/2.4>`_
|
||||
- 24.04
|
||||
- `3.12.9 <https://www.python.org/downloads/release/python-3129/>`_
|
||||
|
||||
|
||||
|
||||
* Single-GPU pre-training
|
||||
* Multi-GPU pre-training
|
||||
|
||||
@@ -1,76 +0,0 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: Taichi compatibility
|
||||
:keywords: GPU, Taichi compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
*******************************************************************************
|
||||
Taichi compatibility
|
||||
*******************************************************************************
|
||||
|
||||
`Taichi <https://www.taichi-lang.org/>`_ is an open-source, imperative, and parallel
|
||||
programming language designed for high-performance numerical computation.
|
||||
Embedded in Python, it leverages just-in-time (JIT) compilation frameworks such as LLVM to accelerate
|
||||
compute-intensive Python code by compiling it to native GPU or CPU instructions.
|
||||
|
||||
Taichi is widely used across various domains, including real-time physical simulation,
|
||||
numerical computing, augmented reality, artificial intelligence, computer vision, robotics,
|
||||
visual effects in film and gaming, and general-purpose computing.
|
||||
|
||||
* ROCm support for Taichi is hosted in the official `https://github.com/ROCm/taichi <https://github.com/ROCm/taichi>`_ repository.
|
||||
* Due to independent compatibility considerations, this location differs from the `https://github.com/taichi-dev <https://github.com/taichi-dev>`_ upstream repository.
|
||||
* Use the prebuilt :ref:`Docker image <taichi-docker-compat>` with ROCm, PyTorch, and Taichi preinstalled.
|
||||
* See the :doc:`ROCm Taichi installation guide <rocm-install-on-linux:install/3rd-party/taichi-install>` to install and get started.
|
||||
|
||||
.. note::
|
||||
|
||||
Taichi is supported on ROCm 6.3.2.
|
||||
|
||||
Supported devices and features
|
||||
===============================================================================
|
||||
There is support through the ROCm software stack for all Taichi GPU features on AMD Instinct MI250X and MI210X series GPUs with the exception of Taichi’s GPU rendering system, CGUI.
|
||||
AMD Instinct MI300X series GPUs will be supported by November.
|
||||
|
||||
.. _taichi-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
To fully leverage Taichi's performance capabilities in compute-intensive tasks, it is best to adhere to specific coding patterns and utilize Taichi decorators.
|
||||
A collection of example use cases is available in the `https://github.com/ROCm/taichi_examples <https://github.com/ROCm/taichi_examples>`_ repository,
|
||||
providing practical insights and foundational knowledge for working with the Taichi programming language.
|
||||
You can also refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`_ to search for Taichi examples and best practices to optimize your workflows on AMD GPUs.
|
||||
|
||||
.. _taichi-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes ready-made `ROCm Taichi Docker images <https://hub.docker.com/r/rocm/taichi/tags>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and associated inventories
|
||||
represent the latest Taichi version from the official Docker Hub.
|
||||
The Docker images have been validated for `ROCm 6.3.2 <https://rocm.docs.amd.com/en/docs-6.3.2/about/release-notes.html>`_.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Docker image
|
||||
- ROCm
|
||||
- Taichi
|
||||
- Ubuntu
|
||||
- Python
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/taichi/taichi-1.8.0b1_rocm6.3.2_ubuntu22.04_py3.10.12/images/sha256-e016964a751e6a92199032d23e70fa3a564fff8555afe85cd718f8aa63f11fc6"><i class="fab fa-docker fa-lg"></i> rocm/taichi</a>
|
||||
- `6.3.2 <https://repo.radeon.com/rocm/apt/6.3.2/>`_
|
||||
- `1.8.0b1 <https://github.com/taichi-dev/taichi>`_
|
||||
- 22.04
|
||||
- `3.10.12 <https://www.python.org/downloads/release/python-31012/>`_
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
.. meta::
|
||||
:description: TensorFlow compatibility
|
||||
:keywords: GPU, TensorFlow compatibility
|
||||
:keywords: GPU, TensorFlow, deep learning, framework compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
@@ -12,115 +12,46 @@ TensorFlow compatibility
|
||||
|
||||
`TensorFlow <https://www.tensorflow.org/>`__ is an open-source library for
|
||||
solving machine learning, deep learning, and AI problems. It can solve many
|
||||
problems across different sectors and industries but primarily focuses on
|
||||
neural network training and inference. It is one of the most popular and
|
||||
in-demand frameworks and is very active in open-source contribution and
|
||||
development.
|
||||
problems across different sectors and industries, but primarily focuses on
|
||||
neural network training and inference. It is one of the most popular deep
|
||||
learning frameworks and is very active in open-source development.
|
||||
|
||||
Support overview
|
||||
================================================================================
|
||||
|
||||
- The ROCm-supported version of TensorFlow is maintained in the official `https://github.com/ROCm/tensorflow-upstream
|
||||
<https://github.com/ROCm/tensorflow-upstream>`__ repository, which differs from the
|
||||
`https://github.com/tensorflow/tensorflow <https://github.com/tensorflow/tensorflow>`__ upstream repository.
|
||||
|
||||
- To get started and install TensorFlow on ROCm, use the prebuilt :ref:`Docker images <tensorflow-docker-compat>`,
|
||||
which include ROCm, TensorFlow, and all required dependencies.
|
||||
|
||||
- See the :doc:`ROCm TensorFlow installation guide <rocm-install-on-linux:install/3rd-party/tensorflow-install>`
|
||||
for installation and setup instructions.
|
||||
|
||||
- You can also consult the `TensorFlow API versions <https://www.tensorflow.org/versions>`__ list
|
||||
for additional context.
|
||||
|
||||
Version support
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
The `official TensorFlow repository <http://github.com/tensorflow/tensorflow>`__
|
||||
includes full ROCm support. AMD maintains a TensorFlow `ROCm repository
|
||||
<http://github.com/rocm/tensorflow-upstream>`__ in order to quickly add bug
|
||||
fixes, updates, and support for the latest ROCM versions.
|
||||
|
||||
- ROCm TensorFlow release:
|
||||
|
||||
- Offers :ref:`Docker images <tensorflow-docker-compat>` with
|
||||
ROCm and TensorFlow pre-installed.
|
||||
|
||||
- ROCm TensorFlow repository: `<https://github.com/ROCm/tensorflow-upstream>`__
|
||||
|
||||
- See the :doc:`ROCm TensorFlow installation guide <rocm-install-on-linux:install/3rd-party/tensorflow-install>`
|
||||
to get started.
|
||||
|
||||
- Official TensorFlow release:
|
||||
|
||||
- Official TensorFlow repository: `<https://github.com/tensorflow/tensorflow>`__
|
||||
|
||||
- See the `TensorFlow API versions <https://www.tensorflow.org/versions>`__ list.
|
||||
|
||||
.. note::
|
||||
|
||||
The official TensorFlow documentation does not cover ROCm support. Use the
|
||||
ROCm documentation for installation instructions for Tensorflow on ROCm.
|
||||
See :doc:`rocm-install-on-linux:install/3rd-party/tensorflow-install`.
|
||||
fixes, updates, and support for the latest ROCm versions.
|
||||
|
||||
.. _tensorflow-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
===============================================================================
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
AMD provides preconfigured Docker images with TensorFlow and the ROCm backend.
|
||||
These images are published on `Docker Hub <https://hub.docker.com/r/rocm/tensorflow>`__ and are the
|
||||
recommended way to get started with deep learning with TensorFlow on ROCm.
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes ready-made `TensorFlow images
|
||||
<https://hub.docker.com/r/rocm/tensorflow>`__ with ROCm backends on
|
||||
Docker Hub. The following Docker image tags and associated inventories are
|
||||
validated for `ROCm 6.4.2 <https://repo.radeon.com/rocm/apt/6.4.2/>`__. Click
|
||||
the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
.. list-table:: TensorFlow Docker image components
|
||||
:header-rows: 1
|
||||
|
||||
* - Docker image
|
||||
- TensorFlow
|
||||
- Ubuntu
|
||||
- Python
|
||||
- TensorBoard
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.18-dev/images/sha256-96754ce2d30f729e19b497279915b5212ba33d5e408e7e5dd3f2304d87e3441e"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- 24.04
|
||||
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.18-dev/images/sha256-fa741508d383858e86985a9efac85174529127408102558ae2e3a4ac894eea1e"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- 22.04
|
||||
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.17-dev/images/sha256-3a0aef09f2a8833c2b64b85874dd9449ffc2ad257351857338ff5b706c03a418"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- 24.04
|
||||
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.17-dev/images/sha256-bc7341a41ebe7ab261aa100732874507c452421ef733e408ac4f05ed453b0bc5"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- 22.04
|
||||
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.16-dev/images/sha256-4841a8df7c340dab79bf9362dad687797649a00d594e0832eb83ea6880a40d3b"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.16.2-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- 24.04
|
||||
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`__
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.16-dev/images/sha256-883fa95aba960c58a3e46fceaa18f03ede2c7df89b8e9fd603ab2d47e0852897"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.16.2-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- 22.04
|
||||
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`__
|
||||
To find the right image tag, see the :ref:`TensorFlow on ROCm installation
|
||||
documentation <rocm-install-on-linux:tensorflow-docker-support>` for a list of
|
||||
available ``rocm/tensorflow`` images.
|
||||
|
||||
|
||||
Critical ROCm libraries for TensorFlow
|
||||
@@ -205,7 +136,7 @@ The following section maps supported data types and GPU-accelerated TensorFlow
|
||||
features to their minimum supported ROCm and TensorFlow versions.
|
||||
|
||||
Data types
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
---------------
|
||||
|
||||
The data type of a tensor is specified using the ``dtype`` attribute or
|
||||
argument, and TensorFlow supports a wide range of data types for different use
|
||||
@@ -323,7 +254,7 @@ are as follows:
|
||||
- 1.7
|
||||
|
||||
Features
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
---------------
|
||||
|
||||
This table provides an overview of key features in TensorFlow and their
|
||||
availability in ROCm.
|
||||
@@ -415,7 +346,7 @@ availability in ROCm.
|
||||
- 1.9.2
|
||||
|
||||
Distributed library features
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
-----------------------------------
|
||||
|
||||
Enables developers to scale computations across multiple devices on a single machine or
|
||||
across multiple machines.
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
.. meta::
|
||||
:description: verl compatibility
|
||||
:keywords: GPU, verl compatibility
|
||||
:keywords: GPU, verl, deep learning, framework compatibility
|
||||
|
||||
.. version-set:: rocm_version latest
|
||||
|
||||
@@ -10,77 +10,109 @@
|
||||
verl compatibility
|
||||
*******************************************************************************
|
||||
|
||||
Volcano Engine Reinforcement Learning for LLMs (verl) is a reinforcement learning framework designed for large language models (LLMs).
|
||||
verl offers a scalable, open-source fine-tuning solution optimized for AMD Instinct GPUs with full ROCm support.
|
||||
Volcano Engine Reinforcement Learning for LLMs (`verl <https://verl.readthedocs.io/en/latest/>`__)
|
||||
is a reinforcement learning framework designed for large language models (LLMs).
|
||||
verl offers a scalable, open-source fine-tuning solution by using a hybrid programming model
|
||||
that makes it easy to define and run complex post-training dataflows efficiently.
|
||||
|
||||
* See the `verl documentation <https://verl.readthedocs.io/en/latest/>`_ for more information about verl.
|
||||
* The official verl GitHub repository is `https://github.com/volcengine/verl <https://github.com/volcengine/verl>`_.
|
||||
* Use the AMD-validated :ref:`Docker images <verl-docker-compat>` with ROCm and verl preinstalled.
|
||||
* See the :doc:`ROCm verl installation guide <rocm-install-on-linux:install/3rd-party/verl-install>` to install and get started.
|
||||
Its modular APIs separate computation from data, allowing smooth integration with other frameworks.
|
||||
It also supports flexible model placement across GPUs for efficient scaling on different cluster sizes.
|
||||
verl achieves high training and generation throughput by building on existing LLM frameworks.
|
||||
Its 3D-HybridEngine reduces memory use and communication overhead when switching between training
|
||||
and inference, improving overall performance.
|
||||
|
||||
.. note::
|
||||
|
||||
verl is supported on ROCm 6.2.0.
|
||||
|
||||
.. _verl-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
Support overview
|
||||
================================================================================
|
||||
|
||||
The benefits of verl in large-scale reinforcement learning from human feedback (RLHF) are discussed in the `Reinforcement Learning from Human Feedback on AMD GPUs with verl and ROCm Integration <https://rocm.blogs.amd.com/artificial-intelligence/verl-large-scale/README.html>`_ blog.
|
||||
- The ROCm-supported version of verl is maintained in the official `https://github.com/ROCm/verl
|
||||
<https://github.com/ROCm/verl>`__ repository, which differs from the
|
||||
`https://github.com/volcengine/verl <https://github.com/volcengine/verl>`__ upstream repository.
|
||||
|
||||
.. _verl-supported_features:
|
||||
- To get started and install verl on ROCm, use the prebuilt :ref:`Docker image <verl-docker-compat>`,
|
||||
which includes ROCm, verl, and all required dependencies.
|
||||
|
||||
Supported features
|
||||
===============================================================================
|
||||
- See the :doc:`ROCm verl installation guide <rocm-install-on-linux:install/3rd-party/verl-install>`
|
||||
for installation and setup instructions.
|
||||
|
||||
The following table shows verl on ROCm support for GPU-accelerated modules.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Module
|
||||
- Description
|
||||
- verl version
|
||||
- ROCm version
|
||||
* - ``FSDP``
|
||||
- Training engine
|
||||
- 0.3.0.post0
|
||||
- 6.2.0
|
||||
* - ``vllm``
|
||||
- Inference engine
|
||||
- 0.3.0.post0
|
||||
- 6.2.0
|
||||
- You can also consult the upstream `verl documentation <https://verl.readthedocs.io/en/latest/>`__
|
||||
for additional context.
|
||||
|
||||
.. _verl-docker-compat:
|
||||
|
||||
Docker image compatibility
|
||||
Compatibility matrix
|
||||
================================================================================
|
||||
|
||||
.. |docker-icon| raw:: html
|
||||
|
||||
<i class="fab fa-docker"></i>
|
||||
|
||||
AMD validates and publishes ready-made `ROCm verl Docker images <https://hub.docker.com/r/rocm/verl/tags>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tags and associated inventories represent the available verl versions from the official Docker Hub.
|
||||
AMD validates and publishes `verl Docker images <https://hub.docker.com/r/rocm/verl/tags>`_
|
||||
with ROCm backends on Docker Hub. The following Docker image tag and associated inventories
|
||||
represent the latest verl version from the official Docker Hub.
|
||||
Click |docker-icon| to view the image on Docker Hub.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:class: docker-image-compatibility
|
||||
|
||||
* - Docker image
|
||||
- ROCm
|
||||
- verl
|
||||
- Ubuntu
|
||||
- Pytorch
|
||||
- Python
|
||||
- vllm
|
||||
* - Docker image
|
||||
- ROCm
|
||||
- verl
|
||||
- Ubuntu
|
||||
- PyTorch
|
||||
- Python
|
||||
- vllm
|
||||
- GPU
|
||||
|
||||
* - .. raw:: html
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/verl/verl-0.3.0.post0_rocm6.2_vllm0.6.3/images/sha256-cbe423803fd7850448b22444176bee06f4dcf22cd3c94c27732752d3a39b04b2"><i class="fab fa-docker fa-lg"></i> rocm/verl</a>
|
||||
- `6.2.0 <https://repo.radeon.com/rocm/apt/6.2/>`_
|
||||
- `0.3.0post0 <https://github.com/volcengine/verl/releases/tag/v0.3.0.post0>`_
|
||||
- 20.04
|
||||
- `2.5.0 <https://github.com/ROCm/pytorch/tree/release/2.5>`_
|
||||
- `3.9.19 <https://www.python.org/downloads/release/python-3919/>`_
|
||||
- `0.6.3 <https://github.com/vllm-project/vllm/releases/tag/v0.6.3>`_
|
||||
<a href="https://hub.docker.com/layers/rocm/verl/verl-0.6.0.amd0_rocm7.0_vllm0.11.0.dev/images/sha256-f70a3ebc94c1f66de42a2fcc3f8a6a8d6d0881eb0e65b6958d7d6d24b3eecb0d"><i class="fab fa-docker fa-lg"></i> rocm/verl</a>
|
||||
- `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
|
||||
- `0.6.0 <https://github.com/volcengine/verl/releases/tag/v0.6.0>`__
|
||||
- 22.04
|
||||
- `2.9.0 <https://github.com/ROCm/pytorch/tree/release/2.9-rocm7.x-gfx115x>`__
|
||||
- `3.12.11 <https://www.python.org/downloads/release/python-31211/>`__
|
||||
- `0.11.0 <https://github.com/vllm-project/vllm/releases/tag/v0.11.0>`__
|
||||
- MI300X
|
||||
|
||||
* - .. raw:: html
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/verl/verl-0.3.0.post0_rocm6.2_vllm0.6.3/images/sha256-cbe423803fd7850448b22444176bee06f4dcf22cd3c94c27732752d3a39b04b2"><i class="fab fa-docker fa-lg"></i> rocm/verl</a>
|
||||
- `6.2.0 <https://repo.radeon.com/rocm/apt/6.2/>`__
|
||||
- `0.3.0.post0 <https://github.com/volcengine/verl/releases/tag/v0.3.0.post0>`__
|
||||
- 20.04
|
||||
- `2.5.0 <https://github.com/ROCm/pytorch/tree/release/2.5>`__
|
||||
- `3.9.19 <https://www.python.org/downloads/release/python-3919/>`__
|
||||
- `0.6.3 <https://github.com/vllm-project/vllm/releases/tag/v0.6.3>`__
|
||||
- MI300X
|
||||
|
||||
.. _verl-supported_features:
|
||||
|
||||
Supported modules with verl on ROCm
|
||||
===============================================================================
|
||||
|
||||
The following GPU-accelerated modules are supported with verl on ROCm:
|
||||
|
||||
- ``FSDP``: Training engine
|
||||
- ``vllm``: Inference engine
|
||||
|
||||
.. _verl-recommendations:
|
||||
|
||||
Use cases and recommendations
|
||||
================================================================================
|
||||
|
||||
* The benefits of verl in large-scale reinforcement learning from human feedback
|
||||
(RLHF) are discussed in the `Reinforcement Learning from Human Feedback on AMD
|
||||
GPUs with verl and ROCm Integration <https://rocm.blogs.amd.com/artificial-intelligence/verl-large-scale/README.html>`__
|
||||
blog. The blog post outlines how the Volcano Engine Reinforcement Learning
|
||||
(verl) framework integrates with the AMD ROCm platform to optimize training on
|
||||
AMD Instinct™ GPUs. The guide details the process of building a Docker image,
|
||||
setting up single-node and multi-node training environments, and highlights
|
||||
performance benchmarks demonstrating improved throughput and convergence accuracy.
|
||||
This resource serves as a comprehensive starting point for deploying verl on AMD GPUs,
|
||||
facilitating efficient RLHF training workflows.
|
||||
|
||||
Previous versions
|
||||
===============================================================================
|
||||
See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/verl-history` to find documentation for previous releases
|
||||
of the ``ROCm/verl`` Docker image.
|
||||
|
||||
@@ -13,21 +13,22 @@
|
||||
:gutter: 1
|
||||
|
||||
:::{grid-item-card}
|
||||
**AMD Instinct MI300 series**
|
||||
**AMD Instinct MI300 Series**
|
||||
|
||||
Review hardware aspects of the AMD Instinct™ MI300 series of GPU accelerators and the CDNA™ 3
|
||||
Review hardware aspects of the AMD Instinct™ MI300 Series GPUs and the CDNA™ 3
|
||||
architecture.
|
||||
|
||||
* [AMD Instinct™ MI300 microarchitecture](./gpu-arch/mi300.md)
|
||||
* [AMD Instinct MI300/CDNA3 ISA](https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf)
|
||||
* [White paper](https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf)
|
||||
* [Performance counters](./gpu-arch/mi300-mi200-performance-counters.rst)
|
||||
* [MI300 performance counters](./gpu-arch/mi300-mi200-performance-counters.rst)
|
||||
* [MI350 Series performance counters](./gpu-arch/mi350-performance-counters.rst)
|
||||
:::
|
||||
|
||||
:::{grid-item-card}
|
||||
**AMD Instinct MI200 series**
|
||||
**AMD Instinct MI200 Series**
|
||||
|
||||
Review hardware aspects of the AMD Instinct™ MI200 series of GPU accelerators and the CDNA™ 2
|
||||
Review hardware aspects of the AMD Instinct™ MI200 Series GPUs and the CDNA™ 2
|
||||
architecture.
|
||||
|
||||
* [AMD Instinct™ MI250 microarchitecture](./gpu-arch/mi250.md)
|
||||
@@ -40,7 +41,7 @@ architecture.
|
||||
:::{grid-item-card}
|
||||
**AMD Instinct MI100**
|
||||
|
||||
Review hardware aspects of the AMD Instinct™ MI100 series of GPU accelerators and the CDNA™ 1
|
||||
Review hardware aspects of the AMD Instinct™ MI100 Series GPUs and the CDNA™ 1
|
||||
architecture.
|
||||
|
||||
* [AMD Instinct™ MI100 microarchitecture](./gpu-arch/mi100.md)
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
---
|
||||
myst:
|
||||
html_meta:
|
||||
"description lang=en": "Learn about the AMD Instinct MI100 series architecture."
|
||||
"description lang=en": "Learn about the AMD Instinct MI100 Series architecture."
|
||||
"keywords": "Instinct, MI100, microarchitecture, AMD, ROCm"
|
||||
---
|
||||
|
||||
# AMD Instinct™ MI100 microarchitecture
|
||||
|
||||
The following image shows the node-level architecture of a system that
|
||||
comprises two AMD EPYC™ processors and (up to) eight AMD Instinct™ accelerators.
|
||||
comprises two AMD EPYC™ processors and (up to) eight AMD Instinct™ GPUs.
|
||||
The two EPYC processors are connected to each other with the AMD Infinity™
|
||||
fabric which provides a high-bandwidth (up to 18 GT/sec) and coherent links such
|
||||
that each processor can access the available node memory as a single
|
||||
@@ -18,29 +18,29 @@ available to connect the processors plus one PCIe Gen 4 x16 link per processor
|
||||
can attach additional I/O devices such as the host adapters for the network
|
||||
fabric.
|
||||
|
||||

|
||||

|
||||
|
||||
In a typical node configuration, each processor can host up to four AMD
|
||||
Instinct™ accelerators that are attached using PCIe Gen 4 links at 16 GT/sec,
|
||||
Instinct™ GPUs that are attached using PCIe Gen 4 links at 16 GT/sec,
|
||||
which corresponds to a peak bidirectional link bandwidth of 32 GB/sec. Each hive
|
||||
of four accelerators can participate in a fully connected, coherent AMD
|
||||
Instinct™ fabric that connects the four accelerators using 23 GT/sec AMD
|
||||
of four GPUs can participate in a fully connected, coherent AMD
|
||||
Instinct™ fabric that connects the four GPUs using 23 GT/sec AMD
|
||||
Infinity fabric links that run at a higher frequency than the inter-processor
|
||||
links. This inter-GPU link can be established in certified server systems if the
|
||||
GPUs are mounted in neighboring PCIe slots by installing the AMD Infinity
|
||||
Fabric™ bridge for the AMD Instinct™ accelerators.
|
||||
Fabric™ bridge for the AMD Instinct™ GPUs.
|
||||
|
||||
## Microarchitecture
|
||||
|
||||
The microarchitecture of the AMD Instinct accelerators is based on the AMD CDNA
|
||||
The microarchitecture of the AMD Instinct GPUs is based on the AMD CDNA
|
||||
architecture, which targets compute applications such as high-performance
|
||||
computing (HPC) and AI & machine learning (ML) that run on everything from
|
||||
individual servers to the world's largest exascale supercomputers. The overall
|
||||
system architecture is designed for extreme scalability and compute performance.
|
||||
|
||||
")
|
||||
")
|
||||
|
||||
The above image shows the AMD Instinct accelerator with its PCIe Gen 4 x16
|
||||
The above image shows the AMD Instinct GPU with its PCIe Gen 4 x16
|
||||
link (16 GT/sec, at the bottom) that connects the GPU to (one of) the host
|
||||
processor(s). It also shows the three AMD Infinity Fabric ports that provide
|
||||
high-speed links (23 GT/sec, also at the bottom) to the other GPUs of the local
|
||||
@@ -48,7 +48,7 @@ hive.
|
||||
|
||||
On the left and right of the floor plan, the High Bandwidth Memory (HBM)
|
||||
attaches via the GPU memory controller. The MI100 generation of the AMD
|
||||
Instinct accelerator offers four stacks of HBM generation 2 (HBM2) for a total
|
||||
Instinct GPU offers four stacks of HBM generation 2 (HBM2) for a total
|
||||
of 32GB with a 4,096bit-wide memory interface. The peak memory bandwidth of the
|
||||
attached HBM2 is 1.228 TB/sec at a memory clock frequency of 1.2 GHz.
|
||||
|
||||
@@ -64,7 +64,7 @@ Therefore, the theoretical maximum FP64 peak performance is 11.5 TFLOPS
|
||||

|
||||
|
||||
The preceding image shows the block diagram of a single CU of an AMD Instinct™
|
||||
MI100 accelerator and summarizes how instructions flow through the execution
|
||||
MI100 GPU and summarizes how instructions flow through the execution
|
||||
engines. The CU fetches the instructions via a 32KB instruction cache and moves
|
||||
them forward to execution via a dispatcher. The CU can handle up to ten
|
||||
wavefronts at a time and feed their instructions into the execution unit. The
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
---
|
||||
myst:
|
||||
html_meta:
|
||||
"description lang=en": "Learn about the AMD Instinct MI250 series architecture."
|
||||
"description lang=en": "Learn about the AMD Instinct MI250 Series architecture."
|
||||
"keywords": "Instinct, MI250, microarchitecture, AMD, ROCm"
|
||||
---
|
||||
|
||||
# AMD Instinct™ MI250 microarchitecture
|
||||
|
||||
The microarchitecture of the AMD Instinct MI250 accelerators is based on the
|
||||
The microarchitecture of the AMD Instinct MI250 GPU is based on the
|
||||
AMD CDNA 2 architecture that targets compute applications such as HPC,
|
||||
artificial intelligence (AI), and machine learning (ML) and that run on
|
||||
everything from individual servers to the world’s largest exascale
|
||||
@@ -40,7 +40,7 @@ execution units (also called matrix cores), which are geared toward executing
|
||||
matrix operations like matrix-matrix multiplications. For FP64, the peak
|
||||
performance of these units amounts to 90.5 TFLOPS.
|
||||
|
||||

|
||||

|
||||
|
||||
```{list-table} Peak-performance capabilities of the MI250 OAM for different data types.
|
||||
:header-rows: 1
|
||||
@@ -84,16 +84,9 @@ performance of these units amounts to 90.5 TFLOPS.
|
||||
- 362.1
|
||||
```
|
||||
|
||||
The above table summarizes the aggregated peak performance of the AMD
|
||||
Instinct MI250 OCP Open Accelerator Modules (OAM, OCP is short for Open Compute
|
||||
Platform) and its two GCDs for different data types and execution units. The
|
||||
middle column lists the peak performance (number of data elements processed in a
|
||||
single instruction) of a single compute unit if a SIMD (or matrix) instruction
|
||||
is being retired in each clock cycle. The third column lists the theoretical
|
||||
peak performance of the OAM module. The theoretical aggregated peak memory
|
||||
bandwidth of the GPU is 3.2 TB/sec (1.6 TB/sec per GCD).
|
||||
The above table summarizes the aggregated peak performance of the AMD Instinct MI250 Open Compute Platform (OCP) Open Accelerator Modules (OAMs) and its two GCDs for different data types and execution units. The middle column lists the peak performance (number of data elements processed in a single instruction) of a single compute unit if a SIMD (or matrix) instruction is being retired in each clock cycle. The third column lists the theoretical peak performance of the OAM module. The theoretical aggregated peak memory bandwidth of the GPU is 3.2 TB/sec (1.6 TB/sec per GCD).
|
||||
|
||||

|
||||

|
||||
|
||||
The following image shows the block diagram of an OAM package that consists
|
||||
of two GCDs, each of which constitutes one GPU device in the system. The two
|
||||
@@ -105,18 +98,18 @@ between the two GCDs of an OAM, or a bidirectional peak transfer bandwidth of
|
||||
## Node-level architecture
|
||||
|
||||
The following image shows the node-level architecture of a system that is
|
||||
based on the AMD Instinct MI250 accelerator. The MI250 OAMs attach to the host
|
||||
based on the AMD Instinct MI250 GPU. The MI250 OAMs attach to the host
|
||||
system via PCIe Gen 4 x16 links (yellow lines). Each GCD maintains its own PCIe
|
||||
x16 link to the host part of the system. Depending on the server platform, the
|
||||
GCD can attach to the AMD EPYC processor directly or via an optional PCIe switch
|
||||
. Note that some platforms may offer an x8 interface to the GCDs, which reduces
|
||||
the available host-to-GPU bandwidth.
|
||||
|
||||

|
||||

|
||||
|
||||
The preceding image shows the node-level architecture of a system with AMD
|
||||
EPYC processors in a dual-socket configuration and four AMD Instinct MI250
|
||||
accelerators. The MI250 OAMs attach to the host processors system via PCIe Gen 4
|
||||
GPUs. The MI250 OAMs attach to the host processors system via PCIe Gen 4
|
||||
x16 links (yellow lines). Depending on the system design, a PCIe switch may
|
||||
exist to make more PCIe lanes available for additional components like network
|
||||
interfaces and/or storage devices. Each GCD maintains its own PCIe x16 link to
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
.. meta::
|
||||
:description: MI300 and MI200 series performance counters and metrics
|
||||
:description: MI300 and MI200 Series performance counters and metrics
|
||||
:keywords: MI300, MI200, performance counters, command processor counters
|
||||
|
||||
***************************************************************************************************
|
||||
MI300 and MI200 series performance counters and metrics
|
||||
MI300 and MI200 Series performance counters and metrics
|
||||
***************************************************************************************************
|
||||
|
||||
This document lists and describes the hardware performance counters and derived metrics available
|
||||
for the AMD Instinct™ MI300 and MI200 GPU. You can also access this information using the
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:how-to/using-rocprofv3>`.
|
||||
|
||||
MI300 and MI200 series performance counters
|
||||
MI300 and MI200 Series performance counters
|
||||
===============================================================
|
||||
|
||||
Series performance counters include the following categories:
|
||||
@@ -27,7 +27,7 @@ The following sections provide additional details for each category.
|
||||
|
||||
.. note::
|
||||
|
||||
Preliminary validation of all MI300 and MI200 series performance counters is in progress. Those with
|
||||
Preliminary validation of all MI300 and MI200 Series performance counters is in progress. Those with
|
||||
an asterisk (*) require further evaluation.
|
||||
|
||||
.. _command-processor-counters:
|
||||
@@ -171,7 +171,7 @@ Instruction mix
|
||||
"``SQ_INSTS_SMEM``", "Instr", "Number of scalar memory instructions issued"
|
||||
"``SQ_INSTS_SMEM_NORM``", "Instr", "Number of scalar memory instructions normalized to match ``smem_level`` issued"
|
||||
"``SQ_INSTS_FLAT``", "Instr", "Number of flat instructions issued"
|
||||
"``SQ_INSTS_FLAT_LDS_ONLY``", "Instr", "**MI200 series only** Number of FLAT instructions that read/write only from/to LDS issued. Works only if ``EARLY_TA_DONE`` is enabled."
|
||||
"``SQ_INSTS_FLAT_LDS_ONLY``", "Instr", "**MI200 Series only** Number of FLAT instructions that read/write only from/to LDS issued. Works only if ``EARLY_TA_DONE`` is enabled."
|
||||
"``SQ_INSTS_LDS``", "Instr", "Number of LDS instructions issued **(MI200: includes flat; MI300: does not include flat)**"
|
||||
"``SQ_INSTS_GDS``", "Instr", "Number of global data share instructions issued"
|
||||
"``SQ_INSTS_EXP_GDS``", "Instr", "Number of EXP and global data share instructions excluding skipped export instructions issued"
|
||||
@@ -396,9 +396,9 @@ Texture cache per pipe counters
|
||||
"``TCP_UTCL1_TRANSLATION_MISS[n]``", "Req", "Number of unified translation cache (L1) translation misses", "0-15"
|
||||
"``TCP_UTCL1_PERMISSION_MISS[n]``", "Req", "Number of unified translation cache (L1) permission misses", "0-15"
|
||||
"``TCP_TOTAL_CACHE_ACCESSES[n]``", "Req", "Number of vector L1d cache accesses including hits and misses", "0-15"
|
||||
"``TCP_TCP_LATENCY[n]``", "Cycles", "**MI200 series only** Accumulated wave access latency to vL1D over all wavefronts", "0-15"
|
||||
"``TCP_TCC_READ_REQ_LATENCY[n]``", "Cycles", "**MI200 series only** Total vL1D to L2 request latency over all wavefronts for reads and atomics with return", "0-15"
|
||||
"``TCP_TCC_WRITE_REQ_LATENCY[n]``", "Cycles", "**MI200 series only** Total vL1D to L2 request latency over all wavefronts for writes and atomics without return", "0-15"
|
||||
"``TCP_TCP_LATENCY[n]``", "Cycles", "**MI200 Series only** Accumulated wave access latency to vL1D over all wavefronts", "0-15"
|
||||
"``TCP_TCC_READ_REQ_LATENCY[n]``", "Cycles", "**MI200 Series only** Total vL1D to L2 request latency over all wavefronts for reads and atomics with return", "0-15"
|
||||
"``TCP_TCC_WRITE_REQ_LATENCY[n]``", "Cycles", "**MI200 Series only** Total vL1D to L2 request latency over all wavefronts for writes and atomics without return", "0-15"
|
||||
"``TCP_TCC_READ_REQ[n]``", "Req", "Number of read requests to L2 cache", "0-15"
|
||||
"``TCP_TCC_WRITE_REQ[n]``", "Req", "Number of write requests to L2 cache", "0-15"
|
||||
"``TCP_TCC_ATOMIC_WITH_RET_REQ[n]``", "Req", "Number of atomic requests to L2 cache with return", "0-15"
|
||||
@@ -560,7 +560,7 @@ Note the following:
|
||||
``TCC_TAG_STALL[n]``, probes can stall the pipeline at a variety of places. There is no single point that
|
||||
can accurately measure the total stalls
|
||||
|
||||
MI300 and MI200 series derived metrics list
|
||||
MI300 and MI200 Series derived metrics list
|
||||
==============================================================
|
||||
|
||||
.. csv-table::
|
||||
|
||||
@@ -1,21 +1,21 @@
|
||||
---
|
||||
myst:
|
||||
html_meta:
|
||||
"description lang=en": "Learn about the AMD Instinct MI300 series architecture."
|
||||
"description lang=en": "Learn about the AMD Instinct MI300 Series architecture."
|
||||
"keywords": "Instinct, MI300X, MI300A, microarchitecture, AMD, ROCm"
|
||||
---
|
||||
|
||||
# AMD Instinct™ MI300 series microarchitecture
|
||||
# AMD Instinct™ MI300 Series microarchitecture
|
||||
|
||||
The AMD Instinct MI300 series accelerators are based on the AMD CDNA 3
|
||||
The AMD Instinct MI300 Series GPUs are based on the AMD CDNA 3
|
||||
architecture which was designed to deliver leadership performance for HPC, artificial intelligence (AI), and machine
|
||||
learning (ML) workloads. The AMD Instinct MI300 series accelerators are well-suited for extreme scalability and compute performance, running
|
||||
learning (ML) workloads. The AMD Instinct MI300 Series GPUs are well-suited for extreme scalability and compute performance, running
|
||||
on everything from individual servers to the world’s largest exascale supercomputers.
|
||||
|
||||
With the MI300 series, AMD is introducing the Accelerator Complex Die (XCD), which contains the
|
||||
With the MI300 Series, AMD is introducing the Accelerator Complex Die (XCD), which contains the
|
||||
GPU computational elements of the processor along with the lower levels of the cache hierarchy.
|
||||
|
||||
The following image depicts the structure of a single XCD in the AMD Instinct MI300 accelerator series.
|
||||
The following image depicts the structure of a single XCD in the AMD Instinct MI300 GPU Series.
|
||||
|
||||
```{figure} ../../data/shared/xcd-sys-arch.png
|
||||
---
|
||||
@@ -39,7 +39,7 @@ infrastructure) using the AMD Infinity Fabric™ technology as interconnect.
|
||||
The Matrix Cores inside the CDNA 3 CUs have significant improvements, emphasizing AI and machine
|
||||
learning, enhancing throughput of existing data types while adding support for new data types.
|
||||
CDNA 2 Matrix Cores support FP16 and BF16, while offering INT8 for inference. Compared to MI250X
|
||||
accelerators, CDNA 3 Matrix Cores triple the performance for FP16 and BF16, while providing a
|
||||
GPUs, CDNA 3 Matrix Cores triple the performance for FP16 and BF16, while providing a
|
||||
performance gain of 6.8 times for INT8. FP8 has a performance gain of 16 times compared to FP32,
|
||||
while TF32 has a gain of 4 times compared to FP32.
|
||||
|
||||
@@ -105,7 +105,7 @@ name: mi300-arch
|
||||
alt:
|
||||
align: center
|
||||
---
|
||||
MI300 series system architecture showing MI300A (left) with 6 XCDs and 3 CCDs, while the MI300X (right) has 8 XCDs.
|
||||
MI300 Series system architecture showing MI300A (left) with 6 XCDs and 3 CCDs, while the MI300X (right) has 8 XCDs.
|
||||
```
|
||||
|
||||
## Node-level architecture
|
||||
@@ -116,11 +116,11 @@ name: mi300-node
|
||||
|
||||
align: center
|
||||
---
|
||||
MI300 series node-level architecture showing 8 fully interconnected MI300X OAM modules connected to (optional) PCIEe switches via retimers and HGX connectors.
|
||||
MI300 Series node-level architecture showing 8 fully interconnected MI300X OAM modules connected to (optional) PCIEe switches via retimers and HGX connectors.
|
||||
```
|
||||
|
||||
The image above shows the node-level architecture of a system with AMD EPYC processors in a
|
||||
dual-socket configuration and eight AMD Instinct MI300X accelerators. The MI300X OAMs attach to the
|
||||
dual-socket configuration and eight AMD Instinct MI300X GPUs. The MI300X OAMs attach to the
|
||||
host system via PCIe Gen 5 x16 links (yellow lines). The GPUs are using seven high-bandwidth,
|
||||
low-latency AMD Infinity Fabric™ links (red lines) to form a fully connected 8-GPU system.
|
||||
|
||||
|
||||
530
docs/conceptual/gpu-arch/mi350-performance-counters.rst
Normal file
530
docs/conceptual/gpu-arch/mi350-performance-counters.rst
Normal file
@@ -0,0 +1,530 @@
|
||||
.. meta::
|
||||
:description: MI355 Series performance counters and metrics
|
||||
:keywords: MI355, MI355X, MI3XX
|
||||
|
||||
***********************************
|
||||
MI350 Series performance counters
|
||||
***********************************
|
||||
|
||||
This topic lists and describes the hardware performance counters and derived metrics available on the AMD Instinct MI350 and MI355 GPUs. These counters are available for profiling using `ROCprofiler-SDK <https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/latest/index.html>`_ and `ROCm Compute Profiler <https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/>`_.
|
||||
|
||||
The following sections list the performance counters based on the IP blocks.
|
||||
|
||||
Command processor packet processor counters (CPC)
|
||||
==================================================
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Hardware counter
|
||||
- Definition
|
||||
|
||||
* - CPC_ALWAYS_COUNT
|
||||
- Always count.
|
||||
|
||||
* - CPC_ADC_VALID_CHUNK_NOT_AVAIL
|
||||
- ADC valid chunk is not available when dispatch walking is in progress in the multi-xcc mode.
|
||||
|
||||
* - CPC_ADC_DISPATCH_ALLOC_DONE
|
||||
- ADC dispatch allocation is done.
|
||||
|
||||
* - CPC_ADC_VALID_CHUNK_END
|
||||
- ADC crawler's valid chunk end in the multi-xcc mode.
|
||||
|
||||
* - CPC_SYNC_FIFO_FULL_LEVEL
|
||||
- SYNC FIFO full last cycles.
|
||||
|
||||
* - CPC_SYNC_FIFO_FULL
|
||||
- SYNC FIFO full times.
|
||||
|
||||
* - CPC_GD_BUSY
|
||||
- ADC busy.
|
||||
|
||||
* - CPC_TG_SEND
|
||||
- ADC thread group send.
|
||||
|
||||
* - CPC_WALK_NEXT_CHUNK
|
||||
- ADC walking next valid chunk in the multi-xcc mode.
|
||||
|
||||
* - CPC_STALLED_BY_SE0_SPI
|
||||
- ADC CSDATA stalled by SE0SPI.
|
||||
|
||||
* - CPC_STALLED_BY_SE1_SPI
|
||||
- ADC CSDATA stalled by SE1SPI.
|
||||
|
||||
* - CPC_STALLED_BY_SE2_SPI
|
||||
- ADC CSDATA stalled by SE2SPI.
|
||||
|
||||
* - CPC_STALLED_BY_SE3_SPI
|
||||
- ADC CSDATA stalled by SE3SPI.
|
||||
|
||||
* - CPC_LTE_ALL
|
||||
- CPC sync counter LteAll. Only Master XCD manages LteAll.
|
||||
|
||||
* - CPC_SYNC_WRREQ_FIFO_BUSY
|
||||
- CPC sync counter request FIFO is not empty.
|
||||
|
||||
* - CPC_CANE_BUSY
|
||||
- CPC CANE bus is busy, which indicates the presence of inflight sync counter requests.
|
||||
|
||||
* - CPC_CANE_STALL
|
||||
- CPC sync counter sending is stalled by CANE.
|
||||
|
||||
Shader pipe interpolators (SPI) counters
|
||||
=========================================
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Hardware counter
|
||||
- Definition
|
||||
|
||||
* - SPI_CS0_WINDOW_VALID
|
||||
- Clock count enabled by PIPE0 perfcounter_start event.
|
||||
|
||||
* - SPI_CS0_BUSY
|
||||
- Number of clocks with outstanding waves for PIPE0 (SPI or SH).
|
||||
|
||||
* - SPI_CS0_NUM_THREADGROUPS
|
||||
- Number of thread groups launched for PIPE0.
|
||||
|
||||
* - SPI_CS0_CRAWLER_STALL
|
||||
- Number of clocks when PIPE0 event or wave order FIFO is full.
|
||||
|
||||
* - SPI_CS0_EVENT_WAVE
|
||||
- Number of PIPE0 events and waves.
|
||||
|
||||
* - SPI_CS0_WAVE
|
||||
- Number of PIPE0 waves.
|
||||
|
||||
* - SPI_CS1_WINDOW_VALID
|
||||
- Clock count enabled by PIPE1 perfcounter_start event.
|
||||
|
||||
* - SPI_CS1_BUSY
|
||||
- Number of clocks with outstanding waves for PIPE1 (SPI or SH).
|
||||
|
||||
* - SPI_CS1_NUM_THREADGROUPS
|
||||
- Number of thread groups launched for PIPE1.
|
||||
|
||||
* - SPI_CS1_CRAWLER_STALL
|
||||
- Number of clocks when PIPE1 event or wave order FIFO is full.
|
||||
|
||||
* - SPI_CS1_EVENT_WAVE
|
||||
- Number of PIPE1 events and waves.
|
||||
|
||||
* - SPI_CS1_WAVE
|
||||
- Number of PIPE1 waves.
|
||||
|
||||
* - SPI_CS2_WINDOW_VALID
|
||||
- Clock count enabled by PIPE2 perfcounter_start event.
|
||||
|
||||
* - SPI_CS2_BUSY
|
||||
- Number of clocks with outstanding waves for PIPE2 (SPI or SH).
|
||||
|
||||
* - SPI_CS2_NUM_THREADGROUPS
|
||||
- Number of thread groups launched for PIPE2.
|
||||
|
||||
* - SPI_CS2_CRAWLER_STALL
|
||||
- Number of clocks when PIPE2 event or wave order FIFO is full.
|
||||
|
||||
* - SPI_CS2_EVENT_WAVE
|
||||
- Number of PIPE2 events and waves.
|
||||
|
||||
* - SPI_CS2_WAVE
|
||||
- Number of PIPE2 waves.
|
||||
|
||||
* - SPI_CS3_WINDOW_VALID
|
||||
- Clock count enabled by PIPE3 perfcounter_start event.
|
||||
|
||||
* - SPI_CS3_BUSY
|
||||
- Number of clocks with outstanding waves for PIPE3 (SPI or SH).
|
||||
|
||||
* - SPI_CS3_NUM_THREADGROUPS
|
||||
- Number of thread groups launched for PIPE3.
|
||||
|
||||
* - SPI_CS3_CRAWLER_STALL
|
||||
- Number of clocks when PIPE3 event or wave order FIFO is full.
|
||||
|
||||
* - SPI_CS3_EVENT_WAVE
|
||||
- Number of PIPE3 events and waves.
|
||||
|
||||
* - SPI_CS3_WAVE
|
||||
- Number of PIPE3 waves.
|
||||
|
||||
* - SPI_CSQ_P0_Q0_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE0 Queue0.
|
||||
|
||||
* - SPI_CSQ_P0_Q1_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE0 Queue1.
|
||||
|
||||
* - SPI_CSQ_P0_Q2_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE0 Queue2.
|
||||
|
||||
* - SPI_CSQ_P0_Q3_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE0 Queue3.
|
||||
|
||||
* - SPI_CSQ_P0_Q4_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE0 Queue4.
|
||||
|
||||
* - SPI_CSQ_P0_Q5_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE0 Queue5.
|
||||
|
||||
* - SPI_CSQ_P0_Q6_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE0 Queue6.
|
||||
|
||||
* - SPI_CSQ_P0_Q7_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE0 Queue7.
|
||||
|
||||
* - SPI_CSQ_P1_Q0_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE1 Queue0.
|
||||
|
||||
* - SPI_CSQ_P1_Q1_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE1 Queue1.
|
||||
|
||||
* - SPI_CSQ_P1_Q2_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE1 Queue2.
|
||||
|
||||
* - SPI_CSQ_P1_Q3_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE1 Queue3.
|
||||
|
||||
* - SPI_CSQ_P1_Q4_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE1 Queue4.
|
||||
|
||||
* - SPI_CSQ_P1_Q5_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE1 Queue5.
|
||||
|
||||
* - SPI_CSQ_P1_Q6_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE1 Queue6.
|
||||
|
||||
* - SPI_CSQ_P1_Q7_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE1 Queue7.
|
||||
|
||||
* - SPI_CSQ_P2_Q0_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE2 Queue0.
|
||||
|
||||
* - SPI_CSQ_P2_Q1_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE2 Queue1.
|
||||
|
||||
* - SPI_CSQ_P2_Q2_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE2 Queue2.
|
||||
|
||||
* - SPI_CSQ_P2_Q3_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE2 Queue3.
|
||||
|
||||
* - SPI_CSQ_P2_Q4_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE2 Queue4.
|
||||
|
||||
* - SPI_CSQ_P2_Q5_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE2 Queue5.
|
||||
|
||||
* - SPI_CSQ_P2_Q6_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE2 Queue6.
|
||||
|
||||
* - SPI_CSQ_P2_Q7_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE2 Queue7.
|
||||
|
||||
* - SPI_CSQ_P3_Q0_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE3 Queue0.
|
||||
|
||||
* - SPI_CSQ_P3_Q1_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE3 Queue1.
|
||||
|
||||
* - SPI_CSQ_P3_Q2_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE3 Queue2.
|
||||
|
||||
* - SPI_CSQ_P3_Q3_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE3 Queue3.
|
||||
|
||||
* - SPI_CSQ_P3_Q4_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE3 Queue4.
|
||||
|
||||
* - SPI_CSQ_P3_Q5_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE3 Queue5.
|
||||
|
||||
* - SPI_CSQ_P3_Q6_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE3 Queue6.
|
||||
|
||||
* - SPI_CSQ_P3_Q7_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE3 Queue7.
|
||||
|
||||
* - SPI_CSQ_P0_OCCUPANCY
|
||||
- Sum of occupancy info for all PIPE0 queues.
|
||||
|
||||
* - SPI_CSQ_P1_OCCUPANCY
|
||||
- Sum of occupancy info for all PIPE1 queues.
|
||||
|
||||
* - SPI_CSQ_P2_OCCUPANCY
|
||||
- Sum of occupancy info for all PIPE2 queues.
|
||||
|
||||
* - SPI_CSQ_P3_OCCUPANCY
|
||||
- Sum of occupancy info for all PIPE3 queues.
|
||||
|
||||
* - SPI_VWC0_VDATA_VALID_WR
|
||||
- Number of clocks VGPR bus_0 writes VGPRs.
|
||||
|
||||
* - SPI_VWC1_VDATA_VALID_WR
|
||||
- Number of clocks VGPR bus_1 writes VGPRs.
|
||||
|
||||
* - SPI_CSC_WAVE_CNT_BUSY
|
||||
- Number of cycles when there is any wave in the pipe.
|
||||
|
||||
Compute unit (SQ) counters
|
||||
===========================
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Hardware counter
|
||||
- Definition
|
||||
|
||||
* - SQ_INSTS_VALU_MFMA_F6F4
|
||||
- Number of VALU V_MFMA_*_F6F4 instructions.
|
||||
|
||||
* - SQ_INSTS_VALU_MFMA_MOPS_F6F4
|
||||
- Number of VALU matrix with the performed math operations (add or mul) divided by 512, assuming a full EXEC mask of F6 or F4 data type.
|
||||
|
||||
* - SQ_ACTIVE_INST_VALU2
|
||||
- Number of quad-cycles when two VALU instructions are issued (per-simd, nondeterministic).
|
||||
|
||||
* - SQ_INSTS_LDS_LOAD
|
||||
- Number of LDS load instructions issued (per-simd, emulated).
|
||||
|
||||
* - SQ_INSTS_LDS_STORE
|
||||
- Number of LDS store instructions issued (per-simd, emulated).
|
||||
|
||||
* - SQ_INSTS_LDS_ATOMIC
|
||||
- Number of LDS atomic instructions issued (per-simd, emulated).
|
||||
|
||||
* - SQ_INSTS_LDS_LOAD_BANDWIDTH
|
||||
- Total number of 64-bytes loaded (instrSize * CountOnes(EXEC))/64 (per-simd, emulated).
|
||||
|
||||
* - SQ_INSTS_LDS_STORE_BANDWIDTH
|
||||
- Total number of 64-bytes written (instrSize * CountOnes(EXEC))/64 (per-simd, emulated).
|
||||
|
||||
* - SQ_INSTS_LDS_ATOMIC_BANDWIDTH
|
||||
- Total number of 64-bytes atomic (instrSize * CountOnes(EXEC))/64 (per-simd, emulated).
|
||||
|
||||
* - SQ_INSTS_VALU_FLOPS_FP16
|
||||
- Counts FLOPS per instruction on float 16 excluding MFMA/SMFMA.
|
||||
|
||||
* - SQ_INSTS_VALU_FLOPS_FP32
|
||||
- Counts FLOPS per instruction on float 32 excluding MFMA/SMFMA.
|
||||
|
||||
* - SQ_INSTS_VALU_FLOPS_FP64
|
||||
- Counts FLOPS per instruction on float 64 excluding MFMA/SMFMA.
|
||||
|
||||
* - SQ_INSTS_VALU_FLOPS_FP16_TRANS
|
||||
- Counts FLOPS per instruction on float 16 trans excluding MFMA/SMFMA.
|
||||
|
||||
* - SQ_INSTS_VALU_FLOPS_FP32_TRANS
|
||||
- Counts FLOPS per instruction on float 32 trans excluding MFMA/SMFMA.
|
||||
|
||||
* - SQ_INSTS_VALU_FLOPS_FP64_TRANS
|
||||
- Counts FLOPS per instruction on float 64 trans excluding MFMA/SMFMA.
|
||||
|
||||
* - SQ_INSTS_VALU_IOPS
|
||||
- Counts OPS per instruction on integer or unsigned or bit data (per-simd, emulated).
|
||||
|
||||
* - SQ_LDS_DATA_FIFO_FULL
|
||||
- Number of cycles LDS data FIFO is full (nondeterministic, unwindowed).
|
||||
|
||||
* - SQ_LDS_CMD_FIFO_FULL
|
||||
- Number of cycles LDS command FIFO is full (nondeterministic, unwindowed).
|
||||
|
||||
* - SQ_VMEM_TA_ADDR_FIFO_FULL
|
||||
- Number of cycles texture requests are stalled due to full address FIFO in TA (nondeterministic, unwindowed).
|
||||
|
||||
* - SQ_VMEM_TA_CMD_FIFO_FULL
|
||||
- Number of cycles texture requests are stalled due to full cmd FIFO in TA (nondeterministic, unwindowed).
|
||||
|
||||
* - SQ_VMEM_WR_TA_DATA_FIFO_FULL
|
||||
- Number of cycles texture writes are stalled due to full data FIFO in TA (nondeterministic, unwindowed).
|
||||
|
||||
* - SQC_ICACHE_MISSES_DUPLICATE
|
||||
- Number of duplicate misses (access to a non-resident, miss pending CL) (per-SQ, per-Bank, nondeterministic).
|
||||
|
||||
* - SQC_DCACHE_MISSES_DUPLICATE
|
||||
- Number of duplicate misses (access to a non-resident, miss pending CL) (per-SQ, per-Bank, nondeterministic).
|
||||
|
||||
Texture addressing (TA) unit counters
|
||||
======================================
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Hardware counter
|
||||
- Definition
|
||||
|
||||
* - TA_BUFFER_READ_LDS_WAVEFRONTS
|
||||
- Number of buffer read wavefronts for LDS return processed by the TA.
|
||||
|
||||
* - TA_FLAT_READ_LDS_WAVEFRONTS
|
||||
- Number of flat opcode reads for LDS return processed by the TA.
|
||||
|
||||
Texture data (TD) unit counters
|
||||
================================
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Hardware counter
|
||||
- Definition
|
||||
|
||||
* - TD_WRITE_ACKT_WAVEFRONT
|
||||
- Number of write acknowledgments, sent to SQ and not to SP.
|
||||
|
||||
* - TD_TD_SP_TRAFFIC
|
||||
- Number of times this TD sends data to the SP.
|
||||
|
||||
Texture cache per pipe (TCP) counters
|
||||
======================================
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Hardware counter
|
||||
- Definition
|
||||
|
||||
* - TCP_TCP_TA_ADDR_STALL_CYCLES
|
||||
- TCP stalls TA addr interface.
|
||||
|
||||
* - TCP_TCP_TA_DATA_STALL_CYCLES
|
||||
- TCP stalls TA data interface. Now windowed.
|
||||
|
||||
* - TCP_LFIFO_STALL_CYCLES
|
||||
- Memory latency FIFOs full stall.
|
||||
|
||||
* - TCP_RFIFO_STALL_CYCLES
|
||||
- Memory Request FIFOs full stall.
|
||||
|
||||
* - TCP_TCR_RDRET_STALL
|
||||
- Write into cache stalled by read return from TCR.
|
||||
|
||||
* - TCP_PENDING_STALL_CYCLES
|
||||
- Stall due to data pending from L2.
|
||||
|
||||
* - TCP_UTCL1_SERIALIZATION_STALL
|
||||
- Total number of stalls caused due to serializing translation requests through the UTCL1.
|
||||
|
||||
* - TCP_UTCL1_THRASHING_STALL
|
||||
- Stall caused by thrashing feature in any probe. Lacks accuracy when the stall signal overlaps between probe0 and probe1, which is worse with MECO of thrashing deadlock. Some probe0 events could miss being counted in with MECO on. This perf count provides a rough thrashing estimate.
|
||||
|
||||
* - TCP_UTCL1_TRANSLATION_MISS_UNDER_MISS
|
||||
- Translation miss_under_miss.
|
||||
|
||||
* - TCP_UTCL1_STALL_INFLIGHT_MAX
|
||||
- Total UTCL1 stalls due to inflight counter saturation.
|
||||
|
||||
* - TCP_UTCL1_STALL_LRU_INFLIGHT
|
||||
- Total UTCL1 stalls due to LRU cache line with inflight traffic.
|
||||
|
||||
* - TCP_UTCL1_STALL_MULTI_MISS
|
||||
- Total UTCL1 stalls due to arbitrated multiple misses.
|
||||
|
||||
* - TCP_UTCL1_LFIFO_FULL
|
||||
- Total UTCL1 and UTCL2 latency, which hides FIFO full cycles.
|
||||
|
||||
* - TCP_UTCL1_STALL_LFIFO_NOT_RES
|
||||
- Total UTCL1 stalls due to UTCL2 latency, which hides FIFO output (not resident).
|
||||
|
||||
* - TCP_UTCL1_STALL_UTCL2_REQ_OUT_OF_CREDITS
|
||||
- Total UTCL1 stalls due to UTCL2_req being out of credits.
|
||||
|
||||
* - TCP_CLIENT_UTCL1_INFLIGHT
|
||||
- The sum of inflight client to UTCL1 requests per cycle.
|
||||
|
||||
* - TCP_TAGRAM0_REQ
|
||||
- Total L2 requests mapping to TagRAM 0 from this TCP to all TCCs.
|
||||
|
||||
* - TCP_TAGRAM1_REQ
|
||||
- Total L2 requests mapping to TagRAM 1 from this TCP to all TCCs.
|
||||
|
||||
* - TCP_TAGRAM2_REQ
|
||||
- Total L2 requests mapping to TagRAM 2 from this TCP to all TCCs.
|
||||
|
||||
* - TCP_TAGRAM3_REQ
|
||||
- Total L2 requests mapping to TagRAM 3 from this TCP to all TCCs.
|
||||
|
||||
* - TCP_TCP_LATENCY
|
||||
- Total TCP wave latency (from the first clock of wave entering to the first clock of wave leaving). Divide by TA_TCP_STATE_READ to find average wave latency.
|
||||
|
||||
* - TCP_TCC_READ_REQ_LATENCY
|
||||
- Total TCP to TCC request latency for reads and atomics with return. Not Windowed.
|
||||
|
||||
* - TCP_TCC_WRITE_REQ_LATENCY
|
||||
- Total TCP to TCC request latency for writes and atomics without return. Not Windowed.
|
||||
|
||||
* - TCP_TCC_WRITE_REQ_HOLE_LATENCY
|
||||
- Total TCP req to TCC hole latency for writes and atomics. Not Windowed.
|
||||
|
||||
Texture cache per channel (TCC) counters
|
||||
=========================================
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Hardware counter
|
||||
- Definition
|
||||
|
||||
* - TCC_READ_SECTORS
|
||||
- Total number of 32B data sectors in read requests.
|
||||
|
||||
* - TCC_WRITE_SECTORS
|
||||
- Total number of 32B data sectors in write requests.
|
||||
|
||||
* - TCC_ATOMIC_SECTORS
|
||||
- Total number of 32B data sectors in atomic requests.
|
||||
|
||||
* - TCC_BYPASS_REQ
|
||||
- Number of bypass requests. This is measured at the tag block.
|
||||
|
||||
* - TCC_LATENCY_FIFO_FULL
|
||||
- Number of cycles when the latency FIFO is full.
|
||||
|
||||
* - TCC_SRC_FIFO_FULL
|
||||
- Number of cycles when the SRC FIFO is assumed to be full as measured at the IB block.
|
||||
|
||||
* - TCC_EA0_RDREQ_64B
|
||||
- Number of 64-byte TCC/EA read requests.
|
||||
|
||||
* - TCC_EA0_RDREQ_128B
|
||||
- Number of 128-byte TCC/EA read requests.
|
||||
|
||||
* - TCC_IB_REQ
|
||||
- Number of requests through the IB. This measures the number of raw requests from graphics clients to this TCC.
|
||||
|
||||
* - TCC_IB_STALL
|
||||
- Number of cycles when the IB output is stalled.
|
||||
|
||||
* - TCC_EA0_WRREQ_WRITE_DRAM
|
||||
- Number of TCC/EA write requests (32-byte or 64-byte) destined for DRAM (MC).
|
||||
|
||||
* - TCC_EA0_WRREQ_ATOMIC_DRAM
|
||||
- Number of TCC/EA atomic requests (32-byte or 64-byte) destined for DRAM (MC).
|
||||
|
||||
* - TCC_EA0_RDREQ_DRAM_32B
|
||||
- Number of 32-byte TCC/EA read requests due to DRAM traffic. One 64-byte request is counted as two and one 128-byte as four.
|
||||
|
||||
* - TCC_EA0_RDREQ_GMI_32B
|
||||
- Number of 32-byte TCC/EA read requests due to GMI traffic. One 64-byte request is counted as two and one 128-byte as four.
|
||||
|
||||
* - TCC_EA0_RDREQ_IO_32B
|
||||
- Number of 32-byte TCC/EA read requests due to IO traffic. One 64-byte request is counted as two and one 128-byte as four.
|
||||
|
||||
* - TCC_EA0_WRREQ_WRITE_DRAM_32B
|
||||
- Number of 32-byte TCC/EA write requests due to DRAM traffic. One 64-byte request is counted as two.
|
||||
|
||||
* - TCC_EA0_WRREQ_ATOMIC_DRAM_32B
|
||||
- Number of 32-byte TCC/EA atomic requests due to DRAM traffic. One 64-byte request is counted as two.
|
||||
|
||||
* - TCC_EA0_WRREQ_WRITE_GMI_32B
|
||||
- Number of 32-byte TCC/EA write requests due to GMI traffic. One 64-byte request is counted as two.
|
||||
|
||||
* - TCC_EA0_WRREQ_ATOMIC_GMI_32B
|
||||
- Number of 32-byte TCC/EA atomic requests due to GMI traffic. One 64-byte request is counted as two.
|
||||
|
||||
* - TCC_EA0_WRREQ_WRITE_IO_32B
|
||||
- Number of 32-byte TCC/EA write requests due to IO traffic. One 64-byte request is counted as two.
|
||||
|
||||
* - TCC_EA0_WRREQ_ATOMIC_IO_32B
|
||||
- Number of 32-byte TCC/EA atomic requests due to IO traffic. One 64-byte request is counted as two.
|
||||
@@ -34,7 +34,7 @@ Runtime
|
||||
|
||||
```{code-block} shell
|
||||
:caption: Example to expose the 1. device and a device based on UUID.
|
||||
export ROCR_VISIBLE_DEVICES="0,GPU-DEADBEEFDEADBEEF"
|
||||
export ROCR_VISIBLE_DEVICES="0,GPU-4b2c1a9f-8d3e-6f7a-b5c9-2e4d8a1f6c3b"
|
||||
```
|
||||
|
||||
### `GPU_DEVICE_ORDINAL`
|
||||
|
||||
89
docs/conf.py
89
docs/conf.py
@@ -8,18 +8,23 @@ import os
|
||||
import shutil
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from subprocess import run
|
||||
|
||||
shutil.copy2("../RELEASE.md", "./about/release-notes.md")
|
||||
shutil.copy2("../CHANGELOG.md", "./release/changelog.md")
|
||||
gh_release_path = os.path.join("..", "RELEASE.md")
|
||||
gh_changelog_path = os.path.join("..", "CHANGELOG.md")
|
||||
sphinx_release_path = os.path.join("about", "release-notes.md")
|
||||
sphinx_changelog_path = os.path.join("release", "changelog.md")
|
||||
shutil.copy2(gh_release_path, sphinx_release_path)
|
||||
shutil.copy2(gh_changelog_path, sphinx_changelog_path)
|
||||
|
||||
# Mark the consolidated changelog as orphan to prevent Sphinx from warning about missing toctree entries
|
||||
with open("./release/changelog.md", "r+") as file:
|
||||
with open(sphinx_changelog_path, "r+", encoding="utf-8") as file:
|
||||
content = file.read()
|
||||
file.seek(0)
|
||||
file.write(":orphan:\n" + content)
|
||||
|
||||
# Replace GitHub-style [!ADMONITION]s with Sphinx-compatible ```{admonition} blocks
|
||||
with open("./release/changelog.md", "r") as file:
|
||||
with open(sphinx_changelog_path, "r", encoding="utf-8") as file:
|
||||
lines = file.readlines()
|
||||
|
||||
modified_lines = []
|
||||
@@ -57,11 +62,14 @@ with open("./release/changelog.md", "r") as file:
|
||||
|
||||
file.close()
|
||||
|
||||
with open("./release/changelog.md", 'w') as file:
|
||||
with open(sphinx_changelog_path, "w", encoding="utf-8") as file:
|
||||
file.writelines(modified_lines)
|
||||
|
||||
os.system("mkdir -p ../_readthedocs/html/downloads")
|
||||
os.system("cp compatibility/compatibility-matrix-historical-6.0.csv ../_readthedocs/html/downloads/compatibility-matrix-historical-6.0.csv")
|
||||
matrix_path = os.path.join("compatibility", "compatibility-matrix-historical-6.0.csv")
|
||||
rtd_path = os.path.join("..", "_readthedocs", "html", "downloads")
|
||||
if not os.path.exists(rtd_path):
|
||||
os.makedirs(rtd_path)
|
||||
shutil.copy2(matrix_path, rtd_path)
|
||||
|
||||
latex_engine = "xelatex"
|
||||
latex_elements = {
|
||||
@@ -73,24 +81,27 @@ latex_elements = {
|
||||
}
|
||||
|
||||
html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "rocm.docs.amd.com")
|
||||
html_context = {}
|
||||
html_context = {"docs_header_version": "7.1.1"}
|
||||
if os.environ.get("READTHEDOCS", "") == "True":
|
||||
html_context["READTHEDOCS"] = True
|
||||
|
||||
# Check if the branch is a docs/ branch
|
||||
official_branch = run(["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True).stdout.find("docs/")
|
||||
|
||||
# configurations for PDF output by Read the Docs
|
||||
project = "ROCm Documentation"
|
||||
project_path = os.path.abspath(".").replace("\\", "/")
|
||||
author = "Advanced Micro Devices, Inc."
|
||||
copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved."
|
||||
version = "6.4.3"
|
||||
release = "6.4.3"
|
||||
version = "7.1.1"
|
||||
release = "7.1.1"
|
||||
setting_all_article_info = True
|
||||
all_article_info_os = ["linux", "windows"]
|
||||
all_article_info_author = ""
|
||||
|
||||
# pages with specific settings
|
||||
article_pages = [
|
||||
{"file": "about/release-notes", "os": ["linux"], "date": "2025-08-07"},
|
||||
{"file": "about/release-notes", "os": ["linux"], "date": "2025-11-26"},
|
||||
{"file": "release/changelog", "os": ["linux"],},
|
||||
{"file": "compatibility/compatibility-matrix", "os": ["linux"]},
|
||||
{"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]},
|
||||
@@ -100,12 +111,17 @@ article_pages = [
|
||||
{"file": "compatibility/ml-compatibility/stanford-megatron-lm-compatibility", "os": ["linux"]},
|
||||
{"file": "compatibility/ml-compatibility/dgl-compatibility", "os": ["linux"]},
|
||||
{"file": "compatibility/ml-compatibility/megablocks-compatibility", "os": ["linux"]},
|
||||
{"file": "compatibility/ml-compatibility/taichi-compatibility", "os": ["linux"]},
|
||||
{"file": "compatibility/ml-compatibility/ray-compatibility", "os": ["linux"]},
|
||||
{"file": "compatibility/ml-compatibility/llama-cpp-compatibility", "os": ["linux"]},
|
||||
{"file": "compatibility/ml-compatibility/flashinfer-compatibility", "os": ["linux"]},
|
||||
{"file": "how-to/deep-learning-rocm", "os": ["linux"]},
|
||||
|
||||
{"file": "how-to/rocm-for-ai/index", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/install", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/system-health-check", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/system-setup/index", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/system-setup/multi-node-setup", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/system-setup/prerequisite-system-validation", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/system-setup/system-health-check", "os": ["linux"]},
|
||||
|
||||
{"file": "how-to/rocm-for-ai/training/index", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/train-a-model", "os": ["linux"]},
|
||||
@@ -117,15 +133,37 @@ article_pages = [
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.3", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.4", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.5", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.6", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.7", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.8", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.9", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-v25.10", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/megatron-lm-primus-migration-guide", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/primus-megatron", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-megatron-v25.7", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-megatron-v25.8", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-megatron-v25.9", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-megatron-v25.10", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/pytorch-training", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-history", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.3", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.4", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.5", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.6", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.7", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.8", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.9", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.10", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/primus-pytorch", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-pytorch-v25.8", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-pytorch-v25.9", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/primus-pytorch-v25.10", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-history", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-v25.4", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-v25.5", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/xdit-diffusion-inference", "os": ["linux"]},
|
||||
|
||||
{"file": "how-to/rocm-for-ai/fine-tuning/index", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/fine-tuning/overview", "os": ["linux"]},
|
||||
@@ -147,7 +185,19 @@ article_pages = [
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.5-20250521", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.0.1-20250605", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.0.1-20250702", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.1-20250702", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.1-20250715", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.0-20250812", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.1-20250909", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.2-20251006", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.11.1-20251103", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/sglang-history", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/xdit-diffusion-inference", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.10", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.11", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.12", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.13", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]},
|
||||
|
||||
{"file": "how-to/rocm-for-ai/inference-optimization/index", "os": ["linux"]},
|
||||
@@ -175,7 +225,7 @@ external_toc_path = "./sphinx/_toc.yml"
|
||||
# Add the _extensions directory to Python's search path
|
||||
sys.path.append(str(Path(__file__).parent / 'extension'))
|
||||
|
||||
extensions = ["rocm_docs", "sphinx_reredirects", "sphinx_sitemap", "sphinxcontrib.datatemplates", "version-ref", "csv-to-list-table"]
|
||||
extensions = ["rocm_docs", "sphinx_reredirects", "sphinx_sitemap", "sphinxcontrib.datatemplates", "remote-content", "version-ref", "csv-to-list-table"]
|
||||
|
||||
compatibility_matrix_file = str(Path(__file__).parent / 'compatibility/compatibility-matrix-historical-6.0.csv')
|
||||
|
||||
@@ -185,10 +235,14 @@ external_projects_current_project = "rocm"
|
||||
# external_projects_remote_repository = ""
|
||||
|
||||
html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "https://rocm-stg.amd.com/")
|
||||
html_context = {}
|
||||
html_context = {"docs_header_version": "7.1.0"}
|
||||
if os.environ.get("READTHEDOCS", "") == "True":
|
||||
html_context["READTHEDOCS"] = True
|
||||
|
||||
html_context["official_branch"] = official_branch
|
||||
html_context["version"] = version
|
||||
html_context["release"] = release
|
||||
|
||||
html_theme = "rocm_docs_theme"
|
||||
html_theme_options = {"flavor": "rocm-docs-home"}
|
||||
|
||||
@@ -207,10 +261,13 @@ suppress_warnings = ["autosectionlabel.*"]
|
||||
|
||||
html_context = {
|
||||
"project_path" : {project_path},
|
||||
"gpu_type" : [('AMD Instinct accelerators', 'intrinsic'), ('AMD gfx families', 'gfx'), ('NVIDIA families', 'nvidia') ],
|
||||
"gpu_type" : [('AMD Instinct GPUs', 'intrinsic'), ('AMD gfx families', 'gfx'), ('NVIDIA families', 'nvidia') ],
|
||||
"atomics_type" : [('HW atomics', 'hw-atomics'), ('CAS emulation', 'cas-atomics')],
|
||||
"pcie_type" : [('No PCIe atomics', 'nopcie'), ('PCIe atomics', 'pcie')],
|
||||
"memory_type" : [('Device DRAM', 'device-dram'), ('Migratable Host DRAM', 'migratable-host-dram'), ('Pinned Host DRAM', 'pinned-host-dram')],
|
||||
"granularity_type" : [('Coarse-grained', 'coarse-grained'), ('Fine-grained', 'fine-grained')],
|
||||
"scope_type" : [('Device', 'device'), ('System', 'system')]
|
||||
}
|
||||
|
||||
# Disable figure and table numbering
|
||||
numfig = False
|
||||
|
||||
@@ -28,13 +28,31 @@ See the [Python requirements file](https://github.com/ROCm/ROCm/blob/develop/doc
|
||||
|
||||
Use the Python Virtual Environment (`venv`) and run the following commands from the project root:
|
||||
|
||||
::::{tab-set}
|
||||
:::{tab-item} Linux and WSL
|
||||
:sync: linux
|
||||
|
||||
```sh
|
||||
python3 -mvenv .venv
|
||||
|
||||
.venv/bin/python -m pip install -r docs/sphinx/requirements.txt
|
||||
.venv/bin/python -m sphinx -T -E -b html -d _build/doctrees -D language=en docs _build/html
|
||||
.venv/bin/python -m pip install -r docs/sphinx/requirements.txt
|
||||
.venv/bin/python -m sphinx -T -E -b html -d _build/doctrees -D language=en docs _build/html
|
||||
```
|
||||
|
||||
:::
|
||||
:::{tab-item} Windows
|
||||
:sync: windows
|
||||
|
||||
```powershell
|
||||
python -mvenv .venv
|
||||
|
||||
.venv\Scripts\python.exe -m pip install -r docs/sphinx/requirements.txt
|
||||
.venv\Scripts\python.exe -m sphinx -T -E -b html -d _build/doctrees -D language=en docs _build/html
|
||||
```
|
||||
|
||||
:::
|
||||
::::
|
||||
|
||||
Navigate to `_build/html/index.html` and open this file in a web browser.
|
||||
|
||||
## Visual Studio Code
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 81 KiB After Width: | Height: | Size: 114 KiB |
@@ -0,0 +1,91 @@
|
||||
vllm_benchmark:
|
||||
unified_docker:
|
||||
latest:
|
||||
pull_tag: rocm/vllm:rocm6.4.1_vllm_0.10.0_20250812
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.10.0_20250812/images/sha256-4c277ad39af3a8c9feac9b30bf78d439c74d9b4728e788a419d3f1d0c30cacaa
|
||||
rocm_version: 6.4.1
|
||||
vllm_version: 0.10.0 (0.10.1.dev395+g340ea86df.rocm641)
|
||||
pytorch_version: 2.7.0+gitf717b2a
|
||||
hipblaslt_version: 0.15
|
||||
model_groups:
|
||||
- group: Meta Llama
|
||||
tag: llama
|
||||
models:
|
||||
- model: Llama 3.1 8B
|
||||
mad_tag: pyt_vllm_llama-3.1-8b
|
||||
model_repo: meta-llama/Llama-3.1-8B-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-8B
|
||||
precision: float16
|
||||
- model: Llama 3.1 70B
|
||||
mad_tag: pyt_vllm_llama-3.1-70b
|
||||
model_repo: meta-llama/Llama-3.1-70B-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
|
||||
precision: float16
|
||||
- model: Llama 3.1 405B
|
||||
mad_tag: pyt_vllm_llama-3.1-405b
|
||||
model_repo: meta-llama/Llama-3.1-405B-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
|
||||
precision: float16
|
||||
- model: Llama 2 70B
|
||||
mad_tag: pyt_vllm_llama-2-70b
|
||||
model_repo: meta-llama/Llama-2-70b-chat-hf
|
||||
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
|
||||
precision: float16
|
||||
- model: Llama 3.1 8B FP8
|
||||
mad_tag: pyt_vllm_llama-3.1-8b_fp8
|
||||
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
|
||||
precision: float8
|
||||
- model: Llama 3.1 70B FP8
|
||||
mad_tag: pyt_vllm_llama-3.1-70b_fp8
|
||||
model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
|
||||
precision: float8
|
||||
- model: Llama 3.1 405B FP8
|
||||
mad_tag: pyt_vllm_llama-3.1-405b_fp8
|
||||
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
precision: float8
|
||||
- group: Mistral AI
|
||||
tag: mistral
|
||||
models:
|
||||
- model: Mixtral MoE 8x7B
|
||||
mad_tag: pyt_vllm_mixtral-8x7b
|
||||
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
|
||||
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
|
||||
precision: float16
|
||||
- model: Mixtral MoE 8x22B
|
||||
mad_tag: pyt_vllm_mixtral-8x22b
|
||||
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
|
||||
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
|
||||
precision: float16
|
||||
- model: Mixtral MoE 8x7B FP8
|
||||
mad_tag: pyt_vllm_mixtral-8x7b_fp8
|
||||
model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
|
||||
url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
|
||||
precision: float8
|
||||
- model: Mixtral MoE 8x22B FP8
|
||||
mad_tag: pyt_vllm_mixtral-8x22b_fp8
|
||||
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
|
||||
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
|
||||
precision: float8
|
||||
- group: Qwen
|
||||
tag: qwen
|
||||
models:
|
||||
- model: QwQ-32B
|
||||
mad_tag: pyt_vllm_qwq-32b
|
||||
model_repo: Qwen/QwQ-32B
|
||||
url: https://huggingface.co/Qwen/QwQ-32B
|
||||
precision: float16
|
||||
- model: Qwen3 30B A3B
|
||||
mad_tag: pyt_vllm_qwen3-30b-a3b
|
||||
model_repo: Qwen/Qwen3-30B-A3B
|
||||
url: https://huggingface.co/Qwen/Qwen3-30B-A3B
|
||||
precision: float16
|
||||
- group: Microsoft Phi
|
||||
tag: phi
|
||||
models:
|
||||
- model: Phi-4
|
||||
mad_tag: pyt_vllm_phi-4
|
||||
model_repo: microsoft/phi-4
|
||||
url: https://huggingface.co/microsoft/phi-4
|
||||
@@ -0,0 +1,188 @@
|
||||
dockers:
|
||||
- pull_tag: rocm/vllm:rocm6.4.1_vllm_0.10.1_20250909
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.10.1_20250909/images/sha256-1113268572e26d59b205792047bea0e61e018e79aeadceba118b7bf23cb3715c
|
||||
components:
|
||||
ROCm: 6.4.1
|
||||
vLLM: 0.10.1 (0.10.1rc2.dev409+g0b6bf6691.rocm641)
|
||||
PyTorch: 2.7.0+gitf717b2a
|
||||
hipBLASLt: 0.15
|
||||
model_groups:
|
||||
- group: Meta Llama
|
||||
tag: llama
|
||||
models:
|
||||
- model: Llama 3.1 8B
|
||||
mad_tag: pyt_vllm_llama-3.1-8b
|
||||
model_repo: meta-llama/Llama-3.1-8B-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-8B
|
||||
precision: float16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_seq_len_to_capture: 131072
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.1 70B
|
||||
mad_tag: pyt_vllm_llama-3.1-70b
|
||||
model_repo: meta-llama/Llama-3.1-70B-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_seq_len_to_capture: 131072
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.1 405B
|
||||
mad_tag: pyt_vllm_llama-3.1-405b
|
||||
model_repo: meta-llama/Llama-3.1-405B-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_seq_len_to_capture: 131072
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 2 70B
|
||||
mad_tag: pyt_vllm_llama-2-70b
|
||||
model_repo: meta-llama/Llama-2-70b-chat-hf
|
||||
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_seq_len_to_capture: 4096
|
||||
max_num_batched_tokens: 4096
|
||||
max_model_len: 4096
|
||||
- model: Llama 3.1 8B FP8
|
||||
mad_tag: pyt_vllm_llama-3.1-8b_fp8
|
||||
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
|
||||
precision: float8
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_seq_len_to_capture: 131072
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.1 70B FP8
|
||||
mad_tag: pyt_vllm_llama-3.1-70b_fp8
|
||||
model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_seq_len_to_capture: 131072
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.1 405B FP8
|
||||
mad_tag: pyt_vllm_llama-3.1-405b_fp8
|
||||
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_seq_len_to_capture: 131072
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- group: Mistral AI
|
||||
tag: mistral
|
||||
models:
|
||||
- model: Mixtral MoE 8x7B
|
||||
mad_tag: pyt_vllm_mixtral-8x7b
|
||||
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
|
||||
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_seq_len_to_capture: 32768
|
||||
max_num_batched_tokens: 32768
|
||||
max_model_len: 8192
|
||||
- model: Mixtral MoE 8x22B
|
||||
mad_tag: pyt_vllm_mixtral-8x22b
|
||||
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
|
||||
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_seq_len_to_capture: 65536
|
||||
max_num_batched_tokens: 65536
|
||||
max_model_len: 8192
|
||||
- model: Mixtral MoE 8x7B FP8
|
||||
mad_tag: pyt_vllm_mixtral-8x7b_fp8
|
||||
model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
|
||||
url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_seq_len_to_capture: 32768
|
||||
max_num_batched_tokens: 32768
|
||||
max_model_len: 8192
|
||||
- model: Mixtral MoE 8x22B FP8
|
||||
mad_tag: pyt_vllm_mixtral-8x22b_fp8
|
||||
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
|
||||
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_seq_len_to_capture: 65536
|
||||
max_num_batched_tokens: 65536
|
||||
max_model_len: 8192
|
||||
- group: Qwen
|
||||
tag: qwen
|
||||
models:
|
||||
- model: QwQ-32B
|
||||
mad_tag: pyt_vllm_qwq-32b
|
||||
model_repo: Qwen/QwQ-32B
|
||||
url: https://huggingface.co/Qwen/QwQ-32B
|
||||
precision: float16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_seq_len_to_capture: 131072
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Qwen3 30B A3B
|
||||
mad_tag: pyt_vllm_qwen3-30b-a3b
|
||||
model_repo: Qwen/Qwen3-30B-A3B
|
||||
url: https://huggingface.co/Qwen/Qwen3-30B-A3B
|
||||
precision: float16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_seq_len_to_capture: 32768
|
||||
max_num_batched_tokens: 32768
|
||||
max_model_len: 8192
|
||||
- group: Microsoft Phi
|
||||
tag: phi
|
||||
models:
|
||||
- model: Phi-4
|
||||
mad_tag: pyt_vllm_phi-4
|
||||
model_repo: microsoft/phi-4
|
||||
url: https://huggingface.co/microsoft/phi-4
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_seq_len_to_capture: 16384
|
||||
max_num_batched_tokens: 16384
|
||||
max_model_len: 8192
|
||||
@@ -0,0 +1,316 @@
|
||||
dockers:
|
||||
- pull_tag: rocm/vllm:rocm7.0.0_vllm_0.10.2_20251006
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.10.2_20251006/images/sha256-94fd001964e1cf55c3224a445b1fb5be31a7dac302315255db8422d813edd7f5
|
||||
components:
|
||||
ROCm: 7.0.0
|
||||
vLLM: 0.10.2 (0.11.0rc2.dev160+g790d22168.rocm700)
|
||||
PyTorch: 2.9.0a0+git1c57644
|
||||
hipBLASLt: 1.0.0
|
||||
dockerfile:
|
||||
commit: 790d22168820507f3105fef29596549378cfe399
|
||||
model_groups:
|
||||
- group: Meta Llama
|
||||
tag: llama
|
||||
models:
|
||||
- model: Llama 2 70B
|
||||
mad_tag: pyt_vllm_llama-2-70b
|
||||
model_repo: meta-llama/Llama-2-70b-chat-hf
|
||||
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 4096
|
||||
max_model_len: 4096
|
||||
- model: Llama 3.1 8B
|
||||
mad_tag: pyt_vllm_llama-3.1-8b
|
||||
model_repo: meta-llama/Llama-3.1-8B-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-8B
|
||||
precision: float16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.1 8B FP8
|
||||
mad_tag: pyt_vllm_llama-3.1-8b_fp8
|
||||
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
|
||||
precision: float8
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.1 405B
|
||||
mad_tag: pyt_vllm_llama-3.1-405b
|
||||
model_repo: meta-llama/Llama-3.1-405B-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.1 405B FP8
|
||||
mad_tag: pyt_vllm_llama-3.1-405b_fp8
|
||||
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.1 405B MXFP4
|
||||
mad_tag: pyt_vllm_llama-3.1-405b_fp4
|
||||
model_repo: amd/Llama-3.1-405B-Instruct-MXFP4-Preview
|
||||
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-MXFP4-Preview
|
||||
precision: float4
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.3 70B
|
||||
mad_tag: pyt_vllm_llama-3.3-70b
|
||||
model_repo: meta-llama/Llama-3.3-70B-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.3 70B FP8
|
||||
mad_tag: pyt_vllm_llama-3.3-70b_fp8
|
||||
model_repo: amd/Llama-3.3-70B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-FP8-KV
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.3 70B MXFP4
|
||||
mad_tag: pyt_vllm_llama-3.3-70b_fp4
|
||||
model_repo: amd/Llama-3.3-70B-Instruct-MXFP4-Preview
|
||||
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-MXFP4-Preview
|
||||
precision: float4
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 4 Scout 17Bx16E
|
||||
mad_tag: pyt_vllm_llama-4-scout-17b-16e
|
||||
model_repo: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 32768
|
||||
max_model_len: 8192
|
||||
- model: Llama 4 Maverick 17Bx128E
|
||||
mad_tag: pyt_vllm_llama-4-maverick-17b-128e
|
||||
model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 32768
|
||||
max_model_len: 8192
|
||||
- model: Llama 4 Maverick 17Bx128E FP8
|
||||
mad_tag: pyt_vllm_llama-4-maverick-17b-128e_fp8
|
||||
model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
|
||||
url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- group: DeepSeek
|
||||
tag: deepseek
|
||||
models:
|
||||
- model: DeepSeek R1 0528 FP8
|
||||
mad_tag: pyt_vllm_deepseek-r1
|
||||
model_repo: deepseek-ai/DeepSeek-R1-0528
|
||||
url: https://huggingface.co/deepseek-ai/DeepSeek-R1-0528
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_seqs: 1024
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- group: OpenAI GPT OSS
|
||||
tag: gpt-oss
|
||||
models:
|
||||
- model: GPT OSS 20B
|
||||
mad_tag: pyt_vllm_gpt-oss-20b
|
||||
model_repo: openai/gpt-oss-20b
|
||||
url: https://huggingface.co/openai/gpt-oss-20b
|
||||
precision: bfloat16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 8192
|
||||
max_model_len: 8192
|
||||
- model: GPT OSS 120B
|
||||
mad_tag: pyt_vllm_gpt-oss-120b
|
||||
model_repo: openai/gpt-oss-120b
|
||||
url: https://huggingface.co/openai/gpt-oss-120b
|
||||
precision: bfloat16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 8192
|
||||
max_model_len: 8192
|
||||
- group: Mistral AI
|
||||
tag: mistral
|
||||
models:
|
||||
- model: Mixtral MoE 8x7B
|
||||
mad_tag: pyt_vllm_mixtral-8x7b
|
||||
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
|
||||
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 32768
|
||||
max_model_len: 8192
|
||||
- model: Mixtral MoE 8x7B FP8
|
||||
mad_tag: pyt_vllm_mixtral-8x7b_fp8
|
||||
model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
|
||||
url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 32768
|
||||
max_model_len: 8192
|
||||
- model: Mixtral MoE 8x22B
|
||||
mad_tag: pyt_vllm_mixtral-8x22b
|
||||
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
|
||||
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 65536
|
||||
max_model_len: 8192
|
||||
- model: Mixtral MoE 8x22B FP8
|
||||
mad_tag: pyt_vllm_mixtral-8x22b_fp8
|
||||
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
|
||||
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 65536
|
||||
max_model_len: 8192
|
||||
- group: Qwen
|
||||
tag: qwen
|
||||
models:
|
||||
- model: Qwen3 8B
|
||||
mad_tag: pyt_vllm_qwen3-8b
|
||||
model_repo: Qwen/Qwen3-8B
|
||||
url: https://huggingface.co/Qwen/Qwen3-8B
|
||||
precision: float16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 40960
|
||||
max_model_len: 8192
|
||||
- model: Qwen3 32B
|
||||
mad_tag: pyt_vllm_qwen3-32b
|
||||
model_repo: Qwen/Qwen3-32b
|
||||
url: https://huggingface.co/Qwen/Qwen3-32B
|
||||
precision: float16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 40960
|
||||
max_model_len: 8192
|
||||
- model: Qwen3 30B A3B
|
||||
mad_tag: pyt_vllm_qwen3-30b-a3b
|
||||
model_repo: Qwen/Qwen3-30B-A3B
|
||||
url: https://huggingface.co/Qwen/Qwen3-30B-A3B
|
||||
precision: float16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 40960
|
||||
max_model_len: 8192
|
||||
- model: Qwen3 30B A3B FP8
|
||||
mad_tag: pyt_vllm_qwen3-30b-a3b_fp8
|
||||
model_repo: Qwen/Qwen3-30B-A3B-FP8
|
||||
url: https://huggingface.co/Qwen/Qwen3-30B-A3B-FP8
|
||||
precision: float16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 40960
|
||||
max_model_len: 8192
|
||||
- model: Qwen3 235B A22B
|
||||
mad_tag: pyt_vllm_qwen3-235b-a22b
|
||||
model_repo: Qwen/Qwen3-235B-A22B
|
||||
url: https://huggingface.co/Qwen/Qwen3-235B-A22B
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 40960
|
||||
max_model_len: 8192
|
||||
- model: Qwen3 235B A22B FP8
|
||||
mad_tag: pyt_vllm_qwen3-235b-a22b_fp8
|
||||
model_repo: Qwen/Qwen3-235B-A22B-FP8
|
||||
url: https://huggingface.co/Qwen/Qwen3-235B-A22B-FP8
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 40960
|
||||
max_model_len: 8192
|
||||
- group: Microsoft Phi
|
||||
tag: phi
|
||||
models:
|
||||
- model: Phi-4
|
||||
mad_tag: pyt_vllm_phi-4
|
||||
model_repo: microsoft/phi-4
|
||||
url: https://huggingface.co/microsoft/phi-4
|
||||
precision: float16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 16384
|
||||
max_model_len: 8192
|
||||
@@ -0,0 +1,316 @@
|
||||
dockers:
|
||||
- pull_tag: rocm/vllm:rocm7.0.0_vllm_0.11.1_20251103
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm7.0.0_vllm_0.11.1_20251103/images/sha256-8d60429043d4d00958da46039a1de0d9b82df814d45da482497eef26a6076506
|
||||
components:
|
||||
ROCm: 7.0.0
|
||||
vLLM: 0.11.1 (0.11.1rc2.dev141+g38f225c2a.rocm700)
|
||||
PyTorch: 2.9.0a0+git1c57644
|
||||
hipBLASLt: 1.0.0
|
||||
dockerfile:
|
||||
commit: 38f225c2abeadc04c2cc398814c2f53ea02c3c72
|
||||
model_groups:
|
||||
- group: Meta Llama
|
||||
tag: llama
|
||||
models:
|
||||
- model: Llama 2 70B
|
||||
mad_tag: pyt_vllm_llama-2-70b
|
||||
model_repo: meta-llama/Llama-2-70b-chat-hf
|
||||
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 4096
|
||||
max_model_len: 4096
|
||||
- model: Llama 3.1 8B
|
||||
mad_tag: pyt_vllm_llama-3.1-8b
|
||||
model_repo: meta-llama/Llama-3.1-8B-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-8B
|
||||
precision: float16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.1 8B FP8
|
||||
mad_tag: pyt_vllm_llama-3.1-8b_fp8
|
||||
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
|
||||
precision: float8
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.1 405B
|
||||
mad_tag: pyt_vllm_llama-3.1-405b
|
||||
model_repo: meta-llama/Llama-3.1-405B-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.1 405B FP8
|
||||
mad_tag: pyt_vllm_llama-3.1-405b_fp8
|
||||
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.1 405B MXFP4
|
||||
mad_tag: pyt_vllm_llama-3.1-405b_fp4
|
||||
model_repo: amd/Llama-3.1-405B-Instruct-MXFP4-Preview
|
||||
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-MXFP4-Preview
|
||||
precision: float4
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.3 70B
|
||||
mad_tag: pyt_vllm_llama-3.3-70b
|
||||
model_repo: meta-llama/Llama-3.3-70B-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.3 70B FP8
|
||||
mad_tag: pyt_vllm_llama-3.3-70b_fp8
|
||||
model_repo: amd/Llama-3.3-70B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-FP8-KV
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 3.3 70B MXFP4
|
||||
mad_tag: pyt_vllm_llama-3.3-70b_fp4
|
||||
model_repo: amd/Llama-3.3-70B-Instruct-MXFP4-Preview
|
||||
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-MXFP4-Preview
|
||||
precision: float4
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- model: Llama 4 Scout 17Bx16E
|
||||
mad_tag: pyt_vllm_llama-4-scout-17b-16e
|
||||
model_repo: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 32768
|
||||
max_model_len: 8192
|
||||
- model: Llama 4 Maverick 17Bx128E
|
||||
mad_tag: pyt_vllm_llama-4-maverick-17b-128e
|
||||
model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 32768
|
||||
max_model_len: 8192
|
||||
- model: Llama 4 Maverick 17Bx128E FP8
|
||||
mad_tag: pyt_vllm_llama-4-maverick-17b-128e_fp8
|
||||
model_repo: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
|
||||
url: https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- group: DeepSeek
|
||||
tag: deepseek
|
||||
models:
|
||||
- model: DeepSeek R1 0528 FP8
|
||||
mad_tag: pyt_vllm_deepseek-r1
|
||||
model_repo: deepseek-ai/DeepSeek-R1-0528
|
||||
url: https://huggingface.co/deepseek-ai/DeepSeek-R1-0528
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_seqs: 1024
|
||||
max_num_batched_tokens: 131072
|
||||
max_model_len: 8192
|
||||
- group: OpenAI GPT OSS
|
||||
tag: gpt-oss
|
||||
models:
|
||||
- model: GPT OSS 20B
|
||||
mad_tag: pyt_vllm_gpt-oss-20b
|
||||
model_repo: openai/gpt-oss-20b
|
||||
url: https://huggingface.co/openai/gpt-oss-20b
|
||||
precision: bfloat16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 8192
|
||||
max_model_len: 8192
|
||||
- model: GPT OSS 120B
|
||||
mad_tag: pyt_vllm_gpt-oss-120b
|
||||
model_repo: openai/gpt-oss-120b
|
||||
url: https://huggingface.co/openai/gpt-oss-120b
|
||||
precision: bfloat16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 8192
|
||||
max_model_len: 8192
|
||||
- group: Mistral AI
|
||||
tag: mistral
|
||||
models:
|
||||
- model: Mixtral MoE 8x7B
|
||||
mad_tag: pyt_vllm_mixtral-8x7b
|
||||
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
|
||||
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 32768
|
||||
max_model_len: 8192
|
||||
- model: Mixtral MoE 8x7B FP8
|
||||
mad_tag: pyt_vllm_mixtral-8x7b_fp8
|
||||
model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
|
||||
url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 32768
|
||||
max_model_len: 8192
|
||||
- model: Mixtral MoE 8x22B
|
||||
mad_tag: pyt_vllm_mixtral-8x22b
|
||||
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
|
||||
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 65536
|
||||
max_model_len: 8192
|
||||
- model: Mixtral MoE 8x22B FP8
|
||||
mad_tag: pyt_vllm_mixtral-8x22b_fp8
|
||||
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
|
||||
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 65536
|
||||
max_model_len: 8192
|
||||
- group: Qwen
|
||||
tag: qwen
|
||||
models:
|
||||
- model: Qwen3 8B
|
||||
mad_tag: pyt_vllm_qwen3-8b
|
||||
model_repo: Qwen/Qwen3-8B
|
||||
url: https://huggingface.co/Qwen/Qwen3-8B
|
||||
precision: float16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 40960
|
||||
max_model_len: 8192
|
||||
- model: Qwen3 32B
|
||||
mad_tag: pyt_vllm_qwen3-32b
|
||||
model_repo: Qwen/Qwen3-32b
|
||||
url: https://huggingface.co/Qwen/Qwen3-32B
|
||||
precision: float16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 40960
|
||||
max_model_len: 8192
|
||||
- model: Qwen3 30B A3B
|
||||
mad_tag: pyt_vllm_qwen3-30b-a3b
|
||||
model_repo: Qwen/Qwen3-30B-A3B
|
||||
url: https://huggingface.co/Qwen/Qwen3-30B-A3B
|
||||
precision: float16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 40960
|
||||
max_model_len: 8192
|
||||
- model: Qwen3 30B A3B FP8
|
||||
mad_tag: pyt_vllm_qwen3-30b-a3b_fp8
|
||||
model_repo: Qwen/Qwen3-30B-A3B-FP8
|
||||
url: https://huggingface.co/Qwen/Qwen3-30B-A3B-FP8
|
||||
precision: float16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 40960
|
||||
max_model_len: 8192
|
||||
- model: Qwen3 235B A22B
|
||||
mad_tag: pyt_vllm_qwen3-235b-a22b
|
||||
model_repo: Qwen/Qwen3-235B-A22B
|
||||
url: https://huggingface.co/Qwen/Qwen3-235B-A22B
|
||||
precision: float16
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 40960
|
||||
max_model_len: 8192
|
||||
- model: Qwen3 235B A22B FP8
|
||||
mad_tag: pyt_vllm_qwen3-235b-a22b_fp8
|
||||
model_repo: Qwen/Qwen3-235B-A22B-FP8
|
||||
url: https://huggingface.co/Qwen/Qwen3-235B-A22B-FP8
|
||||
precision: float8
|
||||
config:
|
||||
tp: 8
|
||||
dtype: auto
|
||||
kv_cache_dtype: fp8
|
||||
max_num_batched_tokens: 40960
|
||||
max_model_len: 8192
|
||||
- group: Microsoft Phi
|
||||
tag: phi
|
||||
models:
|
||||
- model: Phi-4
|
||||
mad_tag: pyt_vllm_phi-4
|
||||
model_repo: microsoft/phi-4
|
||||
url: https://huggingface.co/microsoft/phi-4
|
||||
precision: float16
|
||||
config:
|
||||
tp: 1
|
||||
dtype: auto
|
||||
kv_cache_dtype: auto
|
||||
max_num_batched_tokens: 16384
|
||||
max_model_len: 8192
|
||||
@@ -0,0 +1,163 @@
|
||||
vllm_benchmark:
|
||||
unified_docker:
|
||||
latest:
|
||||
# TODO: update me
|
||||
pull_tag: rocm/vllm:rocm6.4.1_vllm_0.9.1_20250715
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.9.1_20250715/images/sha256-4a429705fa95a58f6d20aceab43b1b76fa769d57f32d5d28bd3f4e030e2a78ea
|
||||
rocm_version: 6.4.1
|
||||
vllm_version: 0.9.1 (0.9.2.dev364+gb432b7a28.rocm641)
|
||||
pytorch_version: 2.7.0+gitf717b2a
|
||||
hipblaslt_version: 0.15
|
||||
model_groups:
|
||||
- group: Meta Llama
|
||||
tag: llama
|
||||
models:
|
||||
- model: Llama 3.1 8B
|
||||
mad_tag: pyt_vllm_llama-3.1-8b
|
||||
model_repo: meta-llama/Llama-3.1-8B-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-8B
|
||||
precision: float16
|
||||
- model: Llama 3.1 70B
|
||||
mad_tag: pyt_vllm_llama-3.1-70b
|
||||
model_repo: meta-llama/Llama-3.1-70B-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
|
||||
precision: float16
|
||||
- model: Llama 3.1 405B
|
||||
mad_tag: pyt_vllm_llama-3.1-405b
|
||||
model_repo: meta-llama/Llama-3.1-405B-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
|
||||
precision: float16
|
||||
- model: Llama 2 7B
|
||||
mad_tag: pyt_vllm_llama-2-7b
|
||||
model_repo: meta-llama/Llama-2-7b-chat-hf
|
||||
url: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
|
||||
precision: float16
|
||||
- model: Llama 2 70B
|
||||
mad_tag: pyt_vllm_llama-2-70b
|
||||
model_repo: meta-llama/Llama-2-70b-chat-hf
|
||||
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
|
||||
precision: float16
|
||||
- model: Llama 3.1 8B FP8
|
||||
mad_tag: pyt_vllm_llama-3.1-8b_fp8
|
||||
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
|
||||
precision: float8
|
||||
- model: Llama 3.1 70B FP8
|
||||
mad_tag: pyt_vllm_llama-3.1-70b_fp8
|
||||
model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
|
||||
precision: float8
|
||||
- model: Llama 3.1 405B FP8
|
||||
mad_tag: pyt_vllm_llama-3.1-405b_fp8
|
||||
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
precision: float8
|
||||
- group: Mistral AI
|
||||
tag: mistral
|
||||
models:
|
||||
- model: Mixtral MoE 8x7B
|
||||
mad_tag: pyt_vllm_mixtral-8x7b
|
||||
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
|
||||
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
|
||||
precision: float16
|
||||
- model: Mixtral MoE 8x22B
|
||||
mad_tag: pyt_vllm_mixtral-8x22b
|
||||
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
|
||||
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
|
||||
precision: float16
|
||||
- model: Mistral 7B
|
||||
mad_tag: pyt_vllm_mistral-7b
|
||||
model_repo: mistralai/Mistral-7B-Instruct-v0.3
|
||||
url: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3
|
||||
precision: float16
|
||||
- model: Mixtral MoE 8x7B FP8
|
||||
mad_tag: pyt_vllm_mixtral-8x7b_fp8
|
||||
model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
|
||||
url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
|
||||
precision: float8
|
||||
- model: Mixtral MoE 8x22B FP8
|
||||
mad_tag: pyt_vllm_mixtral-8x22b_fp8
|
||||
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
|
||||
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
|
||||
precision: float8
|
||||
- model: Mistral 7B FP8
|
||||
mad_tag: pyt_vllm_mistral-7b_fp8
|
||||
model_repo: amd/Mistral-7B-v0.1-FP8-KV
|
||||
url: https://huggingface.co/amd/Mistral-7B-v0.1-FP8-KV
|
||||
precision: float8
|
||||
- group: Qwen
|
||||
tag: qwen
|
||||
models:
|
||||
- model: Qwen2 7B
|
||||
mad_tag: pyt_vllm_qwen2-7b
|
||||
model_repo: Qwen/Qwen2-7B-Instruct
|
||||
url: https://huggingface.co/Qwen/Qwen2-7B-Instruct
|
||||
precision: float16
|
||||
- model: Qwen2 72B
|
||||
mad_tag: pyt_vllm_qwen2-72b
|
||||
model_repo: Qwen/Qwen2-72B-Instruct
|
||||
url: https://huggingface.co/Qwen/Qwen2-72B-Instruct
|
||||
precision: float16
|
||||
- model: QwQ-32B
|
||||
mad_tag: pyt_vllm_qwq-32b
|
||||
model_repo: Qwen/QwQ-32B
|
||||
url: https://huggingface.co/Qwen/QwQ-32B
|
||||
precision: float16
|
||||
tunableop: true
|
||||
- group: Databricks DBRX
|
||||
tag: dbrx
|
||||
models:
|
||||
- model: DBRX Instruct
|
||||
mad_tag: pyt_vllm_dbrx-instruct
|
||||
model_repo: databricks/dbrx-instruct
|
||||
url: https://huggingface.co/databricks/dbrx-instruct
|
||||
precision: float16
|
||||
- model: DBRX Instruct FP8
|
||||
mad_tag: pyt_vllm_dbrx_fp8
|
||||
model_repo: amd/dbrx-instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/dbrx-instruct-FP8-KV
|
||||
precision: float8
|
||||
- group: Google Gemma
|
||||
tag: gemma
|
||||
models:
|
||||
- model: Gemma 2 27B
|
||||
mad_tag: pyt_vllm_gemma-2-27b
|
||||
model_repo: google/gemma-2-27b
|
||||
url: https://huggingface.co/google/gemma-2-27b
|
||||
precision: float16
|
||||
- group: Cohere
|
||||
tag: cohere
|
||||
models:
|
||||
- model: C4AI Command R+ 08-2024
|
||||
mad_tag: pyt_vllm_c4ai-command-r-plus-08-2024
|
||||
model_repo: CohereForAI/c4ai-command-r-plus-08-2024
|
||||
url: https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024
|
||||
precision: float16
|
||||
- model: C4AI Command R+ 08-2024 FP8
|
||||
mad_tag: pyt_vllm_command-r-plus_fp8
|
||||
model_repo: amd/c4ai-command-r-plus-FP8-KV
|
||||
url: https://huggingface.co/amd/c4ai-command-r-plus-FP8-KV
|
||||
precision: float8
|
||||
- group: DeepSeek
|
||||
tag: deepseek
|
||||
models:
|
||||
- model: DeepSeek MoE 16B
|
||||
mad_tag: pyt_vllm_deepseek-moe-16b-chat
|
||||
model_repo: deepseek-ai/deepseek-moe-16b-chat
|
||||
url: https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat
|
||||
precision: float16
|
||||
- group: Microsoft Phi
|
||||
tag: phi
|
||||
models:
|
||||
- model: Phi-4
|
||||
mad_tag: pyt_vllm_phi-4
|
||||
model_repo: microsoft/phi-4
|
||||
url: https://huggingface.co/microsoft/phi-4
|
||||
- group: TII Falcon
|
||||
tag: falcon
|
||||
models:
|
||||
- model: Falcon 180B
|
||||
mad_tag: pyt_vllm_falcon-180b
|
||||
model_repo: tiiuae/falcon-180B
|
||||
url: https://huggingface.co/tiiuae/falcon-180B
|
||||
precision: float16
|
||||
@@ -0,0 +1,55 @@
|
||||
xdit_diffusion_inference:
|
||||
docker:
|
||||
pull_tag: rocm/pytorch-xdit:v25.10
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.10/images/sha256-d79715ff18a9470e3f907cec8a9654d6b783c63370b091446acffc0de4d7070e
|
||||
ROCm: 7.9.0
|
||||
components:
|
||||
TheRock: 7afbe45
|
||||
rccl: 9b04b2a
|
||||
composable_kernel: b7a806f
|
||||
rocm-libraries: f104555
|
||||
rocm-systems: 25922d0
|
||||
torch: 2.10.0a0+gite9c9017
|
||||
torchvision: 0.22.0a0+966da7e
|
||||
triton: 3.5.0+git52e49c12
|
||||
accelerate: 1.11.0.dev0
|
||||
aiter: 0.1.5.post4.dev20+ga25e55e79
|
||||
diffusers: 0.36.0.dev0
|
||||
xfuser: 0.4.4
|
||||
yunchang: 0.6.3.post1
|
||||
|
||||
model_groups:
|
||||
- group: Hunyuan Video
|
||||
tag: hunyuan
|
||||
models:
|
||||
- model: Hunyuan Video
|
||||
model_name: hunyuanvideo
|
||||
model_repo: tencent/HunyuanVideo
|
||||
revision: refs/pr/18
|
||||
url: https://huggingface.co/tencent/HunyuanVideo
|
||||
github: https://github.com/Tencent-Hunyuan/HunyuanVideo
|
||||
mad_tag: pyt_xdit_hunyuanvideo
|
||||
- group: Wan-AI
|
||||
tag: wan
|
||||
models:
|
||||
- model: Wan2.1
|
||||
model_name: wan2_1-i2v-14b-720p
|
||||
model_repo: Wan-AI/Wan2.1-I2V-14B-720P
|
||||
url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P
|
||||
github: https://github.com/Wan-Video/Wan2.1
|
||||
mad_tag: pyt_xdit_wan_2_1
|
||||
- model: Wan2.2
|
||||
model_name: wan2_2-i2v-a14b
|
||||
model_repo: Wan-AI/Wan2.2-I2V-A14B
|
||||
url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B
|
||||
github: https://github.com/Wan-Video/Wan2.2
|
||||
mad_tag: pyt_xdit_wan_2_2
|
||||
- group: FLUX
|
||||
tag: flux
|
||||
models:
|
||||
- model: FLUX.1
|
||||
model_name: FLUX.1-dev
|
||||
model_repo: black-forest-labs/FLUX.1-dev
|
||||
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
|
||||
github: https://github.com/black-forest-labs/flux
|
||||
mad_tag: pyt_xdit_flux
|
||||
@@ -0,0 +1,109 @@
|
||||
xdit_diffusion_inference:
|
||||
docker:
|
||||
- version: v25-11
|
||||
pull_tag: rocm/pytorch-xdit:v25.11
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.11/images/sha256-c9fa659439bb024f854b4d5eea598347251b02c341c55f66c98110832bde4216
|
||||
ROCm: 7.10.0
|
||||
supported_models:
|
||||
- group: Hunyuan Video
|
||||
models:
|
||||
- Hunyuan Video
|
||||
- group: Wan-AI
|
||||
models:
|
||||
- Wan2.1
|
||||
- Wan2.2
|
||||
- group: FLUX
|
||||
models:
|
||||
- FLUX.1
|
||||
whats_new:
|
||||
- "Minor bug fixes and clarifications to READMEs."
|
||||
- "Bumps TheRock, AITER, Diffusers, xDiT versions."
|
||||
- "Changes Aiter rounding mode for faster gfx942 FWD Attention."
|
||||
components:
|
||||
TheRock: 3e3f834
|
||||
rccl: d23d18f
|
||||
composable_kernel: 2570462
|
||||
rocm-libraries: 0588f07
|
||||
rocm-systems: 473025a
|
||||
torch: 73adac
|
||||
torchvision: f5c6c2e
|
||||
triton: 7416ffc
|
||||
accelerate: 34c1779
|
||||
aiter: de14bec
|
||||
diffusers: 40528e9
|
||||
xfuser: 83978b5
|
||||
yunchang: 2c9b712
|
||||
|
||||
- version: v25-10
|
||||
pull_tag: rocm/pytorch-xdit:v25.10
|
||||
docker_hub_url: https://hub.docker.com/r/rocm/pytorch-xdit
|
||||
ROCm: 7.9.0
|
||||
supported_models:
|
||||
- group: Hunyuan Video
|
||||
models:
|
||||
- Hunyuan Video
|
||||
- group: Wan-AI
|
||||
models:
|
||||
- Wan2.1
|
||||
- Wan2.2
|
||||
- group: FLUX
|
||||
models:
|
||||
- FLUX.1
|
||||
whats_new:
|
||||
- "First official xDiT Docker Release for Diffusion Inference."
|
||||
- "Supports gfx942 and gfx950 series (AMD Instinct™ MI300X, MI325X, MI350X, and MI355X)."
|
||||
- "Support Wan 2.1, Wan 2.2, HunyuanVideo and Flux workloads."
|
||||
components:
|
||||
TheRock: 7afbe45
|
||||
rccl: 9b04b2a
|
||||
composable_kernel: b7a806f
|
||||
rocm-libraries: f104555
|
||||
rocm-systems: 25922d0
|
||||
torch: 2.10.0a0+gite9c9017
|
||||
torchvision: 0.22.0a0+966da7e
|
||||
triton: 3.5.0+git52e49c12
|
||||
accelerate: 1.11.0.dev0
|
||||
aiter: 0.1.5.post4.dev20+ga25e55e79
|
||||
diffusers: 0.36.0.dev0
|
||||
xfuser: 0.4.4
|
||||
yunchang: 0.6.3.post1
|
||||
|
||||
model_groups:
|
||||
- group: Hunyuan Video
|
||||
tag: hunyuan
|
||||
models:
|
||||
- model: Hunyuan Video
|
||||
page_tag: hunyuan_tag
|
||||
model_name: hunyuanvideo
|
||||
model_repo: tencent/HunyuanVideo
|
||||
revision: refs/pr/18
|
||||
url: https://huggingface.co/tencent/HunyuanVideo
|
||||
github: https://github.com/Tencent-Hunyuan/HunyuanVideo
|
||||
mad_tag: pyt_xdit_hunyuanvideo
|
||||
- group: Wan-AI
|
||||
tag: wan
|
||||
models:
|
||||
- model: Wan2.1
|
||||
page_tag: wan_21_tag
|
||||
model_name: wan2_1-i2v-14b-720p
|
||||
model_repo: Wan-AI/Wan2.1-I2V-14B-720P
|
||||
url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P
|
||||
github: https://github.com/Wan-Video/Wan2.1
|
||||
mad_tag: pyt_xdit_wan_2_1
|
||||
- model: Wan2.2
|
||||
page_tag: wan_22_tag
|
||||
model_name: wan2_2-i2v-a14b
|
||||
model_repo: Wan-AI/Wan2.2-I2V-A14B
|
||||
url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B
|
||||
github: https://github.com/Wan-Video/Wan2.2
|
||||
mad_tag: pyt_xdit_wan_2_2
|
||||
- group: FLUX
|
||||
tag: flux
|
||||
models:
|
||||
- model: FLUX.1
|
||||
page_tag: flux_1_tag
|
||||
model_name: FLUX.1-dev
|
||||
model_repo: black-forest-labs/FLUX.1-dev
|
||||
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
|
||||
github: https://github.com/black-forest-labs/flux
|
||||
mad_tag: pyt_xdit_flux
|
||||
@@ -0,0 +1,91 @@
|
||||
docker:
|
||||
pull_tag: rocm/pytorch-xdit:v25.12
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.12/images/sha256-e06895132316bf3c393366b70a91eaab6755902dad0100e6e2b38310547d9256
|
||||
ROCm: 7.10.0
|
||||
whats_new:
|
||||
- "Adds T2V and TI2V support for Wan models."
|
||||
- "Adds support for SD-3.5 T2I model."
|
||||
components:
|
||||
TheRock:
|
||||
version: 3e3f834
|
||||
url: https://github.com/ROCm/TheRock
|
||||
rccl:
|
||||
version: d23d18f
|
||||
url: https://github.com/ROCm/rccl
|
||||
composable_kernel:
|
||||
version: 2570462
|
||||
url: https://github.com/ROCm/composable_kernel
|
||||
rocm-libraries:
|
||||
version: 0588f07
|
||||
url: https://github.com/ROCm/rocm-libraries
|
||||
rocm-systems:
|
||||
version: 473025a
|
||||
url: https://github.com/ROCm/rocm-systems
|
||||
torch:
|
||||
version: 73adac
|
||||
url: https://github.com/pytorch/pytorch
|
||||
torchvision:
|
||||
version: f5c6c2e
|
||||
url: https://github.com/pytorch/vision
|
||||
triton:
|
||||
version: 7416ffc
|
||||
url: https://github.com/triton-lang/triton
|
||||
accelerate:
|
||||
version: 34c1779
|
||||
url: https://github.com/huggingface/accelerate
|
||||
aiter:
|
||||
version: de14bec
|
||||
url: https://github.com/ROCm/aiter
|
||||
diffusers:
|
||||
version: 40528e9
|
||||
url: https://github.com/huggingface/diffusers
|
||||
xfuser:
|
||||
version: ccba9d5
|
||||
url: https://github.com/xdit-project/xDiT
|
||||
yunchang:
|
||||
version: 2c9b712
|
||||
url: https://github.com/feifeibear/long-context-attention
|
||||
supported_models:
|
||||
- group: Hunyuan Video
|
||||
js_tag: hunyuan
|
||||
models:
|
||||
- model: Hunyuan Video
|
||||
model_repo: tencent/HunyuanVideo
|
||||
revision: refs/pr/18
|
||||
url: https://huggingface.co/tencent/HunyuanVideo
|
||||
github: https://github.com/Tencent-Hunyuan/HunyuanVideo
|
||||
mad_tag: pyt_xdit_hunyuanvideo
|
||||
js_tag: hunyuan_tag
|
||||
- group: Wan-AI
|
||||
js_tag: wan
|
||||
models:
|
||||
- model: Wan2.1
|
||||
model_repo: Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
|
||||
url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
|
||||
github: https://github.com/Wan-Video/Wan2.1
|
||||
mad_tag: pyt_xdit_wan_2_1
|
||||
js_tag: wan_21_tag
|
||||
- model: Wan2.2
|
||||
model_repo: Wan-AI/Wan2.2-I2V-A14B-Diffusers
|
||||
url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers
|
||||
github: https://github.com/Wan-Video/Wan2.2
|
||||
mad_tag: pyt_xdit_wan_2_2
|
||||
js_tag: wan_22_tag
|
||||
- group: FLUX
|
||||
js_tag: flux
|
||||
models:
|
||||
- model: FLUX.1
|
||||
model_repo: black-forest-labs/FLUX.1-dev
|
||||
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
|
||||
github: https://github.com/black-forest-labs/flux
|
||||
mad_tag: pyt_xdit_flux
|
||||
js_tag: flux_1_tag
|
||||
- group: Stable Diffusion
|
||||
js_tag: stablediffusion
|
||||
models:
|
||||
- model: stable-diffusion-3.5-large
|
||||
model_repo: stabilityai/stable-diffusion-3.5-large
|
||||
url: https://huggingface.co/stabilityai/stable-diffusion-3.5-large
|
||||
github: https://github.com/Stability-AI/sd3.5
|
||||
mad_tag: pyt_xdit_sd_3_5
|
||||
js_tag: stable_diffusion_3_5_large_tag
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user