mirror of
https://github.com/ROCm/ROCm.git
synced 2026-01-09 22:58:17 -05:00
Compare commits
109 Commits
bb/pr-7.0
...
rocm-7.0.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dd95373e54 | ||
|
|
bafda50153 | ||
|
|
a471b4fc9b | ||
|
|
cae65c6c43 | ||
|
|
ec7c3b02e2 | ||
|
|
6a66167486 | ||
|
|
0f3543d6e8 | ||
|
|
678691c3d7 | ||
|
|
5cb3debed9 | ||
|
|
dd5d710727 | ||
|
|
eca1ecde92 | ||
|
|
ed1e414710 | ||
|
|
20c90fc406 | ||
|
|
fdc822ac3d | ||
|
|
6e39614b22 | ||
|
|
f7873ac74e | ||
|
|
a86fba556b | ||
|
|
7603fed080 | ||
|
|
9932cd4ac2 | ||
|
|
e8d104124f | ||
|
|
26f708da87 | ||
|
|
5a5e4dbb6e | ||
|
|
1c3dae75e1 | ||
|
|
bab853a0d3 | ||
|
|
5c7ccb3c26 | ||
|
|
f216b371a0 | ||
|
|
37faf170b1 | ||
|
|
8c40d14d7e | ||
|
|
d5101532f7 | ||
|
|
ef4e7ca1fe | ||
|
|
be68246824 | ||
|
|
1626ee4d8b | ||
|
|
f80044c7db | ||
|
|
412f6f2b0e | ||
|
|
bee7c1223f | ||
|
|
8af34e2026 | ||
|
|
0475650f00 | ||
|
|
cb73e9145a | ||
|
|
7316031fe6 | ||
|
|
76cb264f34 | ||
|
|
9c36e44a91 | ||
|
|
1037f8845a | ||
|
|
c2e31f2d2b | ||
|
|
882f71302a | ||
|
|
3d2f10ce0c | ||
|
|
81f5314368 | ||
|
|
60e3a8107c | ||
|
|
b800801427 | ||
|
|
5637deb81e | ||
|
|
df1ae524b2 | ||
|
|
06fd378036 | ||
|
|
cbd4e8f0ba | ||
|
|
1660ac335a | ||
|
|
b357ba993b | ||
|
|
29f4d65da5 | ||
|
|
2de5a33aec | ||
|
|
e805e98701 | ||
|
|
e1a1a4e712 | ||
|
|
8eee155585 | ||
|
|
e3227d14e6 | ||
|
|
aebf1b4480 | ||
|
|
519364179c | ||
|
|
c2080a90c7 | ||
|
|
08dad2dc41 | ||
|
|
b4c5980a96 | ||
|
|
52ce201401 | ||
|
|
505233473d | ||
|
|
4f4f4556a5 | ||
|
|
4f8426376b | ||
|
|
d476d09aff | ||
|
|
04beef8773 | ||
|
|
95d1752874 | ||
|
|
eabf72c2db | ||
|
|
53bd9b5da4 | ||
|
|
0665e73e2d | ||
|
|
264d353071 | ||
|
|
d58e2b16db | ||
|
|
010a191938 | ||
|
|
a7edb17538 | ||
|
|
59afdef1fb | ||
|
|
ea8ff1b17d | ||
|
|
808a7709aa | ||
|
|
8cc17e307c | ||
|
|
7fd6146b16 | ||
|
|
e839054e56 | ||
|
|
78c4a4c12a | ||
|
|
c587d75701 | ||
|
|
a88151f505 | ||
|
|
ff7d9eb17a | ||
|
|
2ec8757ffa | ||
|
|
28c3384433 | ||
|
|
91c26c502d | ||
|
|
0ae99ea21e | ||
|
|
60571680b5 | ||
|
|
e24bd407c1 | ||
|
|
19156cf2c6 | ||
|
|
0d5f17a58b | ||
|
|
6b93d7a75a | ||
|
|
acdb5c90a6 | ||
|
|
073ac54e47 | ||
|
|
d0377dd947 | ||
|
|
35ec186cd9 | ||
|
|
da340c3d05 | ||
|
|
1d127d987b | ||
|
|
71bc63d2d8 | ||
|
|
7b087769a2 | ||
|
|
08d0840b69 | ||
|
|
ae734e7846 | ||
|
|
67f988f58b |
@@ -40,6 +40,7 @@ parameters:
|
||||
- gfortran
|
||||
- libgfortran5
|
||||
- libopenblas-dev
|
||||
- liblapack-dev
|
||||
- name: pipModules
|
||||
type: object
|
||||
default:
|
||||
@@ -125,10 +126,13 @@ jobs:
|
||||
aggregatePipeline: ${{ parameters.aggregatePipeline }}
|
||||
${{ if parameters.triggerDownstreamJobs }}:
|
||||
downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
|
||||
# NOTE: content between `---` is for transition support between old/new build systems
|
||||
# and should be removed once transition is complete.
|
||||
# -----------------------------
|
||||
# Build and install gtest and lapack
|
||||
# $(Pipeline.Workspace)/deps is a temporary folder for the build process
|
||||
# $(Pipeline.Workspace)/s/deps is part of the hipSPARSELt repo
|
||||
- script: mkdir $(Pipeline.Workspace)/deps
|
||||
- script: mkdir -p $(Pipeline.Workspace)/deps
|
||||
displayName: Create temp folder for external dependencies
|
||||
# hipSPARSELt already has a CMake script for external deps, so we can just run that
|
||||
# https://github.com/ROCm/hipSPARSELt/blob/develop/deps/CMakeLists.txt
|
||||
@@ -144,22 +148,35 @@ jobs:
|
||||
- script: sudo make install
|
||||
displayName: Install hipSPARSELt external dependencies
|
||||
workingDirectory: $(Pipeline.Workspace)/deps
|
||||
# -----------------------------
|
||||
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
|
||||
parameters:
|
||||
os: ${{ job.os }}
|
||||
# NOTE: the following options are old build only
|
||||
# and can be removed after full transition to new build
|
||||
# -DAMDGPU_TARGETS=${{ job.target }}
|
||||
# -DCMAKE_Fortran_COMPILER=f95
|
||||
# -DTensile_LOGIC=
|
||||
# -DTensile_CPU_THREADS=
|
||||
# -DTensile_LIBRARY_FORMAT=msgpack
|
||||
# -DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
# -DBUILD_CLIENTS_TESTS=ON
|
||||
# -DBUILD_USE_LOCAL_TENSILE=OFF
|
||||
extraBuildFlags: >-
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
|
||||
-DCMAKE_C_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang
|
||||
-DCMAKE_Fortran_COMPILER=f95
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm"
|
||||
-DGPU_TARGETS=${{ job.target }}
|
||||
-DAMDGPU_TARGETS=${{ job.target }}
|
||||
-DCMAKE_Fortran_COMPILER=f95
|
||||
-DTensile_LOGIC=
|
||||
-DTensile_CPU_THREADS=
|
||||
-DTensile_LIBRARY_FORMAT=msgpack
|
||||
-DCMAKE_PREFIX_PATH="$(Agent.BuildDirectory)/rocm"
|
||||
-DROCM_PATH=$(Agent.BuildDirectory)/rocm
|
||||
-DBUILD_CLIENTS_TESTS=ON
|
||||
-DBUILD_USE_LOCAL_TENSILE=OFF
|
||||
-DHIPSPARSELT_ENABLE_FETCH=ON
|
||||
-GNinja
|
||||
${{ if ne(parameters.sparseCheckoutDir, '') }}:
|
||||
cmakeSourceDir: $(Build.SourcesDirectory)/projects/hipsparselt
|
||||
|
||||
@@ -62,6 +62,7 @@ CPU
|
||||
CPUs
|
||||
Cron
|
||||
CSC
|
||||
CSDATA
|
||||
CSE
|
||||
CSV
|
||||
CSn
|
||||
@@ -71,6 +72,7 @@ CU
|
||||
CUDA
|
||||
CUs
|
||||
CXX
|
||||
CX
|
||||
Cavium
|
||||
CentOS
|
||||
ChatGPT
|
||||
@@ -81,6 +83,7 @@ CommonMark
|
||||
Concretized
|
||||
Conda
|
||||
ConnectX
|
||||
CountOnes
|
||||
CuPy
|
||||
da
|
||||
Dashboarding
|
||||
@@ -97,6 +100,7 @@ DIMM
|
||||
DKMS
|
||||
DL
|
||||
DMA
|
||||
DOMContentLoaded
|
||||
DNN
|
||||
DNNL
|
||||
DPM
|
||||
@@ -115,6 +119,8 @@ Dependabot
|
||||
Deprecations
|
||||
DevCap
|
||||
DirectX
|
||||
Disaggregated
|
||||
disaggregated
|
||||
Dockerfile
|
||||
Dockerized
|
||||
Doxygen
|
||||
@@ -124,8 +130,10 @@ ENDPGM
|
||||
EPYC
|
||||
ESXi
|
||||
EoS
|
||||
etcd
|
||||
fas
|
||||
FBGEMM
|
||||
FIFOs
|
||||
FFT
|
||||
FFTs
|
||||
FFmpeg
|
||||
@@ -174,6 +182,7 @@ GPUs
|
||||
Graphbolt
|
||||
GraphSage
|
||||
GRBM
|
||||
GRE
|
||||
GenAI
|
||||
GenZ
|
||||
GitHub
|
||||
@@ -201,6 +210,7 @@ Higgs
|
||||
href
|
||||
Hyperparameters
|
||||
Huggingface
|
||||
IB
|
||||
ICD
|
||||
ICT
|
||||
ICV
|
||||
@@ -209,8 +219,11 @@ IDEs
|
||||
IFWI
|
||||
IMDb
|
||||
IncDec
|
||||
instrSize
|
||||
interpolators
|
||||
IOMMU
|
||||
IOP
|
||||
IOPS
|
||||
IOPM
|
||||
IOV
|
||||
IRQ
|
||||
@@ -247,12 +260,15 @@ LLM
|
||||
LLMs
|
||||
LLVM
|
||||
LM
|
||||
LRU
|
||||
LSAN
|
||||
LSan
|
||||
LTS
|
||||
LSTMs
|
||||
LteAll
|
||||
LanguageCrossEntropy
|
||||
LoRA
|
||||
MECO
|
||||
MEM
|
||||
MERCHANTABILITY
|
||||
MFMA
|
||||
@@ -271,6 +287,7 @@ MNIST
|
||||
MPI
|
||||
MPT
|
||||
MSVC
|
||||
mul
|
||||
MVAPICH
|
||||
MVFFR
|
||||
Makefile
|
||||
@@ -289,6 +306,7 @@ MirroredStrategy
|
||||
Mixtral
|
||||
MosaicML
|
||||
MoEs
|
||||
Mooncake
|
||||
Mpops
|
||||
Multicore
|
||||
Multithreaded
|
||||
@@ -349,6 +367,7 @@ PCC
|
||||
PCI
|
||||
PCIe
|
||||
PEFT
|
||||
perf
|
||||
PEQT
|
||||
PIL
|
||||
PILImage
|
||||
@@ -432,7 +451,9 @@ SKU
|
||||
SKUs
|
||||
SLES
|
||||
SLURM
|
||||
Slurm
|
||||
SMEM
|
||||
SMFMA
|
||||
SMI
|
||||
SMT
|
||||
SPI
|
||||
@@ -444,18 +465,23 @@ SWE
|
||||
SerDes
|
||||
ShareGPT
|
||||
Shlens
|
||||
simd
|
||||
Skylake
|
||||
Softmax
|
||||
Spack
|
||||
SplitK
|
||||
Supermicro
|
||||
Szegedy
|
||||
TagRAM
|
||||
TCA
|
||||
TCC
|
||||
TCCs
|
||||
TCI
|
||||
TCIU
|
||||
TCP
|
||||
TCR
|
||||
THREADGROUPS
|
||||
threadgroups
|
||||
TensorRT
|
||||
TensorFloat
|
||||
TF
|
||||
@@ -499,6 +525,7 @@ UltraChat
|
||||
Uncached
|
||||
Unittests
|
||||
Unhandled
|
||||
unwindowed
|
||||
VALU
|
||||
VBIOS
|
||||
VCN
|
||||
@@ -515,11 +542,13 @@ Vanhoucke
|
||||
Vulkan
|
||||
WGP
|
||||
WGPs
|
||||
WR
|
||||
WX
|
||||
WikiText
|
||||
Wojna
|
||||
Workgroups
|
||||
Writebacks
|
||||
xcc
|
||||
XCD
|
||||
XCDs
|
||||
XGBoost
|
||||
@@ -540,6 +569,7 @@ ZenDNN
|
||||
accuracies
|
||||
activations
|
||||
addr
|
||||
addEventListener
|
||||
ade
|
||||
ai
|
||||
alloc
|
||||
@@ -555,6 +585,7 @@ autogenerated
|
||||
autotune
|
||||
avx
|
||||
awk
|
||||
az
|
||||
backend
|
||||
backends
|
||||
bb
|
||||
@@ -572,6 +603,7 @@ boson
|
||||
bosons
|
||||
br
|
||||
BrainFloat
|
||||
btn
|
||||
buildable
|
||||
bursty
|
||||
bzip
|
||||
@@ -583,18 +615,21 @@ centric
|
||||
changelog
|
||||
checkpointing
|
||||
chiplet
|
||||
classList
|
||||
cmake
|
||||
cmd
|
||||
coalescable
|
||||
codename
|
||||
collater
|
||||
comgr
|
||||
compat
|
||||
completers
|
||||
composable
|
||||
concretization
|
||||
config
|
||||
configs
|
||||
conformant
|
||||
const
|
||||
constructible
|
||||
convolutional
|
||||
convolves
|
||||
@@ -658,6 +693,7 @@ exascale
|
||||
executables
|
||||
ffmpeg
|
||||
filesystem
|
||||
forEach
|
||||
fortran
|
||||
fp
|
||||
framebuffer
|
||||
@@ -666,6 +702,7 @@ galb
|
||||
gcc
|
||||
gdb
|
||||
gemm
|
||||
getAttribute
|
||||
gfortran
|
||||
gfx
|
||||
githooks
|
||||
@@ -747,6 +784,7 @@ lossy
|
||||
macOS
|
||||
matchers
|
||||
maxtext
|
||||
megablocks
|
||||
megatron
|
||||
microarchitecture
|
||||
migraphx
|
||||
@@ -775,6 +813,7 @@ opencv
|
||||
openmp
|
||||
openssl
|
||||
optimizers
|
||||
ol
|
||||
os
|
||||
oversubscription
|
||||
pageable
|
||||
@@ -822,6 +861,8 @@ recommenders
|
||||
quantile
|
||||
quantizer
|
||||
quasirandom
|
||||
querySelector
|
||||
querySelectorAll
|
||||
queueing
|
||||
qwen
|
||||
radeon
|
||||
@@ -840,6 +881,8 @@ req
|
||||
resampling
|
||||
rescaling
|
||||
reusability
|
||||
rhel
|
||||
rl
|
||||
RLHF
|
||||
roadmap
|
||||
roc
|
||||
@@ -884,19 +927,23 @@ scalability
|
||||
scalable
|
||||
scipy
|
||||
seealso
|
||||
selectedTag
|
||||
sendmsg
|
||||
seqs
|
||||
serializers
|
||||
setAttribute
|
||||
sglang
|
||||
shader
|
||||
sharding
|
||||
sigmoid
|
||||
sles
|
||||
sm
|
||||
smi
|
||||
softmax
|
||||
spack
|
||||
spmm
|
||||
src
|
||||
stanford
|
||||
stochastically
|
||||
strided
|
||||
subcommand
|
||||
@@ -913,6 +960,7 @@ symlink
|
||||
symlinks
|
||||
sys
|
||||
tabindex
|
||||
targetContainer
|
||||
td
|
||||
tensorfloat
|
||||
th
|
||||
|
||||
1678
CHANGELOG.md
1678
CHANGELOG.md
File diff suppressed because it is too large
Load Diff
2641
RELEASE.md
2641
RELEASE.md
File diff suppressed because it is too large
Load Diff
27
default.xml
27
default.xml
@@ -1,7 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<manifest>
|
||||
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
|
||||
<default revision="refs/tags/rocm-6.4.3"
|
||||
<default revision="refs/tags/rocm-7.0.1"
|
||||
remote="rocm-org"
|
||||
sync-c="true"
|
||||
sync-j="4" />
|
||||
@@ -9,6 +9,7 @@
|
||||
<project name="ROCK-Kernel-Driver" />
|
||||
<project name="ROCR-Runtime" />
|
||||
<project name="amdsmi" />
|
||||
<project name="aqlprofile" />
|
||||
<project name="rdc" />
|
||||
<project name="rocm_bandwidth_test" />
|
||||
<project name="rocm_smi_lib" />
|
||||
@@ -22,7 +23,7 @@
|
||||
<project name="rocprofiler-systems" />
|
||||
<project name="roctracer" />
|
||||
<!--HIP Projects-->
|
||||
<project name="HIP" />
|
||||
<project name="hip" />
|
||||
<project name="hip-tests" />
|
||||
<project name="HIPIFY" />
|
||||
<project name="clr" />
|
||||
@@ -37,36 +38,26 @@
|
||||
<project name="rocr_debug_agent" />
|
||||
<!-- ROCm Libraries -->
|
||||
<project groups="mathlibs" name="AMDMIGraphX" />
|
||||
<project groups="mathlibs" name="MIOpen" />
|
||||
<project groups="mathlibs" name="MIVisionX" />
|
||||
<project groups="mathlibs" name="ROCmValidationSuite" />
|
||||
<project groups="mathlibs" name="Tensile" />
|
||||
<project groups="mathlibs" name="composable_kernel" />
|
||||
<project groups="mathlibs" name="hipBLAS-common" />
|
||||
<project groups="mathlibs" name="hipBLAS" />
|
||||
<project groups="mathlibs" name="hipBLASLt" />
|
||||
<project groups="mathlibs" name="hipCUB" />
|
||||
<project groups="mathlibs" name="hipFFT" />
|
||||
<project groups="mathlibs" name="hipRAND" />
|
||||
<project groups="mathlibs" name="hipSOLVER" />
|
||||
<project groups="mathlibs" name="hipSPARSE" />
|
||||
<project groups="mathlibs" name="hipSPARSELt" />
|
||||
<project groups="mathlibs" name="hipTensor" />
|
||||
<project groups="mathlibs" name="hipfort" />
|
||||
<project groups="mathlibs" name="rccl" />
|
||||
<project groups="mathlibs" name="rocAL" />
|
||||
<project groups="mathlibs" name="rocALUTION" />
|
||||
<project groups="mathlibs" name="rocBLAS" />
|
||||
<project groups="mathlibs" name="rocDecode" />
|
||||
<project groups="mathlibs" name="rocJPEG" />
|
||||
<!-- The following components have been migrated to rocm-libraries:
|
||||
hipBLAS-common hipBLAS hipBLASLt hipCUB
|
||||
hipFFT hipRAND hipSPARSE hipSPARSELt
|
||||
MIOpen rocBLAS rocFFT rocPRIM rocRAND
|
||||
rocSPARSE rocThrust Tensile -->
|
||||
<project groups="mathlibs" name="rocm-libraries" />
|
||||
<project groups="mathlibs" name="rocPyDecode" />
|
||||
<project groups="mathlibs" name="rocFFT" />
|
||||
<project groups="mathlibs" name="rocPRIM" />
|
||||
<project groups="mathlibs" name="rocRAND" />
|
||||
<project groups="mathlibs" name="rocSHMEM" />
|
||||
<project groups="mathlibs" name="rocSOLVER" />
|
||||
<project groups="mathlibs" name="rocSPARSE" />
|
||||
<project groups="mathlibs" name="rocThrust" />
|
||||
<project groups="mathlibs" name="rocWMMA" />
|
||||
<project groups="mathlibs" name="rocm-cmake" />
|
||||
<project groups="mathlibs" name="rpp" />
|
||||
|
||||
@@ -29,7 +29,7 @@ additional licenses. Please review individual repositories for more information.
|
||||
| [AMD SMI](https://github.com/ROCm/amdsmi) | [MIT](https://github.com/ROCm/amdsmi/blob/amd-staging/LICENSE) |
|
||||
| [aomp](https://github.com/ROCm/aomp/) | [Apache 2.0](https://github.com/ROCm/aomp/blob/aomp-dev/LICENSE) |
|
||||
| [aomp-extras](https://github.com/ROCm/aomp-extras/) | [MIT](https://github.com/ROCm/aomp-extras/blob/aomp-dev/LICENSE) |
|
||||
| [AQLprofile] | [MIT](https://github.com/ROCm/aqlprofile/blob/amd-staging/LICENSE.md) |
|
||||
| [AQLprofile](https://github.com/rocm/aqlprofile/) | [MIT](https://github.com/ROCm/aqlprofile/blob/amd-staging/LICENSE.md) |
|
||||
| [Code Object Manager (Comgr)](https://github.com/ROCm/llvm-project/tree/amd-staging/amd/comgr) | [The University of Illinois/NCSA](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/comgr/LICENSE.txt) |
|
||||
| [Composable Kernel](https://github.com/ROCm/composable_kernel) | [MIT](https://github.com/ROCm/composable_kernel/blob/develop/LICENSE) |
|
||||
| [half](https://github.com/ROCm/half/) | [MIT](https://github.com/ROCm/half/blob/rocm/LICENSE.txt) |
|
||||
|
||||
@@ -1,133 +1,136 @@
|
||||
ROCm Version,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
|
||||
,,,,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,"SLES 15 SP7, SP6","SLES 15 SP7, SP6",SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,,,,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
|
||||
,"Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
|
||||
,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
|
||||
,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,Azure Linux 3.0 [#mi300x-past-60]_,,,,,,,,,,,,
|
||||
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
|
||||
,RDNA4,RDNA4,RDNA4,,,,,,,,,,,,,,,
|
||||
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
|
||||
,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
|
||||
,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
|
||||
:doc:`verl <../compatibility/ml-compatibility/verl-compatibility>` [#verl_compat]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.3.0.post0,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>` [#stanford-megatron-lm_compat]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,85f95ae,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat]_,N/A,N/A,N/A,2.4.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,
|
||||
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>` [#megablocks_compat]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.7.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Taichi <../compatibility/ml-compatibility/taichi-compatibility>` [#taichi_compat]_,N/A,N/A,N/A,N/A,N/A,1.8.0b1,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Ray <../compatibility/ml-compatibility/ray-compatibility>` [#ray_compat]_,N/A,N/A,2.48.0.post0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat]_,N/A,N/A,N/A,b5997,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.2,1.2,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
Thrust,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
CUB,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
KMD & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`KMD versions <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.12.0,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
|
||||
:doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
|
||||
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.3.1,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.10,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.22.3,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
|
||||
:doc:`rocSHMEM <rocshmem:index>`,2.0.1,2.0.1,2.0.0,2.0.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.1,0.12.1,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.18,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
|
||||
:doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.6.0,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.12.0,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.3,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.3,3.2.3,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,4.4.1,4.4.1,4.4.0,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.32,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.28.2,3.28.2,3.28.0,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.7.0,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.43.0,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.4.1,3.4.1,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
SUPPORT LIBS,,,,,,,,,,,,,,,,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43483,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,25.5.1,25.5.1,25.4.2,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.7.0,7.5.0,7.5.0,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
PERFORMANCE TOOLS,,,,,,,,,,,,,,,,,,
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.1,3.1.1,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.2,1.0.2,1.0.1,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60403,2.0.60402,2.0.60401,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60403,4.1.60402,4.1.60401,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
DEVELOPMENT TOOLS,,,,,,,,,,,,,,,,,,
|
||||
:doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,19.0.0,19.0.0,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.2,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.4,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
,,,,,,,,,,,,,,,,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
:doc:`HIP <hip:index>`,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.15.0,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
|
||||
ROCm Version,7.0.1/7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0, 6.1.5, 6.1.2, 6.1.1, 6.1.0, 6.0.2, 6.0.0
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.3,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2,"Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04","Ubuntu 24.04.1, 24.04",Ubuntu 24.04,,,,,,
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5,"Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4","Ubuntu 22.04.5, 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3","Ubuntu 22.04.4, 22.04.3, 22.04.2","Ubuntu 22.04.4, 22.04.3, 22.04.2"
|
||||
,,,,,,,,,,,,,,"Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5","Ubuntu 20.04.6, 20.04.5"
|
||||
,"RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.6, 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.5, 9.4","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.4, 9.3, 9.2","RHEL 9.3, 9.2","RHEL 9.3, 9.2"
|
||||
,RHEL 8.10 [#rhel-700-past-60]_,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,RHEL 8.10,"RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.10, 8.9","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8","RHEL 8.9, 8.8"
|
||||
,SLES 15 SP7 [#sles-db-700-past-60]_,"SLES 15 SP7, SP6","SLES 15 SP7, SP6",SLES 15 SP6,SLES 15 SP6,"SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP6, SP5","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4","SLES 15 SP5, SP4"
|
||||
,,,,,,,,,,,,,,,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9,CentOS 7.9
|
||||
,"Oracle Linux 9, 8 [#ol-700-mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_","Oracle Linux 9, 8 [#mi300x-past-60]_",Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.10 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,Oracle Linux 8.9 [#mi300x-past-60]_,,,
|
||||
,Debian 12 [#sles-db-700-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,Debian 12 [#single-node-past-60]_,,,,,,,,,,,
|
||||
,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-past-60]_,Azure Linux 3.0 [#az-mi300x-630-past-60]_,Azure Linux 3.0 [#az-mi300x-630-past-60]_,,,,,,,,,,,,
|
||||
,Rocky Linux 9 [#rl-700-past-60]_,,,,,,,,,,,,,,,,,,
|
||||
,.. _architecture-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA4,,,,,,,,,,,,,,,,,,
|
||||
,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA,CDNA
|
||||
,RDNA4,RDNA4,RDNA4,RDNA4,,,,,,,,,,,,,,,
|
||||
,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx950 [#mi350x-os-past-60]_,,,,,,,,,,,,,,,,,,
|
||||
,gfx1201 [#RDNA-OS-700-past-60]_,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,gfx1201 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1200 [#RDNA-OS-700-past-60]_,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,gfx1200 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1101 [#RDNA-OS-700-past-60]_ [#rd-v710-past-60]_,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_ [#7700XT-OS-past-60]_,gfx1101 [#RDNA-OS-past-60]_,,,,,,,,,,,,,,,
|
||||
,gfx1100 [#RDNA-OS-700-past-60]_,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030 [#RDNA-OS-700-past-60]_ [#rd-v620-past-60]_,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030,gfx1030
|
||||
,gfx942 [#mi325x-os-past-60]_ [#mi300x-os-past-60]_ [#mi300A-os-past-60]_,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942,gfx942 [#mi300_624-past-60]_,gfx942 [#mi300_622-past-60]_,gfx942 [#mi300_621-past-60]_,gfx942 [#mi300_620-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_612-past-60]_, gfx942 [#mi300_611-past-60]_, gfx942 [#mi300_610-past-60]_, gfx942 [#mi300_602-past-60]_, gfx942 [#mi300_600-past-60]_
|
||||
,gfx90a [#mi200x-os-past-60]_,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a,gfx90a
|
||||
,gfx908 [#mi100-os-past-60]_,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908,gfx908
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.7, 2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 1.13","2.4, 2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.3, 2.2, 2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13","2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.19.1, 2.18.1","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.17.0, 2.16.2, 2.15.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.16.1, 2.15.1, 2.14.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.15.0, 2.14.0, 2.13.1","2.14.0, 2.13.1, 2.12.1","2.14.0, 2.13.1, 2.12.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.6.0,0.4.35,0.4.35,0.4.35,0.4.35,0.4.31,0.4.31,0.4.31,0.4.31,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26,0.4.26
|
||||
:doc:`verl <../compatibility/ml-compatibility/verl-compatibility>` [#verl_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.3.0.post0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>` [#stanford-megatron-lm_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,85f95ae,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`DGL <../compatibility/ml-compatibility/dgl-compatibility>` [#dgl_compat-past-60]_,N/A,N/A,N/A,N/A,2.4.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>` [#megablocks_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.7.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Taichi <../compatibility/ml-compatibility/taichi-compatibility>` [#taichi_compat-past-60]_,N/A,N/A,N/A,N/A,N/A,N/A,1.8.0b1,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`Ray <../compatibility/ml-compatibility/ray-compatibility>` [#ray_compat-past-60]_,N/A,N/A,N/A,2.48.0.post0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`llama.cpp <../compatibility/ml-compatibility/llama-cpp-compatibility>` [#llama-cpp_compat-past-60]_,N/A,N/A,N/A,N/A,b5997,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.22.0,1.20.0,1.20.0,1.20.0,1.20.0,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.17.3,1.14.1,1.14.1
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.4.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.3.0,>=1.2.0,>=1.2.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.17.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.15.0,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1,>=1.14.1
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
|
||||
Thrust,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
CUB,2.6.0,2.5.0,2.5.0,2.5.0,2.5.0,2.3.2,2.3.2,2.3.2,2.3.2,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.1,2.0.1
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
DRIVER & USER SPACE [#kfd_support-past-60]_,.. _kfd-userspace-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
|
||||
:doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.10.1 [#driver_patch-past-60]_, 30.10, 6.4.x, 6.3.x, 6.2.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.4.x, 6.3.x, 6.2.x, 6.1.x, 6.0.x, 5.7.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x","6.2.x, 6.1.x, 6.0.x, 5.7.x, 5.6.x"
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.13.0,2.12.0,2.12.0,2.12.0,2.12.0,2.11.0,2.11.0,2.11.0,2.11.0,2.10.0,2.10.0,2.10.0,2.10.0,2.9.0,2.9.0,2.9.0,2.9.0,2.8.0,2.8.0
|
||||
:doc:`MIOpen <miopen:index>`,3.5.0,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0,2.5.0
|
||||
:doc:`rocAL <rocal:index>`,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0,2.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`rocDecode <rocdecode:index>`,1.0.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,N/A,N/A
|
||||
:doc:`rocJPEG <rocjpeg:index>`,1.1.0,0.8.0,0.8.0,0.8.0,0.8.0,0.6.0,0.6.0,0.6.0,0.6.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.6.0,0.3.1,0.3.1,0.3.1,0.3.1,0.2.0,0.2.0,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`RPP <rpp:index>`,2.0.0,1.9.10,1.9.10,1.9.10,1.9.10,1.9.1,1.9.1,1.9.1,1.9.1,1.8.0,1.8.0,1.8.0,1.8.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
|
||||
:doc:`RCCL <rccl:index>`,2.26.6,2.22.3,2.22.3,2.22.3,2.22.3,2.21.5,2.21.5,2.21.5,2.21.5,2.20.5,2.20.5,2.20.5,2.20.5,2.18.6,2.18.6,2.18.6,2.18.6,2.18.3,2.18.3
|
||||
:doc:`rocSHMEM <rocshmem:index>`,3.0.0,2.0.1,2.0.1,2.0.0,2.0.0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,3.0.0,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.0,2.1.0,2.1.0,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,1.0.0,0.12.1,0.12.1,0.12.1,0.12.0,0.10.0,0.10.0,0.10.0,0.10.0,0.8.0,0.8.0,0.8.0,0.8.0,0.7.0,0.7.0,0.7.0,0.7.0,0.6.0,0.6.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.20,1.0.18,1.0.18,1.0.18,1.0.18,1.0.17,1.0.17,1.0.17,1.0.17,1.0.16,1.0.15,1.0.15,1.0.14,1.0.14,1.0.14,1.0.14,1.0.14,1.0.13,1.0.13
|
||||
:doc:`hipfort <hipfort:index>`,0.7.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.1,0.5.1,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0
|
||||
:doc:`hipRAND <hiprand:index>`,3.0.0,2.12.0,2.12.0,2.12.0,2.12.0,2.11.1,2.11.1,2.11.1,2.11.0,2.11.1,2.11.0,2.11.0,2.11.0,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16,2.10.16
|
||||
:doc:`hipSOLVER <hipsolver:index>`,3.0.0,2.4.0,2.4.0,2.4.0,2.4.0,2.3.0,2.3.0,2.3.0,2.3.0,2.2.0,2.2.0,2.2.0,2.2.0,2.1.1,2.1.1,2.1.1,2.1.0,2.0.0,2.0.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,4.0.1,3.2.0,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.1.1,3.1.1,3.1.1,3.1.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.4,0.2.3,0.2.3,0.2.3,0.2.3,0.2.2,0.2.2,0.2.2,0.2.2,0.2.1,0.2.1,0.2.1,0.2.1,0.2.0,0.2.0,0.1.0,0.1.0,0.1.0,0.1.0
|
||||
:doc:`rocALUTION <rocalution:index>`,4.0.0,3.2.3,3.2.3,3.2.3,3.2.2,3.2.1,3.2.1,3.2.1,3.2.1,3.2.1,3.2.0,3.2.0,3.2.0,3.1.1,3.1.1,3.1.1,3.1.1,3.0.3,3.0.3
|
||||
:doc:`rocBLAS <rocblas:index>`,5.0.0,4.4.1,4.4.1,4.4.0,4.4.0,4.3.0,4.3.0,4.3.0,4.3.0,4.2.4,4.2.1,4.2.1,4.2.0,4.1.2,4.1.2,4.1.0,4.1.0,4.0.0,4.0.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.34,1.0.32,1.0.32,1.0.32,1.0.32,1.0.31,1.0.31,1.0.31,1.0.31,1.0.30,1.0.29,1.0.29,1.0.28,1.0.27,1.0.27,1.0.27,1.0.26,1.0.25,1.0.23
|
||||
:doc:`rocRAND <rocrand:index>`,4.0.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.0,3.2.0,3.2.0,3.2.0,3.1.1,3.1.0,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,2.10.17
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.30.0,3.28.2,3.28.2,3.28.0,3.28.0,3.27.0,3.27.0,3.27.0,3.27.0,3.26.2,3.26.0,3.26.0,3.26.0,3.25.0,3.25.0,3.25.0,3.25.0,3.24.0,3.24.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,4.0.2,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.2,3.1.2,3.1.2,3.1.2,3.0.2,3.0.2
|
||||
:doc:`rocWMMA <rocwmma:index>`,2.0.0,1.7.0,1.7.0,1.7.0,1.7.0,1.6.0,1.6.0,1.6.0,1.6.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.44.0,4.43.0,4.43.0,4.43.0,4.43.0,4.42.0,4.42.0,4.42.0,4.42.0,4.41.0,4.41.0,4.41.0,4.41.0,4.40.0,4.40.0,4.40.0,4.40.0,4.39.0,4.39.0
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
|
||||
:doc:`hipCUB <hipcub:index>`,4.0.0,3.4.0,3.4.0,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.1,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`hipTensor <hiptensor:index>`,2.0.0,1.5.0,1.5.0,1.5.0,1.5.0,1.4.0,1.4.0,1.4.0,1.4.0,1.3.0,1.3.0,1.3.0,1.3.0,1.2.0,1.2.0,1.2.0,1.2.0,1.1.0,1.1.0
|
||||
:doc:`rocPRIM <rocprim:index>`,4.0.0,3.4.1,3.4.1,3.4.0,3.4.0,3.3.0,3.3.0,3.3.0,3.3.0,3.2.2,3.2.0,3.2.0,3.2.0,3.1.0,3.1.0,3.1.0,3.1.0,3.0.0,3.0.0
|
||||
:doc:`rocThrust <rocthrust:index>`,4.0.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.3.0,3.1.1,3.1.0,3.1.0,3.0.1,3.0.1,3.0.1,3.0.1,3.0.1,3.0.0,3.0.0
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
SUPPORT LIBS,,,,,,,,,,,,,,,,,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,7.0.51830,6.4.43483,6.4.43483,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,7.0.0,6.4.3,6.4.2,6.4.1,6.4.0,6.3.3,6.3.2,6.3.1,6.3.0,6.2.4,6.2.2,6.2.1,6.2.0,6.1.5,6.1.2,6.1.1,6.1.0,6.0.2,6.0.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,N/A [#ROCT-rocr-past-60]_,20240607.5.7,20240607.5.7,20240607.4.05,20240607.1.4246,20240125.5.08,20240125.5.08,20240125.5.08,20240125.3.30,20231016.2.245,20231016.2.245
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,26.0.0,25.5.1,25.5.1,25.4.2,25.3.0,24.7.1,24.7.1,24.7.1,24.7.1,24.6.3,24.6.3,24.6.3,24.6.2,24.5.1,24.5.1,24.5.1,24.4.1,23.4.2,23.4.2
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,1.1.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.8.0,7.7.0,7.5.0,7.5.0,7.5.0,7.4.0,7.4.0,7.4.0,7.4.0,7.3.0,7.3.0,7.3.0,7.3.0,7.2.0,7.2.0,7.0.0,7.0.0,6.0.2,6.0.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.2.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.1.0,1.0.60204,1.0.60202,1.0.60201,1.0.60200,1.0.60105,1.0.60102,1.0.60101,1.0.60100,1.0.60002,1.0.60000
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
PERFORMANCE TOOLS,,,,,,,,,,,,,,,,,,,
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,2.6.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.2.3,3.1.1,3.1.1,3.1.0,3.1.0,3.0.0,3.0.0,3.0.0,3.0.0,2.0.1,2.0.1,2.0.1,2.0.1,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.1.0,1.0.2,1.0.2,1.0.1,1.0.0,0.1.2,0.1.1,0.1.0,0.1.0,1.11.2,1.11.2,1.11.2,1.11.2,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.70000,2.0.60403,2.0.60402,2.0.60401,2.0.60400,2.0.60303,2.0.60302,2.0.60301,2.0.60300,2.0.60204,2.0.60202,2.0.60201,2.0.60200,2.0.60105,2.0.60102,2.0.60101,2.0.60100,2.0.60002,2.0.60000
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,1.0.0,0.6.0,0.6.0,0.6.0,0.6.0,0.5.0,0.5.0,0.5.0,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,N/A,N/A,N/A,N/A,N/A,N/A
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.70000,4.1.60403,4.1.60402,4.1.60401,4.1.60400,4.1.60303,4.1.60302,4.1.60301,4.1.60300,4.1.60204,4.1.60202,4.1.60201,4.1.60200,4.1.60105,4.1.60102,4.1.60101,4.1.60100,4.1.60002,4.1.60000
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
DEVELOPMENT TOOLS,,,,,,,,,,,,,,,,,,,
|
||||
:doc:`HIPIFY <hipify:index>`,20.0.0,19.0.0,19.0.0,19.0.0,19.0.0,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.14.0,0.13.0,0.13.0,0.13.0,0.13.0,0.12.0,0.12.0,0.12.0,0.12.0,0.11.0,0.11.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.3,0.77.2,0.77.2,0.77.2,0.77.2,0.77.0,0.77.0,0.77.0,0.77.0,0.76.0,0.76.0,0.76.0,0.76.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0,0.71.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,16.3.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,15.2.0,14.2.0,14.2.0,14.2.0,14.2.0,14.1.0,14.1.0,14.1.0,14.1.0,13.2.0,13.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.5.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.4.0,0.3.0,0.3.0,0.3.0,0.3.0,N/A,N/A
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.1.0,2.0.4,2.0.4,2.0.4,2.0.4,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3,2.0.3
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0,0.5.0
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.1.1,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0,1.0.0
|
||||
`Flang <https://github.com/ROCm/flang>`_,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24455,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
:doc:`llvm-project <llvm-project:index>`,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,20.0.0.25314,19.0.0.25224,19.0.0.25224,19.0.0.25184,19.0.0.25133,18.0.0.25012,18.0.0.25012,18.0.0.24491,18.0.0.24491,18.0.0.24392,18.0.0.24355,18.0.0.24355,18.0.0.24232,17.0.0.24193,17.0.0.24193,17.0.0.24154,17.0.0.24103,17.0.0.24012,17.0.0.23483
|
||||
,,,,,,,,,,,,,,,,,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix-past-60:,,,,,,,,,,,,,,,,,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,7.0.51830,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
:doc:`HIP <hip:index>`,7.0.51830,6.4.43484,6.4.43484,6.4.43483,6.4.43482,6.3.42134,6.3.42134,6.3.42133,6.3.42131,6.2.41134,6.2.41134,6.2.41134,6.2.41133,6.1.40093,6.1.40093,6.1.40092,6.1.40091,6.1.32831,6.1.32830
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.18.0,1.15.0,1.15.0,1.15.0,1.15.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.14.0,1.13.0,1.13.0,1.13.0,1.13.0,1.13.0,1.12.0,1.12.0
|
||||
|
||||
|
@@ -23,142 +23,160 @@ compatibility and system requirements.
|
||||
.. container:: format-big-table
|
||||
|
||||
.. csv-table::
|
||||
:header: "ROCm Version", "6.4.3", "6.4.2", "6.3.0"
|
||||
:header: "ROCm Version", "7.0.1/7.0.0", "6.4.3", "6.3.0"
|
||||
:stub-columns: 1
|
||||
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.2,Ubuntu 24.04.2,Ubuntu 24.04.2
|
||||
:ref:`Operating systems & kernels <OS-kernel-versions>`,Ubuntu 24.04.3,Ubuntu 24.04.2,Ubuntu 24.04.2
|
||||
,Ubuntu 22.04.5,Ubuntu 22.04.5,Ubuntu 22.04.5
|
||||
,"RHEL 9.6, 9.4","RHEL 9.6, 9.4","RHEL 9.5, 9.4"
|
||||
,RHEL 8.10,RHEL 8.10,RHEL 8.10
|
||||
,"SLES 15 SP7, SP6","SLES 15 SP7, SP6","SLES 15 SP6, SP5"
|
||||
,"Oracle Linux 9, 8 [#mi300x]_","Oracle Linux 9, 8 [#mi300x]_",Oracle Linux 8.10 [#mi300x]_
|
||||
,Debian 12 [#single-node]_,Debian 12 [#single-node]_,
|
||||
,Azure Linux 3.0 [#mi300x]_,Azure Linux 3.0 [#mi300x]_,
|
||||
,RHEL 8.10 [#rhel-700]_,RHEL 8.10,RHEL 8.10
|
||||
,SLES 15 SP7 [#sles-db-700]_,"SLES 15 SP7, SP6","SLES 15 SP6, SP5"
|
||||
,"Oracle Linux 9, 8 [#ol-700-mi300x]_","Oracle Linux 9, 8 [#ol-mi300x]_",Oracle Linux 8.10 [#ol-mi300x]_
|
||||
,Debian 12 [#sles-db-700]_,Debian 12 [#single-node]_,
|
||||
,Azure Linux 3.0 [#az-mi300x]_,Azure Linux 3.0 [#az-mi300x]_,
|
||||
,Rocky Linux 9 [#rl-700]_,,
|
||||
,.. _architecture-support-compatibility-matrix:,,
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA3,CDNA3,CDNA3
|
||||
:doc:`Architecture <rocm-install-on-linux:reference/system-requirements>`,CDNA4,,
|
||||
,CDNA3,CDNA3,CDNA3
|
||||
,CDNA2,CDNA2,CDNA2
|
||||
,CDNA,CDNA,CDNA
|
||||
,RDNA4,RDNA4,
|
||||
,RDNA3,RDNA3,RDNA3
|
||||
,RDNA2,RDNA2,RDNA2
|
||||
,.. _gpu-support-compatibility-matrix:,,
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx1201 [#RDNA-OS]_,gfx1201 [#RDNA-OS]_,
|
||||
,gfx1200 [#RDNA-OS]_,gfx1200 [#RDNA-OS]_,
|
||||
,gfx1101 [#RDNA-OS]_ [#7700XT-OS]_,gfx1101 [#RDNA-OS]_ [#7700XT-OS]_,
|
||||
,gfx1100,gfx1100,gfx1100
|
||||
,gfx1030,gfx1030,gfx1030
|
||||
,gfx942,gfx942,gfx942
|
||||
,gfx90a,gfx90a,gfx90a
|
||||
,gfx908,gfx908,gfx908
|
||||
:doc:`GPU / LLVM target <rocm-install-on-linux:reference/system-requirements>`,gfx950 [#mi350x-os]_,,
|
||||
,gfx1201 [#RDNA-OS-700]_,gfx1201 [#RDNA-OS]_,
|
||||
,gfx1200 [#RDNA-OS-700]_,gfx1200 [#RDNA-OS]_,
|
||||
,gfx1101 [#RDNA-OS-700]_ [#rd-v710]_,gfx1101 [#RDNA-OS]_ [#7700XT-OS]_,
|
||||
,gfx1100 [#RDNA-OS-700]_,gfx1100,gfx1100
|
||||
,gfx1030 [#RDNA-OS-700]_ [#rd-v620]_,gfx1030,gfx1030
|
||||
,gfx942 [#mi325x-os]_ [#mi300x-os]_ [#mi300A-os]_,gfx942,gfx942
|
||||
,gfx90a [#mi200x-os]_,gfx90a,gfx90a
|
||||
,gfx908 [#mi100-os]_,gfx908,gfx908
|
||||
,,,
|
||||
FRAMEWORK SUPPORT,.. _framework-support-compatibility-matrix:,,
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.18.1, 2.17.1, 2.16.2","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.4.35,0.4.35,0.4.31
|
||||
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>`,N/A,N/A,85f95ae
|
||||
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>`,N/A,N/A,0.7.0
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.2,1.2,1.17.3
|
||||
:doc:`PyTorch <../compatibility/ml-compatibility/pytorch-compatibility>`,"2.7, 2.6, 2.5, 2.4, 2.3","2.6, 2.5, 2.4, 2.3","2.4, 2.3, 2.2, 2.1, 2.0, 1.13"
|
||||
:doc:`TensorFlow <../compatibility/ml-compatibility/tensorflow-compatibility>`,"2.19.1, 2.18.1","2.18.1, 2.17.1, 2.16.2","2.17.0, 2.16.2, 2.15.1"
|
||||
:doc:`JAX <../compatibility/ml-compatibility/jax-compatibility>`,0.6.0,0.4.35,0.4.31
|
||||
:doc:`Stanford Megatron-LM <../compatibility/ml-compatibility/stanford-megatron-lm-compatibility>` [#stanford-megatron-lm_compat]_,N/A,N/A,85f95ae
|
||||
:doc:`Megablocks <../compatibility/ml-compatibility/megablocks-compatibility>` [#megablocks_compat]_,N/A,N/A,0.7.0
|
||||
`ONNX Runtime <https://onnxruntime.ai/docs/build/eps.html#amd-migraphx>`_,1.22.0,1.20.0,1.17.3
|
||||
,,,
|
||||
THIRD PARTY COMMS,.. _thirdpartycomms-support-compatibility-matrix:,,
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.3.0,>=1.3.0,>=1.3.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.15.0,>=1.15.0,>=1.15.0
|
||||
`UCC <https://github.com/ROCm/ucc>`_,>=1.4.0,>=1.3.0,>=1.3.0
|
||||
`UCX <https://github.com/ROCm/ucx>`_,>=1.17.0,>=1.15.0,>=1.15.0
|
||||
,,,
|
||||
THIRD PARTY ALGORITHM,.. _thirdpartyalgorithm-support-compatibility-matrix:,,
|
||||
Thrust,2.5.0,2.5.0,2.3.2
|
||||
CUB,2.5.0,2.5.0,2.3.2
|
||||
Thrust,2.6.0,2.5.0,2.3.2
|
||||
CUB,2.6.0,2.5.0,2.3.2
|
||||
,,,
|
||||
KMD & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,,
|
||||
:doc:`KMD versions <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
|
||||
DRIVER & USER SPACE [#kfd_support]_,.. _kfd-userspace-support-compatibility-matrix:,,
|
||||
:doc:`AMD GPU Driver <rocm-install-on-linux:reference/user-kernel-space-compat-matrix>`,"30.10.1 [#driver_patch]_, 30.10, 6.4.x, 6.3.x, 6.2.x","6.4.x, 6.3.x, 6.2.x, 6.1.x","6.4.x, 6.3.x, 6.2.x, 6.1.x"
|
||||
,,,
|
||||
ML & COMPUTER VISION,.. _mllibs-support-compatibility-matrix:,,
|
||||
:doc:`Composable Kernel <composable_kernel:index>`,1.1.0,1.1.0,1.1.0
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.12.0,2.12.0,2.11.0
|
||||
:doc:`MIOpen <miopen:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.2.0,3.2.0,3.1.0
|
||||
:doc:`rocAL <rocal:index>`,2.2.0,2.2.0,2.1.0
|
||||
:doc:`rocDecode <rocdecode:index>`,0.10.0,0.10.0,0.8.0
|
||||
:doc:`rocJPEG <rocjpeg:index>`,0.8.0,0.8.0,0.6.0
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.3.1,0.3.1,0.2.0
|
||||
:doc:`RPP <rpp:index>`,1.9.10,1.9.10,1.9.1
|
||||
:doc:`MIGraphX <amdmigraphx:index>`,2.13.0,2.12.0,2.11.0
|
||||
:doc:`MIOpen <miopen:index>`,3.5.0,3.4.0,3.3.0
|
||||
:doc:`MIVisionX <mivisionx:index>`,3.3.0,3.2.0,3.1.0
|
||||
:doc:`rocAL <rocal:index>`,2.3.0,2.2.0,2.1.0
|
||||
:doc:`rocDecode <rocdecode:index>`,1.0.0,0.10.0,0.8.0
|
||||
:doc:`rocJPEG <rocjpeg:index>`,1.1.0,0.8.0,0.6.0
|
||||
:doc:`rocPyDecode <rocpydecode:index>`,0.6.0,0.3.1,0.2.0
|
||||
:doc:`RPP <rpp:index>`,2.0.0,1.9.10,1.9.1
|
||||
,,,
|
||||
COMMUNICATION,.. _commlibs-support-compatibility-matrix:,,
|
||||
:doc:`RCCL <rccl:index>`,2.22.3,2.22.3,2.21.5
|
||||
:doc:`rocSHMEM <rocshmem:index>`,2.0.1,2.0.1,N/A
|
||||
:doc:`RCCL <rccl:index>`,2.26.6,2.22.3,2.21.5
|
||||
:doc:`rocSHMEM <rocshmem:index>`,3.0.0,2.0.1,N/A
|
||||
,,,
|
||||
MATH LIBS,.. _mathlibs-support-compatibility-matrix:,,
|
||||
`half <https://github.com/ROCm/half>`_ ,1.12.0,1.12.0,1.12.0
|
||||
:doc:`hipBLAS <hipblas:index>`,2.4.0,2.4.0,2.3.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,0.12.1,0.12.1,0.10.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.18,1.0.18,1.0.17
|
||||
:doc:`hipfort <hipfort:index>`,0.6.0,0.6.0,0.5.0
|
||||
:doc:`hipRAND <hiprand:index>`,2.12.0,2.12.0,2.11.0
|
||||
:doc:`hipSOLVER <hipsolver:index>`,2.4.0,2.4.0,2.3.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,3.2.0,3.2.0,3.1.2
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.3,0.2.3,0.2.2
|
||||
:doc:`rocALUTION <rocalution:index>`,3.2.3,3.2.3,3.2.1
|
||||
:doc:`rocBLAS <rocblas:index>`,4.4.1,4.4.1,4.3.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.32,1.0.32,1.0.31
|
||||
:doc:`rocRAND <rocrand:index>`,3.3.0,3.3.0,3.2.0
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.28.2,3.28.2,3.27.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`rocWMMA <rocwmma:index>`,1.7.0,1.7.0,1.6.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.43.0,4.43.0,4.42.0
|
||||
:doc:`hipBLAS <hipblas:index>`,3.0.0,2.4.0,2.3.0
|
||||
:doc:`hipBLASLt <hipblaslt:index>`,1.0.0,0.12.1,0.10.0
|
||||
:doc:`hipFFT <hipfft:index>`,1.0.20,1.0.18,1.0.17
|
||||
:doc:`hipfort <hipfort:index>`,0.7.0,0.6.0,0.5.0
|
||||
:doc:`hipRAND <hiprand:index>`,3.0.0,2.12.0,2.11.0
|
||||
:doc:`hipSOLVER <hipsolver:index>`,3.0.0,2.4.0,2.3.0
|
||||
:doc:`hipSPARSE <hipsparse:index>`,4.0.1,3.2.0,3.1.2
|
||||
:doc:`hipSPARSELt <hipsparselt:index>`,0.2.4,0.2.3,0.2.2
|
||||
:doc:`rocALUTION <rocalution:index>`,4.0.0,3.2.3,3.2.1
|
||||
:doc:`rocBLAS <rocblas:index>`,5.0.0,4.4.1,4.3.0
|
||||
:doc:`rocFFT <rocfft:index>`,1.0.34,1.0.32,1.0.31
|
||||
:doc:`rocRAND <rocrand:index>`,4.0.0,3.3.0,3.2.0
|
||||
:doc:`rocSOLVER <rocsolver:index>`,3.30.0,3.28.2,3.27.0
|
||||
:doc:`rocSPARSE <rocsparse:index>`,4.0.2,3.4.0,3.3.0
|
||||
:doc:`rocWMMA <rocwmma:index>`,2.0.0,1.7.0,1.6.0
|
||||
:doc:`Tensile <tensile:src/index>`,4.44.0,4.43.0,4.42.0
|
||||
,,,
|
||||
PRIMITIVES,.. _primitivelibs-support-compatibility-matrix:,,
|
||||
:doc:`hipCUB <hipcub:index>`,3.4.0,3.4.0,3.3.0
|
||||
:doc:`hipTensor <hiptensor:index>`,1.5.0,1.5.0,1.4.0
|
||||
:doc:`rocPRIM <rocprim:index>`,3.4.1,3.4.1,3.3.0
|
||||
:doc:`rocThrust <rocthrust:index>`,3.3.0,3.3.0,3.3.0
|
||||
:doc:`hipCUB <hipcub:index>`,4.0.0,3.4.0,3.3.0
|
||||
:doc:`hipTensor <hiptensor:index>`,2.0.0,1.5.0,1.4.0
|
||||
:doc:`rocPRIM <rocprim:index>`,4.0.0,3.4.1,3.3.0
|
||||
:doc:`rocThrust <rocthrust:index>`,4.0.0,3.3.0,3.3.0
|
||||
,,,
|
||||
SUPPORT LIBS,,,
|
||||
`hipother <https://github.com/ROCm/hipother>`_,6.4.43483,6.4.43483,6.3.42131
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,6.4.3,6.4.2,6.3.0
|
||||
`hipother <https://github.com/ROCm/hipother>`_,7.0.51830,6.4.43483,6.3.42131
|
||||
`rocm-core <https://github.com/ROCm/rocm-core>`_,7.0.0,6.4.3,6.3.0
|
||||
`ROCT-Thunk-Interface <https://github.com/ROCm/ROCT-Thunk-Interface>`_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_,N/A [#ROCT-rocr]_
|
||||
,,,
|
||||
SYSTEM MGMT TOOLS,.. _tools-support-compatibility-matrix:,,
|
||||
:doc:`AMD SMI <amdsmi:index>`,25.5.1,25.5.1,24.7.1
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,0.3.0,0.3.0,0.3.0
|
||||
:doc:`AMD SMI <amdsmi:index>`,26.0.0,25.5.1,24.7.1
|
||||
:doc:`ROCm Data Center Tool <rdc:index>`,1.1.0,0.3.0,0.3.0
|
||||
:doc:`rocminfo <rocminfo:index>`,1.0.0,1.0.0,1.0.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.7.0,7.5.0,7.4.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.1.0,1.1.0,1.1.0
|
||||
:doc:`ROCm SMI <rocm_smi_lib:index>`,7.8.0,7.7.0,7.4.0
|
||||
:doc:`ROCm Validation Suite <rocmvalidationsuite:index>`,1.2.0,1.1.0,1.1.0
|
||||
,,,
|
||||
PERFORMANCE TOOLS,,,
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,1.4.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.1.1,3.1.1,3.0.0
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.0.2,1.0.2,0.1.0
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.60403,2.0.60402,2.0.60300
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,0.6.0,0.6.0,0.5.0
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.60403,4.1.60402,4.1.60300
|
||||
:doc:`ROCm Bandwidth Test <rocm_bandwidth_test:index>`,2.6.0,1.4.0,1.4.0
|
||||
:doc:`ROCm Compute Profiler <rocprofiler-compute:index>`,3.2.3,3.1.1,3.0.0
|
||||
:doc:`ROCm Systems Profiler <rocprofiler-systems:index>`,1.1.0,1.0.2,0.1.0
|
||||
:doc:`ROCProfiler <rocprofiler:index>`,2.0.70000,2.0.60403,2.0.60300
|
||||
:doc:`ROCprofiler-SDK <rocprofiler-sdk:index>`,1.0.0,0.6.0,0.5.0
|
||||
:doc:`ROCTracer <roctracer:index>`,4.1.70000,4.1.60403,4.1.60300
|
||||
,,,
|
||||
DEVELOPMENT TOOLS,,,
|
||||
:doc:`HIPIFY <hipify:index>`,19.0.0,19.0.0,18.0.0.24455
|
||||
:doc:`HIPIFY <hipify:index>`,20.0.0,19.0.0,18.0.0.24455
|
||||
:doc:`ROCm CMake <rocmcmakebuildtools:index>`,0.14.0,0.14.0,0.14.0
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.2,0.77.2,0.77.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,15.2.0,15.2.0,15.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.4.0,0.4.0,0.4.0
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.0.4,2.0.4,2.0.3
|
||||
:doc:`ROCdbgapi <rocdbgapi:index>`,0.77.3,0.77.2,0.77.0
|
||||
:doc:`ROCm Debugger (ROCgdb) <rocgdb:index>`,16.3.0,15.2.0,15.2.0
|
||||
`rocprofiler-register <https://github.com/ROCm/rocprofiler-register>`_,0.5.0,0.4.0,0.4.0
|
||||
:doc:`ROCr Debug Agent <rocr_debug_agent:index>`,2.1.0,2.0.4,2.0.3
|
||||
,,,
|
||||
COMPILERS,.. _compilers-support-compatibility-matrix:,,
|
||||
`clang-ocl <https://github.com/ROCm/clang-ocl>`_,N/A,N/A,N/A
|
||||
:doc:`hipCC <hipcc:index>`,1.1.1,1.1.1,1.1.1
|
||||
`Flang <https://github.com/ROCm/flang>`_,19.0.0.25224,19.0.0.25224,18.0.0.24455
|
||||
:doc:`llvm-project <llvm-project:index>`,19.0.0.25224,19.0.0.25224,18.0.0.24491
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,19.0.0.25224,19.0.0.25224,18.0.0.24491
|
||||
`Flang <https://github.com/ROCm/flang>`_,20.0.0.25314,19.0.0.25224,18.0.0.24455
|
||||
:doc:`llvm-project <llvm-project:index>`,20.0.0.25314,19.0.0.25224,18.0.0.24491
|
||||
`OpenMP <https://github.com/ROCm/llvm-project/tree/amd-staging/openmp>`_,20.0.0.25314,19.0.0.25224,18.0.0.24491
|
||||
,,,
|
||||
RUNTIMES,.. _runtime-support-compatibility-matrix:,,
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,6.4.43484,6.4.43484,6.3.42131
|
||||
:doc:`HIP <hip:index>`,6.4.43484,6.4.43484,6.3.42131
|
||||
:doc:`AMD CLR <hip:understand/amd_clr>`,7.0.51830,6.4.43484,6.3.42131
|
||||
:doc:`HIP <hip:index>`,7.0.51830,6.4.43484,6.3.42131
|
||||
`OpenCL Runtime <https://github.com/ROCm/clr/tree/develop/opencl>`_,2.0.0,2.0.0,2.0.0
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.15.0,1.15.0,1.14.0
|
||||
|
||||
:doc:`ROCr Runtime <rocr-runtime:index>`,1.18.0,1.15.0,1.14.0
|
||||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#mi300x] Oracle Linux and Azure Linux are supported only on AMD Instinct MI300X.
|
||||
.. [#single-node] Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
|
||||
.. [#RDNA-OS] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#7700XT-OS] Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
|
||||
.. [#kfd_support] As of ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The tested user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
|
||||
.. [#rhel-700] RHEL 8.10 is only supported on AMD Instinct MI300X, MI300A, MI250X, MI250, MI210, and MI100 GPUs.
|
||||
.. [#ol-700-mi300x] **For ROCm 7.0.x** - Oracle Linux 9 is supported only on AMD Instinct MI355X, MI350X, and MI300X GPUs. Oracle Linux 8 is supported only on AMD Instinct MI300X GPUs.
|
||||
.. [#ol-mi300x] **Prior ROCm 7.0.0** - Oracle Linux is supported only on AMD Instinct MI300X GPUs.
|
||||
.. [#sles-db-700] **For ROCm 7.0.x** - SLES 15 SP7 and Debian 12 are only supported on AMD Instinct MI300X, MI300A, MI250X, MI250, and MI210 GPUs.
|
||||
.. [#az-mi300x] Starting ROCm 6.4.0, Azure Linux 3.0 is supported only on AMD Instinct MI300X and AMD Radeon PRO V710.
|
||||
.. [#rl-700] Rocky Linux 9 is only supported on AMD Instinct MI300X and MI300A GPUs.
|
||||
.. [#single-node] **Prior to ROCm 7.0.0** - Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
|
||||
.. [#mi350x-os] AMD Instinct MI355X (gfx950) and MI350X(gfx950) GPUs are only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.4, and Oracle Linux 9.
|
||||
.. [#RDNA-OS-700] **For ROCm 7.0.x** - AMD Radeon PRO AI PRO R9700 (gfx1201), AMD Radeon RX 9070 XT (gfx1201), AMD Radeon RX 9070 GRE (gfx1201), AMD Radeon RX 9070 (gfx1201), AMD Radeon RX 9060 XT (gfx1200), AMD Radeon RX 7800 XT (gfx1101), AMD Radeon RX 7700 XT (gfx1101), AMD Radeon PRO W7700 (gfx1101), and AMD Radeon PRO W6800 (gfx1030) are only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, and RHEL 9.6.
|
||||
.. [#RDNA-OS] **Prior ROCm 7.0.0** - Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#rd-v710] **For ROCm 7.0.x** - AMD Radeon PRO V710 (gfx1101) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and Azure Linux 3.0.
|
||||
.. [#rd-v620] **For ROCm 7.0.x** - AMD Radeon PRO V620 (gfx1030) is only supported on Ubuntu 24.04.3 and Ubuntu 22.04.5.
|
||||
.. [#mi325x-os] **For ROCm 7.0.x** - AMD Instinct MI325X GPU (gfx942) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#mi300x-os] **For ROCm 7.0.x** - AMD Instinct MI300X GPU (gfx942) is supported on all listed :ref:`supported_distributions`.
|
||||
.. [#mi300A-os] **For ROCm 7.0.x** - AMD Instinct MI300A GPU (gfx942) is supported only on Ubuntu 24.04, Ubuntu 22.04, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, Debian 12, and Rocky Linux 9.
|
||||
.. [#mi200x-os] **For ROCm 7.0.x** - AMD Instinct MI200 Series GPUs (gfx90a) are supported only on Ubuntu 24.04, Ubuntu 22.04, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, and Debian 12.
|
||||
.. [#mi100-os] **For ROCm 7.0.x** - AMD Instinct MI100 GPU (gfx908) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.4, and RHEL 8.10.
|
||||
.. [#7700XT-OS] **Prior ROCm 7.0.0** - Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
|
||||
.. [#stanford-megatron-lm_compat] Stanford Megatron-LM is only supported on ROCm 6.3.0.
|
||||
.. [#megablocks_compat] Megablocks is only supported on ROCm 6.3.0.
|
||||
.. [#driver_patch] AMD GPU Driver (amdgpu) 30.10.1 is a quality release that resolves an issue identified in the 30.10 release. There are no other significant changes or feature additions in ROCm 7.0.1 from ROCm 7.0.0. AMD GPU Driver (amdgpu) 30.10.1 is compatible with ROCm 7.0.1 and ROCm 7.0.0.
|
||||
.. [#kfd_support] As of ROCm 6.4.0, forward and backward compatibility between the AMD GPU Driver (amdgpu) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The supported user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and AMD GPU Driver support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
|
||||
.. [#ROCT-rocr] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
|
||||
|
||||
|
||||
@@ -174,28 +192,30 @@ Use this lookup table to confirm which operating system and kernel versions are
|
||||
:widths: 40, 20, 30, 20
|
||||
:stub-columns: 1
|
||||
|
||||
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 24.04.2, "6.8 GA, 6.11 HWE", 2.39
|
||||
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 24.04.3, "6.8 [GA], 6.14 [HWE]", 2.39
|
||||
,,
|
||||
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 22.04.5, "5.15 GA, 6.8 HWE", 2.35
|
||||
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 24.04.2, "6.8 [GA], 6.11 [HWE]", 2.39
|
||||
,,
|
||||
`Red Hat Enterprise Linux (RHEL 9) <https://access.redhat.com/articles/3078#RHEL9>`_, 9.6, 5.14+, 2.34
|
||||
`Ubuntu <https://ubuntu.com/about/release-cycle#ubuntu-kernel-release-cycle>`_, 22.04.5, "5.15 [GA], 6.8 [HWE]", 2.35
|
||||
,,
|
||||
`Red Hat Enterprise Linux (RHEL 9) <https://access.redhat.com/articles/3078#RHEL9>`_, 9.6, 5.14.0-570, 2.34
|
||||
,9.5, 5.14+, 2.34
|
||||
,9.4, 5.14+, 2.34
|
||||
,9.3, 5.14+, 2.34
|
||||
,9.4, 5.14.0-427, 2.34
|
||||
,,
|
||||
`Red Hat Enterprise Linux (RHEL 8) <https://access.redhat.com/articles/3078#RHEL8>`_, 8.10, 4.18.0+, 2.28
|
||||
,8.9, 4.18.0, 2.28
|
||||
`Red Hat Enterprise Linux (RHEL 8) <https://access.redhat.com/articles/3078#RHEL8>`_, 8.10, 4.18.0-553, 2.28
|
||||
,,
|
||||
`SUSE Linux Enterprise Server (SLES) <https://www.suse.com/support/kb/doc/?id=000019587#SLE15SP4>`_, 15 SP7, 6.11.0+, 2.38
|
||||
`SUSE Linux Enterprise Server (SLES) <https://www.suse.com/support/kb/doc/?id=000019587#SLE15SP4>`_, 15 SP7, 6.40-150700.51, 2.38
|
||||
,15 SP6, "6.5.0+, 6.4.0", 2.38
|
||||
,15 SP5, 5.14.21, 2.31
|
||||
,,
|
||||
`Oracle Linux <https://blogs.oracle.com/scoter/post/oracle-linux-and-unbreakable-enterprise-kernel-uek-releases>`_, 9, 5.15.0 (UEK), 2.35
|
||||
`Rocky Linux <https://wiki.rockylinux.org/rocky/version/>`_, 9, 5.14.0-570, 2.34
|
||||
,,
|
||||
`Oracle Linux <https://blogs.oracle.com/scoter/post/oracle-linux-and-unbreakable-enterprise-kernel-uek-releases>`_, 9, 6.12.0 (UEK), 2.34
|
||||
,8, 5.15.0 (UEK), 2.28
|
||||
,,
|
||||
`Debian <https://www.debian.org/download>`_,12, 6.1, 2.36
|
||||
`Debian <https://www.debian.org/download>`_,12, 6.1.0, 2.36
|
||||
,,
|
||||
`Azure Linux <https://techcommunity.microsoft.com/blog/linuxandopensourceblog/azure-linux-3-0-now-in-preview-on-azure-kubernetes-service-v1-31/4287229>`_,3.0, 6.6.60, 2.38
|
||||
`Azure Linux <https://techcommunity.microsoft.com/blog/linuxandopensourceblog/azure-linux-3-0-now-in-preview-on-azure-kubernetes-service-v1-31/4287229>`_,3.0, 6.6.92, 2.38
|
||||
,,
|
||||
|
||||
.. note::
|
||||
@@ -228,9 +248,24 @@ Expand for full historical view of:
|
||||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#mi300x-past-60] Oracle Linux and Azure Linux are supported only on AMD Instinct MI300X.
|
||||
.. [#single-node-past-60] Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
|
||||
.. [#RDNA-OS-past-60] Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#rhel-700-past-60] **For ROCm 7.0.x** - RHEL 8.10 is only supported on AMD Instinct MI300X, MI300A, MI250X, MI250, MI210, and MI100 GPUs.
|
||||
.. [#ol-700-mi300x-past-60] **For ROCm 7.0.x** - Oracle Linux 9 is supported only on AMD Instinct MI300X, MI350X, and MI355X. Oracle Linux 8 is only supported on AMD Instinct MI300X.
|
||||
.. [#mi300x-past-60] **Prior ROCm 7.0.0** - Oracle Linux is supported only on AMD Instinct MI300X.
|
||||
.. [#sles-db-700-past-60] **For ROCm 7.0.x** - SLES 15 SP7 and Debian 12 are only supported on AMD Instinct MI300X, MI300A, MI250X, MI250, and MI210 GPUs.
|
||||
.. [#single-node-past-60] **Prior to ROCm 7.0.0** - Debian 12 is supported only on AMD Instinct MI300X for single-node functionality.
|
||||
.. [#az-mi300x-past-60] Starting from ROCm 6.4.0, Azure Linux 3.0 is supported only on AMD Instinct MI300X and AMD Radeon PRO V710.
|
||||
.. [#az-mi300x-630-past-60] **Prior ROCm 6.4.0**- Azure Linux 3.0 is supported only on AMD Instinct MI300X.
|
||||
.. [#rl-700-past-60] Rocky Linux 9 is only supported on AMD Instinct MI300X and MI300A GPUs.
|
||||
.. [#mi350x-os-past-60] AMD Instinct MI355X (gfx950) and MI350X(gfx950) GPUs are only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.4, and Oracle Linux 9.
|
||||
.. [#RDNA-OS-700-past-60] **For ROCm 7.0.x** AMD Radeon PRO AI PRO R9700 (gfx1201), AMD Radeon RX 9070 XT (gfx1201), AMD Radeon RX 9070 GRE (gfx1201), AMD Radeon RX 9070 (gfx1201), AMD Radeon RX 9060 XT (gfx1200), AMD Radeon RX 7800 XT (gfx1101), AMD Radeon RX 7700 XT (gfx1101), AMD Radeon PRO W7700 (gfx1101), and AMD Radeon PRO W6800 (gfx1030) are only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, and RHEL 9.6.
|
||||
.. [#RDNA-OS-past-60] **Prior ROCm 7.0.0** - Radeon AI PRO R9700, Radeon RX 9070 XT (gfx1201), Radeon RX 9060 XT (gfx1200), Radeon PRO W7700 (gfx1101), and Radeon RX 7800 XT (gfx1101) are supported only on Ubuntu 24.04.2, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#rd-v710-past-60] **For ROCm 7.0.x** - AMD Radeon PRO V710 (gfx1101) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and Azure Linux 3.0.
|
||||
.. [#rd-v620-past-60] **For ROCm 7.0.x** - AMD Radeon PRO V620 (gfx1030) is only supported on Ubuntu 24.04.3 and Ubuntu 22.04.5.
|
||||
.. [#mi325x-os-past-60] **For ROCm 7.0.x** - AMD Instinct MI325X GPU (gfx942) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
|
||||
.. [#mi300x-os-past-60] **For ROCm 7.0.x** - AMD Instinct MI300X GPU (gfx942) is supported on all listed :ref:`supported_distributions`.
|
||||
.. [#mi300A-os-past-60] **For ROCm 7.0.x** - AMD Instinct MI300A GPU (gfx942) is supported only on Ubuntu 24.04, Ubuntu 22.04, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, Debian 12, and Rocky Linux 9.
|
||||
.. [#mi200x-os-past-60] **For ROCm 7.0.x** - AMD Instinct MI200 Series GPUs (gfx90a) are supported only on Ubuntu 24.04, Ubuntu 22.04, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, and Debian 12.
|
||||
.. [#mi100-os-past-60] **For ROCm 7.0.x** - AMD Instinct MI100 GPU (gfx908) is only supported on Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, RHEL 9.4, and RHEL 8.10.
|
||||
.. [#7700XT-OS-past-60] Radeon RX 7700 XT (gfx1101) is supported only on Ubuntu 24.04.2 and RHEL 9.6.
|
||||
.. [#mi300_624-past-60] **For ROCm 6.2.4** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
.. [#mi300_622-past-60] **For ROCm 6.2.2** - MI300X (gfx942) is supported on listed operating systems *except* Ubuntu 22.04.5 [6.8 HWE] and Ubuntu 22.04.4 [6.5 HWE].
|
||||
@@ -241,13 +276,14 @@ Expand for full historical view of:
|
||||
.. [#mi300_610-past-60] **For ROCm 6.1.0** - MI300A (gfx942) is supported on Ubuntu 22.04.4, RHEL 9.4, RHEL 9.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.4.
|
||||
.. [#mi300_602-past-60] **For ROCm 6.0.2** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
|
||||
.. [#mi300_600-past-60] **For ROCm 6.0.0** - MI300A (gfx942) is supported on Ubuntu 22.04.3, RHEL 8.9, and SLES 15 SP5. MI300X (gfx942) is only supported on Ubuntu 22.04.3.
|
||||
.. [#verl_compat] verl is only supported on ROCm 6.2.0.
|
||||
.. [#stanford-megatron-lm_compat] Stanford Megatron-LM is only supported on ROCm 6.3.0.
|
||||
.. [#dgl_compat] DGL is only supported on ROCm 6.4.0.
|
||||
.. [#megablocks_compat] Megablocks is only supported on ROCm 6.3.0.
|
||||
.. [#taichi_compat] Taichi is only supported on ROCm 6.3.2.
|
||||
.. [#ray_compat] Ray is only supported on ROCm 6.4.1.
|
||||
.. [#llama-cpp_compat] llama.cpp is only supported on ROCm 6.4.0.
|
||||
.. [#kfd_support-past-60] As of ROCm 6.4.0, forward and backward compatibility between the AMD Kernel-mode GPU Driver (KMD) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The tested user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and kernel-space support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
|
||||
.. [#verl_compat-past-60] verl is only supported on ROCm 6.2.0.
|
||||
.. [#stanford-megatron-lm_compat-past-60] Stanford Megatron-LM is only supported on ROCm 6.3.0.
|
||||
.. [#dgl_compat-past-60] DGL is only supported on ROCm 6.4.0.
|
||||
.. [#megablocks_compat-past-60] Megablocks is only supported on ROCm 6.3.0.
|
||||
.. [#taichi_compat-past-60] Taichi is only supported on ROCm 6.3.2.
|
||||
.. [#ray_compat-past-60] Ray is only supported on ROCm 6.4.1.
|
||||
.. [#llama-cpp_compat-past-60] llama.cpp is only supported on ROCm 6.4.0.
|
||||
.. [#driver_patch-past-60] AMD GPU Driver (amdgpu) 30.10.1 is a quality release that resolves an issue identified in the 30.10 release. There are no other significant changes or feature additions in ROCm 7.0.1 from ROCm 7.0.0. AMD GPU Driver (amdgpu) 30.10.1 is compatible with ROCm 7.0.1 and ROCm 7.0.0.
|
||||
.. [#kfd_support-past-60] As of ROCm 6.4.0, forward and backward compatibility between the AMD GPU Driver (amdgpu) and its user space software is provided up to a year apart. For earlier ROCm releases, the compatibility is provided for +/- 2 releases. The supported user space versions on this page were accurate as of the time of initial ROCm release. For the most up-to-date information, see the latest version of this information at `User and AMD GPU Driver support matrix <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/user-kernel-space-compat-matrix.html>`_.
|
||||
.. [#ROCT-rocr-past-60] Starting from ROCm 6.3.0, the ROCT Thunk Interface is included as part of the ROCr runtime package.
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ with ROCm support:
|
||||
- Offers AMD-validated and community :ref:`Docker images <jax-docker-compat>`
|
||||
with ROCm and JAX preinstalled.
|
||||
|
||||
- ROCm JAX repository: `ROCm/jax <https://github.com/ROCm/jax>`_
|
||||
- ROCm JAX repository: `ROCm/rocm-jax <https://github.com/ROCm/rocm-jax>`_
|
||||
|
||||
- See the :doc:`ROCm JAX installation guide <rocm-install-on-linux:install/3rd-party/jax-install>`
|
||||
to get started.
|
||||
@@ -310,5 +310,54 @@ For a complete and up-to-date list of JAX public modules (for example, ``jax.num
|
||||
Since version 0.1.56, JAX has full support for ROCm, and the
|
||||
:ref:`Known issues and important notes <jax_comp_known_issues>` section
|
||||
contains details about limitations specific to the ROCm backend. The list of
|
||||
JAX API modules is maintained by the JAX project and is subject to change.
|
||||
JAX API modules are maintained by the JAX project and is subject to change.
|
||||
Refer to the official Jax documentation for the most up-to-date information.
|
||||
|
||||
Key features and enhancements for ROCm 7.0
|
||||
===============================================================================
|
||||
|
||||
- Upgraded XLA backend: Integrates a newer XLA version, enabling better
|
||||
optimizations, broader operator support, and potential performance gains.
|
||||
|
||||
- RNN support: Native RNN support (including LSTMs via ``jax.experimental.rnn``)
|
||||
now available on ROCm, aiding sequence model development.
|
||||
|
||||
- Comprehensive linear algebra capabilities: Offers robust ``jax.linalg``
|
||||
operations, essential for scientific and machine learning tasks.
|
||||
|
||||
- Expanded AMD GPU architecture support: Provides ongoing support for gfx1101
|
||||
GPUs and introduces support for gfx950 and gfx12xx GPUs.
|
||||
|
||||
- Mixed FP8 precision support: Enables ``lax.dot_general`` operations with mixed FP8
|
||||
types, offering pathways for memory and compute efficiency.
|
||||
|
||||
- Streamlined PyPi packaging: Provides reliable PyPi wheels for JAX on ROCm,
|
||||
simplifying the installation process.
|
||||
|
||||
- Pallas experimental kernel development: Continued Pallas framework
|
||||
enhancements for custom GPU kernels, including new intrinsics (specific
|
||||
kernel behaviors under review).
|
||||
|
||||
- Improved build system and CI: Enhanced ROCm build system and CI for greater
|
||||
reliability and maintainability.
|
||||
|
||||
- Enhanced distributed computing setup: Improved JAX setup in multi-GPU
|
||||
distributed environments.
|
||||
|
||||
.. _jax_comp_known_issues:
|
||||
|
||||
Known issues and notes for ROCm 7.0
|
||||
===============================================================================
|
||||
|
||||
- ``nn.dot_product_attention``: Certain configurations of ``jax.nn.dot_product_attention``
|
||||
may cause segmentation faults, though the majority of use cases work correctly.
|
||||
|
||||
- SVD with dynamic shapes: SVD on inputs with dynamic/symbolic shapes might result in an error.
|
||||
SVD with static shapes is unaffected.
|
||||
|
||||
- QR decomposition with symbolic shapes: QR decomposition operations may fail when using
|
||||
symbolic/dynamic shapes in shape polymorphic contexts.
|
||||
|
||||
- Pallas kernels: Specific advanced Pallas kernels may exhibit variations in
|
||||
numerical output or resource usage. These are actively reviewed as part of
|
||||
Pallas's experimental development.
|
||||
|
||||
@@ -366,7 +366,8 @@ feature set available to developers.
|
||||
Supported modules and data types
|
||||
================================================================================
|
||||
|
||||
The following section outlines the supported data types, modules, and domain libraries available in PyTorch on ROCm.
|
||||
The following section outlines the supported data types, modules, and domain
|
||||
libraries available in PyTorch on ROCm.
|
||||
|
||||
Supported data types
|
||||
--------------------------------------------------------------------------------
|
||||
@@ -533,3 +534,72 @@ with ROCm.
|
||||
dispatching.
|
||||
|
||||
**Note:** Only official release exists.
|
||||
|
||||
Key features and enhancements for PyTorch 2.7 with ROCm 7.0
|
||||
================================================================================
|
||||
|
||||
- Enhanced TunableOp framework: Introduces ``tensorfloat32`` support for
|
||||
TunableOp operations, improved offline tuning for ScaledGEMM operations,
|
||||
submatrix offline tuning capabilities, and better logging for BLAS operations
|
||||
without bias vectors.
|
||||
|
||||
- Expanded GPU architecture support: Provides optimized support for newer GPU
|
||||
architectures, including gfx1200 and gfx1201 with preferred hipBLASLt backend
|
||||
selection, along with improvements for gfx950 and gfx1100 series GPUs.
|
||||
|
||||
- Advanced Triton Integration: AOTriton 0.10b introduces official support for
|
||||
gfx950 and gfx1201, along with experimental support for gfx1101, gfx1151,
|
||||
gfx1150, and gfx1200.
|
||||
|
||||
- Improved element-wise kernel performance: Delivers enhanced vectorized
|
||||
element-wise kernels with better support for heterogeneous tensor types and
|
||||
optimized input vectorization for tensors with mixed data types.
|
||||
|
||||
- MIOpen deep learning optimizations: Enables NHWC BatchNorm by default on
|
||||
ROCm 7.0+, provides ``maxpool`` forward and backward performance improvements
|
||||
targeting ResNet scenarios, and includes updated launch configurations for
|
||||
better performance.
|
||||
|
||||
- Enhanced memory and tensor operations: Features fixes for in-place ``aten``
|
||||
sum operations with specialized templated kernels, improved 3D tensor
|
||||
performance with NHWC format, and better handling of memory-bound matrix
|
||||
multiplication operations.
|
||||
|
||||
- Robust testing and quality improvements: Includes comprehensive test suite
|
||||
updates with improved tolerance handling for Navi3x architectures, generalized
|
||||
ROCm-specific test conditions, and enhanced unit test coverage for Flash
|
||||
Attention and Memory Efficient operations.
|
||||
|
||||
- Build system and infrastructure improvements: Provides updated CentOS Stream 9
|
||||
support, improved Docker configuration, migration to public MAGMA repository,
|
||||
and enhanced QA automation scripts for PyTorch unit testing.
|
||||
|
||||
- Composable Kernel (CK) updates: Features updated CK submodule integration with
|
||||
the latest optimizations and performance improvements for core mathematical
|
||||
operations.
|
||||
|
||||
- Development and debugging enhancements: Includes improved source handling for
|
||||
dynamic compilation, better error handling for atomic operations, and enhanced
|
||||
state checking for trace operations.
|
||||
|
||||
- Integrate APEX fused layer normalization, which can have positive impact on
|
||||
text-to-video models.
|
||||
|
||||
- Integrate APEX distributed fused LAMB and distributed fused ADAM, which can
|
||||
have positive impact on BERT-L and Llama2-SFT.
|
||||
|
||||
- FlashAttention v3 has been integrated for AMD GPUs.
|
||||
|
||||
- `Pytorch C++ extensions <https://pytorch.org/tutorials/advanced/cpp_extension.html>`_
|
||||
provide a mechanism for compiling custom operations that can be used during
|
||||
network training or inference. For AMD platforms, ``amdclang++`` has been
|
||||
validated as the supported compiler for building these extensions.
|
||||
|
||||
Known issues and notes for PyTorch 2.7 with ROCm 7.0
|
||||
================================================================================
|
||||
|
||||
- The ``matmul.allow_fp16_reduced_precision_reduction`` and
|
||||
``matmul.allow_bf16_reduced_precision_reduction`` options under
|
||||
``torch.backends.cuda`` are not supported. As a result,
|
||||
reduced-precision reductions using FP16 or BF16 accumulation types are not
|
||||
available.
|
||||
|
||||
@@ -72,7 +72,7 @@ the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.18-dev/images/sha256-96754ce2d30f729e19b497279915b5212ba33d5e408e7e5dd3f2304d87e3441e"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.18.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
|
||||
- 24.04
|
||||
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
@@ -81,7 +81,7 @@ the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.18-dev/images/sha256-fa741508d383858e86985a9efac85174529127408102558ae2e3a4ac894eea1e"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.18.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- `tensorflow-rocm 2.18.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
|
||||
- 22.04
|
||||
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `TensorBoard 2.18.0 <https://github.com/tensorflow/tensorboard/tree/2.18.0>`__
|
||||
@@ -90,7 +90,7 @@ the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.17-dev/images/sha256-3a0aef09f2a8833c2b64b85874dd9449ffc2ad257351857338ff5b706c03a418"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.17.1-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
|
||||
- 24.04
|
||||
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`__
|
||||
@@ -99,7 +99,7 @@ the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.17-dev/images/sha256-bc7341a41ebe7ab261aa100732874507c452421ef733e408ac4f05ed453b0bc5"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.17.1-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- `tensorflow-rocm 2.17.1 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
|
||||
- 22.04
|
||||
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `TensorBoard 2.17.1 <https://github.com/tensorflow/tensorboard/tree/2.17.1>`__
|
||||
@@ -108,7 +108,7 @@ the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.12-tf2.16-dev/images/sha256-4841a8df7c340dab79bf9362dad687797649a00d594e0832eb83ea6880a40d3b"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.16.2-cp312-cp312-manylinux_2_28_x86_64.whl>`__
|
||||
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
|
||||
- 24.04
|
||||
- `Python 3.12 <https://www.python.org/downloads/release/python-31210/>`__
|
||||
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`__
|
||||
@@ -117,7 +117,7 @@ the |docker-icon| icon to view the image on Docker Hub.
|
||||
|
||||
<a href="https://hub.docker.com/layers/rocm/tensorflow/rocm6.4.2-py3.10-tf2.16-dev/images/sha256-883fa95aba960c58a3e46fceaa18f03ede2c7df89b8e9fd603ab2d47e0852897"><i class="fab fa-docker fa-lg"></i> rocm/tensorflow</a>
|
||||
|
||||
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/tensorflow_rocm-2.16.2-cp310-cp310-manylinux_2_28_x86_64.whl>`__
|
||||
- `tensorflow-rocm 2.16.2 <https://repo.radeon.com/rocm/manylinux/rocm-rel-6.4.2/>`__
|
||||
- 22.04
|
||||
- `Python 3.10 <https://www.python.org/downloads/release/python-31017/>`__
|
||||
- `TensorBoard 2.16.2 <https://github.com/tensorflow/tensorboard/tree/2.16.2>`__
|
||||
|
||||
@@ -21,7 +21,8 @@ architecture.
|
||||
* [AMD Instinct™ MI300 microarchitecture](./gpu-arch/mi300.md)
|
||||
* [AMD Instinct MI300/CDNA3 ISA](https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf)
|
||||
* [White paper](https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf)
|
||||
* [Performance counters](./gpu-arch/mi300-mi200-performance-counters.rst)
|
||||
* [MI300 performance counters](./gpu-arch/mi300-mi200-performance-counters.rst)
|
||||
* [MI350 series performance counters](./gpu-arch/mi350-performance-counters.rst)
|
||||
:::
|
||||
|
||||
:::{grid-item-card}
|
||||
|
||||
530
docs/conceptual/gpu-arch/mi350-performance-counters.rst
Normal file
530
docs/conceptual/gpu-arch/mi350-performance-counters.rst
Normal file
@@ -0,0 +1,530 @@
|
||||
.. meta::
|
||||
:description: MI355 series performance counters and metrics
|
||||
:keywords: MI355, MI355X, MI3XX
|
||||
|
||||
***********************************
|
||||
MI350 series performance counters
|
||||
***********************************
|
||||
|
||||
This topic lists and describes the hardware performance counters and derived metrics available on the AMD Instinct MI350 and MI355 accelerators. These counters are available for profiling using `ROCprofiler-SDK <https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/latest/index.html>`_ and `ROCm Compute Profiler <https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/>`_.
|
||||
|
||||
The following sections list the performance counters based on the IP blocks.
|
||||
|
||||
Command processor packet processor counters (CPC)
|
||||
==================================================
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Hardware counter
|
||||
- Definition
|
||||
|
||||
* - CPC_ALWAYS_COUNT
|
||||
- Always count.
|
||||
|
||||
* - CPC_ADC_VALID_CHUNK_NOT_AVAIL
|
||||
- ADC valid chunk is not available when dispatch walking is in progress in the multi-xcc mode.
|
||||
|
||||
* - CPC_ADC_DISPATCH_ALLOC_DONE
|
||||
- ADC dispatch allocation is done.
|
||||
|
||||
* - CPC_ADC_VALID_CHUNK_END
|
||||
- ADC crawler's valid chunk end in the multi-xcc mode.
|
||||
|
||||
* - CPC_SYNC_FIFO_FULL_LEVEL
|
||||
- SYNC FIFO full last cycles.
|
||||
|
||||
* - CPC_SYNC_FIFO_FULL
|
||||
- SYNC FIFO full times.
|
||||
|
||||
* - CPC_GD_BUSY
|
||||
- ADC busy.
|
||||
|
||||
* - CPC_TG_SEND
|
||||
- ADC thread group send.
|
||||
|
||||
* - CPC_WALK_NEXT_CHUNK
|
||||
- ADC walking next valid chunk in the multi-xcc mode.
|
||||
|
||||
* - CPC_STALLED_BY_SE0_SPI
|
||||
- ADC CSDATA stalled by SE0SPI.
|
||||
|
||||
* - CPC_STALLED_BY_SE1_SPI
|
||||
- ADC CSDATA stalled by SE1SPI.
|
||||
|
||||
* - CPC_STALLED_BY_SE2_SPI
|
||||
- ADC CSDATA stalled by SE2SPI.
|
||||
|
||||
* - CPC_STALLED_BY_SE3_SPI
|
||||
- ADC CSDATA stalled by SE3SPI.
|
||||
|
||||
* - CPC_LTE_ALL
|
||||
- CPC sync counter LteAll. Only Master XCD manages LteAll.
|
||||
|
||||
* - CPC_SYNC_WRREQ_FIFO_BUSY
|
||||
- CPC sync counter request FIFO is not empty.
|
||||
|
||||
* - CPC_CANE_BUSY
|
||||
- CPC CANE bus is busy, which indicates the presence of inflight sync counter requests.
|
||||
|
||||
* - CPC_CANE_STALL
|
||||
- CPC sync counter sending is stalled by CANE.
|
||||
|
||||
Shader pipe interpolators (SPI) counters
|
||||
=========================================
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Hardware counter
|
||||
- Definition
|
||||
|
||||
* - SPI_CS0_WINDOW_VALID
|
||||
- Clock count enabled by PIPE0 perfcounter_start event.
|
||||
|
||||
* - SPI_CS0_BUSY
|
||||
- Number of clocks with outstanding waves for PIPE0 (SPI or SH).
|
||||
|
||||
* - SPI_CS0_NUM_THREADGROUPS
|
||||
- Number of thread groups launched for PIPE0.
|
||||
|
||||
* - SPI_CS0_CRAWLER_STALL
|
||||
- Number of clocks when PIPE0 event or wave order FIFO is full.
|
||||
|
||||
* - SPI_CS0_EVENT_WAVE
|
||||
- Number of PIPE0 events and waves.
|
||||
|
||||
* - SPI_CS0_WAVE
|
||||
- Number of PIPE0 waves.
|
||||
|
||||
* - SPI_CS1_WINDOW_VALID
|
||||
- Clock count enabled by PIPE1 perfcounter_start event.
|
||||
|
||||
* - SPI_CS1_BUSY
|
||||
- Number of clocks with outstanding waves for PIPE1 (SPI or SH).
|
||||
|
||||
* - SPI_CS1_NUM_THREADGROUPS
|
||||
- Number of thread groups launched for PIPE1.
|
||||
|
||||
* - SPI_CS1_CRAWLER_STALL
|
||||
- Number of clocks when PIPE1 event or wave order FIFO is full.
|
||||
|
||||
* - SPI_CS1_EVENT_WAVE
|
||||
- Number of PIPE1 events and waves.
|
||||
|
||||
* - SPI_CS1_WAVE
|
||||
- Number of PIPE1 waves.
|
||||
|
||||
* - SPI_CS2_WINDOW_VALID
|
||||
- Clock count enabled by PIPE2 perfcounter_start event.
|
||||
|
||||
* - SPI_CS2_BUSY
|
||||
- Number of clocks with outstanding waves for PIPE2 (SPI or SH).
|
||||
|
||||
* - SPI_CS2_NUM_THREADGROUPS
|
||||
- Number of thread groups launched for PIPE2.
|
||||
|
||||
* - SPI_CS2_CRAWLER_STALL
|
||||
- Number of clocks when PIPE2 event or wave order FIFO is full.
|
||||
|
||||
* - SPI_CS2_EVENT_WAVE
|
||||
- Number of PIPE2 events and waves.
|
||||
|
||||
* - SPI_CS2_WAVE
|
||||
- Number of PIPE2 waves.
|
||||
|
||||
* - SPI_CS3_WINDOW_VALID
|
||||
- Clock count enabled by PIPE3 perfcounter_start event.
|
||||
|
||||
* - SPI_CS3_BUSY
|
||||
- Number of clocks with outstanding waves for PIPE3 (SPI or SH).
|
||||
|
||||
* - SPI_CS3_NUM_THREADGROUPS
|
||||
- Number of thread groups launched for PIPE3.
|
||||
|
||||
* - SPI_CS3_CRAWLER_STALL
|
||||
- Number of clocks when PIPE3 event or wave order FIFO is full.
|
||||
|
||||
* - SPI_CS3_EVENT_WAVE
|
||||
- Number of PIPE3 events and waves.
|
||||
|
||||
* - SPI_CS3_WAVE
|
||||
- Number of PIPE3 waves.
|
||||
|
||||
* - SPI_CSQ_P0_Q0_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE0 Queue0.
|
||||
|
||||
* - SPI_CSQ_P0_Q1_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE0 Queue1.
|
||||
|
||||
* - SPI_CSQ_P0_Q2_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE0 Queue2.
|
||||
|
||||
* - SPI_CSQ_P0_Q3_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE0 Queue3.
|
||||
|
||||
* - SPI_CSQ_P0_Q4_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE0 Queue4.
|
||||
|
||||
* - SPI_CSQ_P0_Q5_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE0 Queue5.
|
||||
|
||||
* - SPI_CSQ_P0_Q6_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE0 Queue6.
|
||||
|
||||
* - SPI_CSQ_P0_Q7_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE0 Queue7.
|
||||
|
||||
* - SPI_CSQ_P1_Q0_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE1 Queue0.
|
||||
|
||||
* - SPI_CSQ_P1_Q1_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE1 Queue1.
|
||||
|
||||
* - SPI_CSQ_P1_Q2_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE1 Queue2.
|
||||
|
||||
* - SPI_CSQ_P1_Q3_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE1 Queue3.
|
||||
|
||||
* - SPI_CSQ_P1_Q4_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE1 Queue4.
|
||||
|
||||
* - SPI_CSQ_P1_Q5_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE1 Queue5.
|
||||
|
||||
* - SPI_CSQ_P1_Q6_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE1 Queue6.
|
||||
|
||||
* - SPI_CSQ_P1_Q7_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE1 Queue7.
|
||||
|
||||
* - SPI_CSQ_P2_Q0_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE2 Queue0.
|
||||
|
||||
* - SPI_CSQ_P2_Q1_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE2 Queue1.
|
||||
|
||||
* - SPI_CSQ_P2_Q2_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE2 Queue2.
|
||||
|
||||
* - SPI_CSQ_P2_Q3_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE2 Queue3.
|
||||
|
||||
* - SPI_CSQ_P2_Q4_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE2 Queue4.
|
||||
|
||||
* - SPI_CSQ_P2_Q5_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE2 Queue5.
|
||||
|
||||
* - SPI_CSQ_P2_Q6_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE2 Queue6.
|
||||
|
||||
* - SPI_CSQ_P2_Q7_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE2 Queue7.
|
||||
|
||||
* - SPI_CSQ_P3_Q0_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE3 Queue0.
|
||||
|
||||
* - SPI_CSQ_P3_Q1_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE3 Queue1.
|
||||
|
||||
* - SPI_CSQ_P3_Q2_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE3 Queue2.
|
||||
|
||||
* - SPI_CSQ_P3_Q3_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE3 Queue3.
|
||||
|
||||
* - SPI_CSQ_P3_Q4_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE3 Queue4.
|
||||
|
||||
* - SPI_CSQ_P3_Q5_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE3 Queue5.
|
||||
|
||||
* - SPI_CSQ_P3_Q6_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE3 Queue6.
|
||||
|
||||
* - SPI_CSQ_P3_Q7_OCCUPANCY
|
||||
- Sum of occupancy info for PIPE3 Queue7.
|
||||
|
||||
* - SPI_CSQ_P0_OCCUPANCY
|
||||
- Sum of occupancy info for all PIPE0 queues.
|
||||
|
||||
* - SPI_CSQ_P1_OCCUPANCY
|
||||
- Sum of occupancy info for all PIPE1 queues.
|
||||
|
||||
* - SPI_CSQ_P2_OCCUPANCY
|
||||
- Sum of occupancy info for all PIPE2 queues.
|
||||
|
||||
* - SPI_CSQ_P3_OCCUPANCY
|
||||
- Sum of occupancy info for all PIPE3 queues.
|
||||
|
||||
* - SPI_VWC0_VDATA_VALID_WR
|
||||
- Number of clocks VGPR bus_0 writes VGPRs.
|
||||
|
||||
* - SPI_VWC1_VDATA_VALID_WR
|
||||
- Number of clocks VGPR bus_1 writes VGPRs.
|
||||
|
||||
* - SPI_CSC_WAVE_CNT_BUSY
|
||||
- Number of cycles when there is any wave in the pipe.
|
||||
|
||||
Compute unit (SQ) counters
|
||||
===========================
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Hardware counter
|
||||
- Definition
|
||||
|
||||
* - SQ_INSTS_VALU_MFMA_F6F4
|
||||
- Number of VALU V_MFMA_*_F6F4 instructions.
|
||||
|
||||
* - SQ_INSTS_VALU_MFMA_MOPS_F6F4
|
||||
- Number of VALU matrix with the performed math operations (add or mul) divided by 512, assuming a full EXEC mask of F6 or F4 data type.
|
||||
|
||||
* - SQ_ACTIVE_INST_VALU2
|
||||
- Number of quad-cycles when two VALU instructions are issued (per-simd, nondeterministic).
|
||||
|
||||
* - SQ_INSTS_LDS_LOAD
|
||||
- Number of LDS load instructions issued (per-simd, emulated).
|
||||
|
||||
* - SQ_INSTS_LDS_STORE
|
||||
- Number of LDS store instructions issued (per-simd, emulated).
|
||||
|
||||
* - SQ_INSTS_LDS_ATOMIC
|
||||
- Number of LDS atomic instructions issued (per-simd, emulated).
|
||||
|
||||
* - SQ_INSTS_LDS_LOAD_BANDWIDTH
|
||||
- Total number of 64-bytes loaded (instrSize * CountOnes(EXEC))/64 (per-simd, emulated).
|
||||
|
||||
* - SQ_INSTS_LDS_STORE_BANDWIDTH
|
||||
- Total number of 64-bytes written (instrSize * CountOnes(EXEC))/64 (per-simd, emulated).
|
||||
|
||||
* - SQ_INSTS_LDS_ATOMIC_BANDWIDTH
|
||||
- Total number of 64-bytes atomic (instrSize * CountOnes(EXEC))/64 (per-simd, emulated).
|
||||
|
||||
* - SQ_INSTS_VALU_FLOPS_FP16
|
||||
- Counts FLOPS per instruction on float 16 excluding MFMA/SMFMA.
|
||||
|
||||
* - SQ_INSTS_VALU_FLOPS_FP32
|
||||
- Counts FLOPS per instruction on float 32 excluding MFMA/SMFMA.
|
||||
|
||||
* - SQ_INSTS_VALU_FLOPS_FP64
|
||||
- Counts FLOPS per instruction on float 64 excluding MFMA/SMFMA.
|
||||
|
||||
* - SQ_INSTS_VALU_FLOPS_FP16_TRANS
|
||||
- Counts FLOPS per instruction on float 16 trans excluding MFMA/SMFMA.
|
||||
|
||||
* - SQ_INSTS_VALU_FLOPS_FP32_TRANS
|
||||
- Counts FLOPS per instruction on float 32 trans excluding MFMA/SMFMA.
|
||||
|
||||
* - SQ_INSTS_VALU_FLOPS_FP64_TRANS
|
||||
- Counts FLOPS per instruction on float 64 trans excluding MFMA/SMFMA.
|
||||
|
||||
* - SQ_INSTS_VALU_IOPS
|
||||
- Counts OPS per instruction on integer or unsigned or bit data (per-simd, emulated).
|
||||
|
||||
* - SQ_LDS_DATA_FIFO_FULL
|
||||
- Number of cycles LDS data FIFO is full (nondeterministic, unwindowed).
|
||||
|
||||
* - SQ_LDS_CMD_FIFO_FULL
|
||||
- Number of cycles LDS command FIFO is full (nondeterministic, unwindowed).
|
||||
|
||||
* - SQ_VMEM_TA_ADDR_FIFO_FULL
|
||||
- Number of cycles texture requests are stalled due to full address FIFO in TA (nondeterministic, unwindowed).
|
||||
|
||||
* - SQ_VMEM_TA_CMD_FIFO_FULL
|
||||
- Number of cycles texture requests are stalled due to full cmd FIFO in TA (nondeterministic, unwindowed).
|
||||
|
||||
* - SQ_VMEM_WR_TA_DATA_FIFO_FULL
|
||||
- Number of cycles texture writes are stalled due to full data FIFO in TA (nondeterministic, unwindowed).
|
||||
|
||||
* - SQC_ICACHE_MISSES_DUPLICATE
|
||||
- Number of duplicate misses (access to a non-resident, miss pending CL) (per-SQ, per-Bank, nondeterministic).
|
||||
|
||||
* - SQC_DCACHE_MISSES_DUPLICATE
|
||||
- Number of duplicate misses (access to a non-resident, miss pending CL) (per-SQ, per-Bank, nondeterministic).
|
||||
|
||||
Texture addressing (TA) unit counters
|
||||
======================================
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Hardware counter
|
||||
- Definition
|
||||
|
||||
* - TA_BUFFER_READ_LDS_WAVEFRONTS
|
||||
- Number of buffer read wavefronts for LDS return processed by the TA.
|
||||
|
||||
* - TA_FLAT_READ_LDS_WAVEFRONTS
|
||||
- Number of flat opcode reads for LDS return processed by the TA.
|
||||
|
||||
Texture data (TD) unit counters
|
||||
================================
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Hardware counter
|
||||
- Definition
|
||||
|
||||
* - TD_WRITE_ACKT_WAVEFRONT
|
||||
- Number of write acknowledgments, sent to SQ and not to SP.
|
||||
|
||||
* - TD_TD_SP_TRAFFIC
|
||||
- Number of times this TD sends data to the SP.
|
||||
|
||||
Texture cache per pipe (TCP) counters
|
||||
======================================
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Hardware counter
|
||||
- Definition
|
||||
|
||||
* - TCP_TCP_TA_ADDR_STALL_CYCLES
|
||||
- TCP stalls TA addr interface.
|
||||
|
||||
* - TCP_TCP_TA_DATA_STALL_CYCLES
|
||||
- TCP stalls TA data interface. Now windowed.
|
||||
|
||||
* - TCP_LFIFO_STALL_CYCLES
|
||||
- Memory latency FIFOs full stall.
|
||||
|
||||
* - TCP_RFIFO_STALL_CYCLES
|
||||
- Memory Request FIFOs full stall.
|
||||
|
||||
* - TCP_TCR_RDRET_STALL
|
||||
- Write into cache stalled by read return from TCR.
|
||||
|
||||
* - TCP_PENDING_STALL_CYCLES
|
||||
- Stall due to data pending from L2.
|
||||
|
||||
* - TCP_UTCL1_SERIALIZATION_STALL
|
||||
- Total number of stalls caused due to serializing translation requests through the UTCL1.
|
||||
|
||||
* - TCP_UTCL1_THRASHING_STALL
|
||||
- Stall caused by thrashing feature in any probe. Lacks accuracy when the stall signal overlaps between probe0 and probe1, which is worse with MECO of thrashing deadlock. Some probe0 events could miss being counted in with MECO on. This perf count provides a rough thrashing estimate.
|
||||
|
||||
* - TCP_UTCL1_TRANSLATION_MISS_UNDER_MISS
|
||||
- Translation miss_under_miss.
|
||||
|
||||
* - TCP_UTCL1_STALL_INFLIGHT_MAX
|
||||
- Total UTCL1 stalls due to inflight counter saturation.
|
||||
|
||||
* - TCP_UTCL1_STALL_LRU_INFLIGHT
|
||||
- Total UTCL1 stalls due to LRU cache line with inflight traffic.
|
||||
|
||||
* - TCP_UTCL1_STALL_MULTI_MISS
|
||||
- Total UTCL1 stalls due to arbitrated multiple misses.
|
||||
|
||||
* - TCP_UTCL1_LFIFO_FULL
|
||||
- Total UTCL1 and UTCL2 latency, which hides FIFO full cycles.
|
||||
|
||||
* - TCP_UTCL1_STALL_LFIFO_NOT_RES
|
||||
- Total UTCL1 stalls due to UTCL2 latency, which hides FIFO output (not resident).
|
||||
|
||||
* - TCP_UTCL1_STALL_UTCL2_REQ_OUT_OF_CREDITS
|
||||
- Total UTCL1 stalls due to UTCL2_req being out of credits.
|
||||
|
||||
* - TCP_CLIENT_UTCL1_INFLIGHT
|
||||
- The sum of inflight client to UTCL1 requests per cycle.
|
||||
|
||||
* - TCP_TAGRAM0_REQ
|
||||
- Total L2 requests mapping to TagRAM 0 from this TCP to all TCCs.
|
||||
|
||||
* - TCP_TAGRAM1_REQ
|
||||
- Total L2 requests mapping to TagRAM 1 from this TCP to all TCCs.
|
||||
|
||||
* - TCP_TAGRAM2_REQ
|
||||
- Total L2 requests mapping to TagRAM 2 from this TCP to all TCCs.
|
||||
|
||||
* - TCP_TAGRAM3_REQ
|
||||
- Total L2 requests mapping to TagRAM 3 from this TCP to all TCCs.
|
||||
|
||||
* - TCP_TCP_LATENCY
|
||||
- Total TCP wave latency (from the first clock of wave entering to the first clock of wave leaving). Divide by TA_TCP_STATE_READ to find average wave latency.
|
||||
|
||||
* - TCP_TCC_READ_REQ_LATENCY
|
||||
- Total TCP to TCC request latency for reads and atomics with return. Not Windowed.
|
||||
|
||||
* - TCP_TCC_WRITE_REQ_LATENCY
|
||||
- Total TCP to TCC request latency for writes and atomics without return. Not Windowed.
|
||||
|
||||
* - TCP_TCC_WRITE_REQ_HOLE_LATENCY
|
||||
- Total TCP req to TCC hole latency for writes and atomics. Not Windowed.
|
||||
|
||||
Texture cache per channel (TCC) counters
|
||||
=========================================
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Hardware counter
|
||||
- Definition
|
||||
|
||||
* - TCC_READ_SECTORS
|
||||
- Total number of 32B data sectors in read requests.
|
||||
|
||||
* - TCC_WRITE_SECTORS
|
||||
- Total number of 32B data sectors in write requests.
|
||||
|
||||
* - TCC_ATOMIC_SECTORS
|
||||
- Total number of 32B data sectors in atomic requests.
|
||||
|
||||
* - TCC_BYPASS_REQ
|
||||
- Number of bypass requests. This is measured at the tag block.
|
||||
|
||||
* - TCC_LATENCY_FIFO_FULL
|
||||
- Number of cycles when the latency FIFO is full.
|
||||
|
||||
* - TCC_SRC_FIFO_FULL
|
||||
- Number of cycles when the SRC FIFO is assumed to be full as measured at the IB block.
|
||||
|
||||
* - TCC_EA0_RDREQ_64B
|
||||
- Number of 64-byte TCC/EA read requests.
|
||||
|
||||
* - TCC_EA0_RDREQ_128B
|
||||
- Number of 128-byte TCC/EA read requests.
|
||||
|
||||
* - TCC_IB_REQ
|
||||
- Number of requests through the IB. This measures the number of raw requests from graphics clients to this TCC.
|
||||
|
||||
* - TCC_IB_STALL
|
||||
- Number of cycles when the IB output is stalled.
|
||||
|
||||
* - TCC_EA0_WRREQ_WRITE_DRAM
|
||||
- Number of TCC/EA write requests (32-byte or 64-byte) destined for DRAM (MC).
|
||||
|
||||
* - TCC_EA0_WRREQ_ATOMIC_DRAM
|
||||
- Number of TCC/EA atomic requests (32-byte or 64-byte) destined for DRAM (MC).
|
||||
|
||||
* - TCC_EA0_RDREQ_DRAM_32B
|
||||
- Number of 32-byte TCC/EA read requests due to DRAM traffic. One 64-byte request is counted as two and one 128-byte as four.
|
||||
|
||||
* - TCC_EA0_RDREQ_GMI_32B
|
||||
- Number of 32-byte TCC/EA read requests due to GMI traffic. One 64-byte request is counted as two and one 128-byte as four.
|
||||
|
||||
* - TCC_EA0_RDREQ_IO_32B
|
||||
- Number of 32-byte TCC/EA read requests due to IO traffic. One 64-byte request is counted as two and one 128-byte as four.
|
||||
|
||||
* - TCC_EA0_WRREQ_WRITE_DRAM_32B
|
||||
- Number of 32-byte TCC/EA write requests due to DRAM traffic. One 64-byte request is counted as two.
|
||||
|
||||
* - TCC_EA0_WRREQ_ATOMIC_DRAM_32B
|
||||
- Number of 32-byte TCC/EA atomic requests due to DRAM traffic. One 64-byte request is counted as two.
|
||||
|
||||
* - TCC_EA0_WRREQ_WRITE_GMI_32B
|
||||
- Number of 32-byte TCC/EA write requests due to GMI traffic. One 64-byte request is counted as two.
|
||||
|
||||
* - TCC_EA0_WRREQ_ATOMIC_GMI_32B
|
||||
- Number of 32-byte TCC/EA atomic requests due to GMI traffic. One 64-byte request is counted as two.
|
||||
|
||||
* - TCC_EA0_WRREQ_WRITE_IO_32B
|
||||
- Number of 32-byte TCC/EA write requests due to IO traffic. One 64-byte request is counted as two.
|
||||
|
||||
* - TCC_EA0_WRREQ_ATOMIC_IO_32B
|
||||
- Number of 32-byte TCC/EA atomic requests due to IO traffic. One 64-byte request is counted as two.
|
||||
12
docs/conf.py
12
docs/conf.py
@@ -89,15 +89,15 @@ project = "ROCm Documentation"
|
||||
project_path = os.path.abspath(".").replace("\\", "/")
|
||||
author = "Advanced Micro Devices, Inc."
|
||||
copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved."
|
||||
version = "6.4.3"
|
||||
release = "6.4.3"
|
||||
version = "7.0.1"
|
||||
release = "7.0.1"
|
||||
setting_all_article_info = True
|
||||
all_article_info_os = ["linux", "windows"]
|
||||
all_article_info_author = ""
|
||||
|
||||
# pages with specific settings
|
||||
article_pages = [
|
||||
{"file": "about/release-notes", "os": ["linux"], "date": "2025-08-07"},
|
||||
{"file": "about/release-notes", "os": ["linux"], "date": "2025-09-17"},
|
||||
{"file": "release/changelog", "os": ["linux"],},
|
||||
{"file": "compatibility/compatibility-matrix", "os": ["linux"]},
|
||||
{"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]},
|
||||
@@ -135,9 +135,13 @@ article_pages = [
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.4", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.5", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.6", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/pytorch-training-v25.7", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/primus-pytorch", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/pytorch-training", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-history", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-v25.4", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/previous-versions/jax-maxtext-v25.5", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry", "os": ["linux"]},
|
||||
|
||||
{"file": "how-to/rocm-for-ai/fine-tuning/index", "os": ["linux"]},
|
||||
@@ -162,6 +166,8 @@ article_pages = [
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.0.1-20250702", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.1-20250702", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.9.1-20250715", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.0-20250812", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/sglang-history", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]},
|
||||
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 81 KiB After Width: | Height: | Size: 114 KiB |
@@ -0,0 +1,32 @@
|
||||
dockers:
|
||||
- pull_tag: lmsysorg/sglang:v0.5.2rc1-rocm700-mi30x
|
||||
docker_hub_url: https://hub.docker.com/layers/lmsysorg/sglang/v0.5.2rc1-rocm700-mi30x/images/sha256-10c4ee502ddba44dd8c13325e6e03868bfe7f43d23d0a44780a8ee8b393f4729
|
||||
components:
|
||||
ROCm: 7.0.0
|
||||
SGLang: v0.5.2rc1
|
||||
pytorch-triton-rocm: 3.4.0+rocm7.0.0.gitf9e5bf54
|
||||
model_groups:
|
||||
- group: Dense models
|
||||
tag: dense-models
|
||||
models:
|
||||
- model: Llama 3.1 8B Instruct
|
||||
model_repo: Llama-3.1-8B-Instruct
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct
|
||||
- model: Llama 3.1 405B FP8 KV
|
||||
model_repo: Llama-3.1-405B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
|
||||
- model: Llama 3.3 70B FP8 KV
|
||||
model_repo: amd-Llama-3.3-70B-Instruct-FP8-KV
|
||||
url: https://huggingface.co/amd/Llama-3.3-70B-Instruct-FP8-KV
|
||||
- model: Qwen3 32B
|
||||
model_repo: Qwen3-32B
|
||||
url: https://huggingface.co/Qwen/Qwen3-32B
|
||||
- group: Small experts models
|
||||
tag: small-experts-models
|
||||
models:
|
||||
- model: DeepSeek V3
|
||||
model_repo: DeepSeek-V3
|
||||
url: https://huggingface.co/deepseek-ai/DeepSeek-V3
|
||||
- model: Mixtral 8x7B v0.1
|
||||
model_repo: Mixtral-8x7B-v0.1
|
||||
url: https://huggingface.co/mistralai/Mixtral-8x7B-v0.1
|
||||
@@ -0,0 +1,162 @@
|
||||
dockers:
|
||||
- pull_tag: rocm/pytorch-training:v25.7
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-training/v25.7/images/sha256-cc6fd840ab89cb81d926fc29eca6d075aee9875a55a522675a4b9231c9a0a712
|
||||
components:
|
||||
ROCm: 6.4.2
|
||||
PyTorch: 2.8.0a0+gitd06a406
|
||||
Python: 3.10.18
|
||||
Transformer Engine: 2.2.0.dev0+94e53dd8
|
||||
Flash Attention: 3.0.0.post1
|
||||
hipBLASLt: 1.1.0-4b9a52edfc
|
||||
Triton: 3.3.0
|
||||
model_groups:
|
||||
- group: Meta Llama
|
||||
tag: llama
|
||||
models:
|
||||
- model: Llama 4 Scout 17B-16E
|
||||
mad_tag: pyt_train_llama-4-scout-17b-16e
|
||||
model_repo: Llama-4-17B_16E
|
||||
url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E
|
||||
precision: BF16
|
||||
training_modes: [finetune_fw, finetune_lora]
|
||||
- model: Llama 3.3 70B
|
||||
mad_tag: pyt_train_llama-3.3-70b
|
||||
model_repo: Llama-3.3-70B
|
||||
url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
|
||||
precision: BF16
|
||||
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
|
||||
- model: Llama 3.2 1B
|
||||
mad_tag: pyt_train_llama-3.2-1b
|
||||
model_repo: Llama-3.2-1B
|
||||
url: https://huggingface.co/meta-llama/Llama-3.2-1B
|
||||
precision: BF16
|
||||
training_modes: [finetune_fw, finetune_lora]
|
||||
- model: Llama 3.2 3B
|
||||
mad_tag: pyt_train_llama-3.2-3b
|
||||
model_repo: Llama-3.2-3B
|
||||
url: https://huggingface.co/meta-llama/Llama-3.2-3B
|
||||
precision: BF16
|
||||
training_modes: [finetune_fw, finetune_lora]
|
||||
- model: Llama 3.2 Vision 11B
|
||||
mad_tag: pyt_train_llama-3.2-vision-11b
|
||||
model_repo: Llama-3.2-Vision-11B
|
||||
url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision
|
||||
precision: BF16
|
||||
training_modes: [finetune_fw]
|
||||
- model: Llama 3.2 Vision 90B
|
||||
mad_tag: pyt_train_llama-3.2-vision-90b
|
||||
model_repo: Llama-3.2-Vision-90B
|
||||
url: https://huggingface.co/meta-llama/Llama-3.2-90B-Vision
|
||||
precision: BF16
|
||||
training_modes: [finetune_fw]
|
||||
- model: Llama 3.1 8B
|
||||
mad_tag: pyt_train_llama-3.1-8b
|
||||
model_repo: Llama-3.1-8B
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-8B
|
||||
precision: BF16
|
||||
training_modes: [pretrain, finetune_fw, finetune_lora, HF_pretrain]
|
||||
- model: Llama 3.1 70B
|
||||
mad_tag: pyt_train_llama-3.1-70b
|
||||
model_repo: Llama-3.1-70B
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
|
||||
precision: BF16
|
||||
training_modes: [pretrain, finetune_fw, finetune_lora]
|
||||
- model: Llama 3.1 405B
|
||||
mad_tag: pyt_train_llama-3.1-405b
|
||||
model_repo: Llama-3.1-405B
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-405B
|
||||
precision: BF16
|
||||
training_modes: [finetune_qlora]
|
||||
- model: Llama 3 8B
|
||||
mad_tag: pyt_train_llama-3-8b
|
||||
model_repo: Llama-3-8B
|
||||
url: https://huggingface.co/meta-llama/Meta-Llama-3-8B
|
||||
precision: BF16
|
||||
training_modes: [finetune_fw, finetune_lora]
|
||||
- model: Llama 3 70B
|
||||
mad_tag: pyt_train_llama-3-70b
|
||||
model_repo: Llama-3-70B
|
||||
url: https://huggingface.co/meta-llama/Meta-Llama-3-70B
|
||||
precision: BF16
|
||||
training_modes: [finetune_fw, finetune_lora]
|
||||
- model: Llama 2 7B
|
||||
mad_tag: pyt_train_llama-2-7b
|
||||
model_repo: Llama-2-7B
|
||||
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
|
||||
precision: BF16
|
||||
training_modes: [finetune_fw, finetune_lora, finetune_qlora]
|
||||
- model: Llama 2 13B
|
||||
mad_tag: pyt_train_llama-2-13b
|
||||
model_repo: Llama-2-13B
|
||||
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
|
||||
precision: BF16
|
||||
training_modes: [finetune_fw, finetune_lora]
|
||||
- model: Llama 2 70B
|
||||
mad_tag: pyt_train_llama-2-70b
|
||||
model_repo: Llama-2-70B
|
||||
url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
|
||||
precision: BF16
|
||||
training_modes: [finetune_lora, finetune_qlora]
|
||||
- group: OpenAI
|
||||
tag: openai
|
||||
models:
|
||||
- model: GPT OSS 20B
|
||||
mad_tag: pyt_train_gpt_oss_20b
|
||||
model_repo: GPT-OSS-20B
|
||||
url: https://huggingface.co/openai/gpt-oss-20b
|
||||
precision: BF16
|
||||
training_modes: [HF_finetune_lora]
|
||||
- model: GPT OSS 120B
|
||||
mad_tag: pyt_train_gpt_oss_120b
|
||||
model_repo: GPT-OSS-120B
|
||||
url: https://huggingface.co/openai/gpt-oss-120b
|
||||
precision: BF16
|
||||
training_modes: [HF_finetune_lora]
|
||||
- group: Qwen
|
||||
tag: qwen
|
||||
models:
|
||||
- model: Qwen 3 8B
|
||||
mad_tag: pyt_train_qwen3-8b
|
||||
model_repo: Qwen3-8B
|
||||
url: https://huggingface.co/Qwen/Qwen3-8B
|
||||
precision: BF16
|
||||
training_modes: [finetune_fw, finetune_lora]
|
||||
- model: Qwen 3 32B
|
||||
mad_tag: pyt_train_qwen3-32b
|
||||
model_repo: Qwen3-32
|
||||
url: https://huggingface.co/Qwen/Qwen3-32B
|
||||
precision: BF16
|
||||
training_modes: [finetune_lora]
|
||||
- model: Qwen 2.5 32B
|
||||
mad_tag: pyt_train_qwen2.5-32b
|
||||
model_repo: Qwen2.5-32B
|
||||
url: https://huggingface.co/Qwen/Qwen2.5-32B
|
||||
precision: BF16
|
||||
training_modes: [finetune_lora]
|
||||
- model: Qwen 2.5 72B
|
||||
mad_tag: pyt_train_qwen2.5-72b
|
||||
model_repo: Qwen2.5-72B
|
||||
url: https://huggingface.co/Qwen/Qwen2.5-72B
|
||||
precision: BF16
|
||||
training_modes: [finetune_lora]
|
||||
- model: Qwen 2 1.5B
|
||||
mad_tag: pyt_train_qwen2-1.5b
|
||||
model_repo: Qwen2-1.5B
|
||||
url: https://huggingface.co/Qwen/Qwen2-1.5B
|
||||
precision: BF16
|
||||
training_modes: [finetune_fw, finetune_lora]
|
||||
- model: Qwen 2 7B
|
||||
mad_tag: pyt_train_qwen2-7b
|
||||
model_repo: Qwen2-7B
|
||||
url: https://huggingface.co/Qwen/Qwen2-7B
|
||||
precision: BF16
|
||||
training_modes: [finetune_fw, finetune_lora]
|
||||
- group: Flux
|
||||
tag: flux
|
||||
models:
|
||||
- model: FLUX.1-dev
|
||||
mad_tag: pyt_train_flux
|
||||
model_repo: Flux
|
||||
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
|
||||
precision: BF16
|
||||
training_modes: [pretrain]
|
||||
@@ -0,0 +1,24 @@
|
||||
dockers:
|
||||
- pull_tag: rocm/pytorch-training:v25.8
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-training/v25.8/images/sha256-5082ae01d73fec6972b0d84e5dad78c0926820dcf3c19f301d6c8eb892e573c5
|
||||
components:
|
||||
ROCm: 6.4.3
|
||||
PyTorch: 2.8.0a0+gitd06a406
|
||||
Python: 3.10.18
|
||||
Transformer Engine: 2.2.0.dev0+a1e66aae
|
||||
Flash Attention: 3.0.0.post1
|
||||
hipBLASLt: 1.1.0-d1b517fc7a
|
||||
model_groups:
|
||||
- group: Meta Llama
|
||||
tag: llama
|
||||
models:
|
||||
- model: Llama 3.1 8B
|
||||
mad_tag: primus_pyt_train_llama-3.1-8b
|
||||
model_repo: Llama-3.1-8B
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-8B
|
||||
precision: BF16
|
||||
- model: Llama 3.1 70B
|
||||
mad_tag: primus_pyt_train_llama-3.1-70b
|
||||
model_repo: Llama-3.1-70B
|
||||
url: https://huggingface.co/meta-llama/Llama-3.1-70B
|
||||
precision: BF16
|
||||
@@ -1,14 +1,13 @@
|
||||
dockers:
|
||||
- pull_tag: rocm/pytorch-training:v25.7
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-training/v25.7/images/sha256-cc6fd840ab89cb81d926fc29eca6d075aee9875a55a522675a4b9231c9a0a712
|
||||
- pull_tag: rocm/pytorch-training:v25.8
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-training/v25.8/images/sha256-5082ae01d73fec6972b0d84e5dad78c0926820dcf3c19f301d6c8eb892e573c5
|
||||
components:
|
||||
ROCm: 6.4.2
|
||||
ROCm: 6.4.3
|
||||
PyTorch: 2.8.0a0+gitd06a406
|
||||
Python: 3.10.18
|
||||
Transformer Engine: 2.2.0.dev0+94e53dd8
|
||||
Transformer Engine: 2.2.0.dev0+a1e66aae
|
||||
Flash Attention: 3.0.0.post1
|
||||
hipBLASLt: 1.1.0-4b9a52edfc
|
||||
Triton: 3.3.0
|
||||
hipBLASLt: 1.1.0-d1b517fc7a
|
||||
model_groups:
|
||||
- group: Meta Llama
|
||||
tag: llama
|
||||
@@ -151,6 +150,15 @@ model_groups:
|
||||
url: https://huggingface.co/Qwen/Qwen2-7B
|
||||
precision: BF16
|
||||
training_modes: [finetune_fw, finetune_lora]
|
||||
- group: Stable Diffusion
|
||||
tag: sd
|
||||
models:
|
||||
- model: Stable Diffusion XL
|
||||
mad_tag: pyt_huggingface_stable_diffusion_xl_2k_lora_finetuning
|
||||
model_repo: SDXL
|
||||
url: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
|
||||
precision: BF16
|
||||
training_modes: [finetune_lora]
|
||||
- group: Flux
|
||||
tag: flux
|
||||
models:
|
||||
@@ -160,3 +168,11 @@ model_groups:
|
||||
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
|
||||
precision: BF16
|
||||
training_modes: [pretrain]
|
||||
- group: NCF
|
||||
tag: ncf
|
||||
models:
|
||||
- model: NCF
|
||||
mad_tag: pyt_ncf_training
|
||||
model_repo:
|
||||
url: https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Recommendation/NCF
|
||||
precision: FP32
|
||||
|
||||
@@ -1,325 +1,325 @@
|
||||
Atomic,MI100,MI200 PCIe,MI200 A+A,MI300X,MI300A
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atoimcExch,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit atomicExch,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ CAS
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ CAS
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atoimcExch,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit atomicExch,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
Atomic,MI100,MI200 PCIe,MI200 A+A,MI300X series,MI300A,MI350X series
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atoimcExch,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ Native,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit atomicExch,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ Native,⚠️ Scope Downgrade - CAS
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ CAS,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ CAS,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atoimcExch,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ Native,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit atomicExch,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
|
||||
|
@@ -1,325 +1,325 @@
|
||||
Atomic,MI100,MI200 PCIe,MI200 A+A,MI300X,MI300A
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
Atomic,MI100,MI200 PCIe,MI200 A+A,MI300X series,MI300A,MI350X series
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
|
||||
|
@@ -1,325 +1,325 @@
|
||||
Atomic,MI100,MI200 PCIe,MI200 A+A,MI300X,MI300A
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
32 bit float atomicMin,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit float atomicMin,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit float atomicMax,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atoimcExch,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicExch,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
32 bit float atomicMin,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit float atomicMin,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
32 bit float atomicMax,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS,✅ CAS
|
||||
64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atoimcExch,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicExch,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
Atomic,MI100,MI200 PCIe,MI200 A+A,MI300X series,MI300A,MI350X series
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit float atomicMin,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit float atomicMax,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atoimcExch,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicExch,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit float atomicMin,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
32 bit float atomicMax,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS,✅ CAS,⚠️ Scope Downgrade - CAS
|
||||
64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atoimcExch,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicExch,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
|
||||
|
@@ -1,325 +1,325 @@
|
||||
Atomic,MI100,MI200 PCIe,MI200 A+A,MI300X,MI300A
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native
|
||||
Atomic,MI100,MI200 PCIe,MI200 A+A,MI300X series,MI300A,MI350X series
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS
|
||||
64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native
|
||||
64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native,⚠️ Scope Downgrade
|
||||
|
||||
|
391
docs/data/reference/precision-support/precision-support.yaml
Normal file
391
docs/data/reference/precision-support/precision-support.yaml
Normal file
@@ -0,0 +1,391 @@
|
||||
# rocm-library-support.yaml
|
||||
library_groups:
|
||||
- group: "ML & Computer Vision"
|
||||
tag: "ml-cv"
|
||||
libraries:
|
||||
- name: "Composable Kernel"
|
||||
tag: "composable-kernel"
|
||||
doc_link: "composable_kernel:reference/Composable_Kernel_supported_scalar_types"
|
||||
data_types:
|
||||
- type: "int8"
|
||||
support: "✅"
|
||||
- type: "int32"
|
||||
support: "✅"
|
||||
- type: "float4"
|
||||
support: "✅"
|
||||
- type: "float6 (E2M3)"
|
||||
support: "✅"
|
||||
- type: "float6 (E3M2)"
|
||||
support: "✅"
|
||||
- type: "float8 (E4M3)"
|
||||
support: "✅"
|
||||
- type: "float8 (E5M2)"
|
||||
support: "✅"
|
||||
- type: "float16"
|
||||
support: "✅"
|
||||
- type: "bfloat16"
|
||||
support: "✅"
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
|
||||
- name: "MIGraphX"
|
||||
tag: "migraphx"
|
||||
doc_link: "amdmigraphx:reference/cpp"
|
||||
data_types:
|
||||
- type: "int8"
|
||||
support: "⚠️"
|
||||
- type: "int16"
|
||||
support: "✅"
|
||||
- type: "int32"
|
||||
support: "✅"
|
||||
- type: "int64"
|
||||
support: "✅"
|
||||
- type: "float8 (E4M3)"
|
||||
support: "✅"
|
||||
- type: "float8 (E5M2)"
|
||||
support: "✅"
|
||||
- type: "float16"
|
||||
support: "✅"
|
||||
- type: "bfloat16"
|
||||
support: "✅"
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
|
||||
- name: "MIOpen"
|
||||
tag: "miopen"
|
||||
doc_link: "miopen:reference/datatypes"
|
||||
data_types:
|
||||
- type: "int8"
|
||||
support: "⚠️"
|
||||
- type: "int32"
|
||||
support: "⚠️"
|
||||
- type: "float8 (E4M3)"
|
||||
support: "⚠️"
|
||||
- type: "float8 (E5M2)"
|
||||
support: "⚠️"
|
||||
- type: "float16"
|
||||
support: "✅"
|
||||
- type: "bfloat16"
|
||||
support: "⚠️"
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "⚠️"
|
||||
|
||||
- group: "Communication"
|
||||
tag: "communication"
|
||||
libraries:
|
||||
- name: "RCCL"
|
||||
tag: "rccl"
|
||||
doc_link: "rccl:api-reference/library-specification"
|
||||
data_types:
|
||||
- type: "int8"
|
||||
support: "✅"
|
||||
- type: "int32"
|
||||
support: "✅"
|
||||
- type: "int64"
|
||||
support: "✅"
|
||||
- type: "float8 (E4M3)"
|
||||
support: "✅"
|
||||
- type: "float8 (E5M2)"
|
||||
support: "✅"
|
||||
- type: "float16"
|
||||
support: "✅"
|
||||
- type: "bfloat16"
|
||||
support: "✅"
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
|
||||
- group: "Math Libraries"
|
||||
tag: "math-libs"
|
||||
libraries:
|
||||
- name: "hipBLAS"
|
||||
tag: "hipblas"
|
||||
doc_link: "hipblas:reference/data-type-support"
|
||||
data_types:
|
||||
- type: "float16"
|
||||
support: "⚠️"
|
||||
- type: "bfloat16"
|
||||
support: "⚠️"
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
|
||||
- name: "hipBLASLt"
|
||||
tag: "hipblaslt"
|
||||
doc_link: "hipblaslt:reference/data-type-support"
|
||||
data_types:
|
||||
- type: "int8"
|
||||
support: "✅"
|
||||
- type: "float4"
|
||||
support: "✅"
|
||||
- type: "float6 (E2M3)"
|
||||
support: "✅"
|
||||
- type: "float6 (E3M2)"
|
||||
support: "✅"
|
||||
- type: "float8 (E4M3)"
|
||||
support: "✅"
|
||||
- type: "float8 (E5M2)"
|
||||
support: "✅"
|
||||
- type: "float16"
|
||||
support: "✅"
|
||||
- type: "bfloat16"
|
||||
support: "✅"
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
|
||||
- name: "hipFFT"
|
||||
tag: "hipfft"
|
||||
doc_link: "hipfft:reference/fft-api-usage"
|
||||
data_types:
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
|
||||
- name: "hipRAND"
|
||||
tag: "hiprand"
|
||||
doc_link: "hiprand:api-reference/data-type-support"
|
||||
data_types:
|
||||
- type: "int8"
|
||||
support: "Output only"
|
||||
- type: "int16"
|
||||
support: "Output only"
|
||||
- type: "int32"
|
||||
support: "Output only"
|
||||
- type: "int64"
|
||||
support: "Output only"
|
||||
- type: "float16"
|
||||
support: "Output only"
|
||||
- type: "float32"
|
||||
support: "Output only"
|
||||
- type: "float64"
|
||||
support: "Output only"
|
||||
|
||||
- name: "hipSOLVER"
|
||||
tag: "hipsolver"
|
||||
doc_link: "hipsolver:reference/precision"
|
||||
data_types:
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
|
||||
- name: "hipSPARSE"
|
||||
tag: "hipsparse"
|
||||
doc_link: "hipsparse:reference/precision"
|
||||
data_types:
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
|
||||
- name: "hipSPARSELt"
|
||||
tag: "hipsparselt"
|
||||
doc_link: "hipsparselt:reference/data-type-support"
|
||||
data_types:
|
||||
- type: "int8"
|
||||
support: "✅"
|
||||
- type: "float8 (E4M3)"
|
||||
support: "✅"
|
||||
- type: "float8 (E5M2)"
|
||||
support: "✅"
|
||||
- type: "float16"
|
||||
support: "✅"
|
||||
- type: "bfloat16"
|
||||
support: "✅"
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
|
||||
- name: "rocBLAS"
|
||||
tag: "rocblas"
|
||||
doc_link: "rocblas:reference/data-type-support"
|
||||
data_types:
|
||||
- type: "float16"
|
||||
support: "⚠️"
|
||||
- type: "bfloat16"
|
||||
support: "⚠️"
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
|
||||
- name: "rocFFT"
|
||||
tag: "rocfft"
|
||||
doc_link: "rocfft:reference/api"
|
||||
data_types:
|
||||
- type: "float16"
|
||||
support: "✅"
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
|
||||
- name: "rocRAND"
|
||||
tag: "rocrand"
|
||||
doc_link: "rocrand:api-reference/data-type-support"
|
||||
data_types:
|
||||
- type: "int8"
|
||||
support: "Output only"
|
||||
- type: "int16"
|
||||
support: "Output only"
|
||||
- type: "int32"
|
||||
support: "Output only"
|
||||
- type: "int64"
|
||||
support: "Output only"
|
||||
- type: "float16"
|
||||
support: "Output only"
|
||||
- type: "float32"
|
||||
support: "Output only"
|
||||
- type: "float64"
|
||||
support: "Output only"
|
||||
|
||||
- name: "rocSOLVER"
|
||||
tag: "rocsolver"
|
||||
doc_link: "rocsolver:reference/precision"
|
||||
data_types:
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
|
||||
- name: "rocSPARSE"
|
||||
tag: "rocsparse"
|
||||
doc_link: "rocsparse:reference/precision"
|
||||
data_types:
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
|
||||
- name: "rocWMMA"
|
||||
tag: "rocwmma"
|
||||
doc_link: "rocwmma:api-reference/api-reference-guide"
|
||||
data_types:
|
||||
- type: "int8"
|
||||
support: "✅"
|
||||
- type: "int32"
|
||||
support: "Output only"
|
||||
- type: "float8 (E4M3)"
|
||||
support: "Input only"
|
||||
- type: "float8 (E5M2)"
|
||||
support: "Input only"
|
||||
- type: "float16"
|
||||
support: "✅"
|
||||
- type: "bfloat16"
|
||||
support: "✅"
|
||||
- type: "tensorfloat32"
|
||||
support: "✅"
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
|
||||
- name: "Tensile"
|
||||
tag: "tensile"
|
||||
doc_link: "tensile:reference/precision-support"
|
||||
data_types:
|
||||
- type: "int8"
|
||||
support: "✅"
|
||||
- type: "int32"
|
||||
support: "✅"
|
||||
- type: "float8 (E4M3)"
|
||||
support: "✅"
|
||||
- type: "float8 (E5M2)"
|
||||
support: "✅"
|
||||
- type: "float16"
|
||||
support: "✅"
|
||||
- type: "bfloat16"
|
||||
support: "✅"
|
||||
- type: "tensorfloat32"
|
||||
support: "✅"
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
|
||||
- group: "Primitives"
|
||||
tag: "primitives"
|
||||
libraries:
|
||||
- name: "hipCUB"
|
||||
tag: "hipcub"
|
||||
doc_link: "hipcub:api-reference/data-type-support"
|
||||
data_types:
|
||||
- type: "int8"
|
||||
support: "✅"
|
||||
- type: "int16"
|
||||
support: "✅"
|
||||
- type: "int32"
|
||||
support: "✅"
|
||||
- type: "int64"
|
||||
support: "✅"
|
||||
- type: "float16"
|
||||
support: "✅"
|
||||
- type: "bfloat16"
|
||||
support: "✅"
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
|
||||
- name: "hipTensor"
|
||||
tag: "hiptensor"
|
||||
doc_link: "hiptensor:api-reference/api-reference"
|
||||
data_types:
|
||||
- type: "float16"
|
||||
support: "✅"
|
||||
- type: "bfloat16"
|
||||
support: "✅"
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
|
||||
- name: "rocPRIM"
|
||||
tag: "rocprim"
|
||||
doc_link: "rocprim:reference/data-type-support"
|
||||
data_types:
|
||||
- type: "int8"
|
||||
support: "✅"
|
||||
- type: "int16"
|
||||
support: "✅"
|
||||
- type: "int32"
|
||||
support: "✅"
|
||||
- type: "int64"
|
||||
support: "✅"
|
||||
- type: "float16"
|
||||
support: "✅"
|
||||
- type: "bfloat16"
|
||||
support: "✅"
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
|
||||
- name: "rocThrust"
|
||||
tag: "rocthrust"
|
||||
doc_link: "rocthrust:data-type-support"
|
||||
data_types:
|
||||
- type: "int8"
|
||||
support: "✅"
|
||||
- type: "int16"
|
||||
support: "✅"
|
||||
- type: "int32"
|
||||
support: "✅"
|
||||
- type: "int64"
|
||||
support: "✅"
|
||||
- type: "float16"
|
||||
support: "⚠️"
|
||||
- type: "bfloat16"
|
||||
support: "⚠️"
|
||||
- type: "float32"
|
||||
support: "✅"
|
||||
- type: "float64"
|
||||
support: "✅"
|
||||
BIN
docs/data/rocm-software-stack-7_0_0.jpg
Normal file
BIN
docs/data/rocm-software-stack-7_0_0.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 350 KiB |
@@ -16,103 +16,112 @@ previous releases of the ``ROCm/vllm`` Docker image on `Docker Hub <https://hub.
|
||||
- Components
|
||||
- Resources
|
||||
|
||||
* - ``rocm/vllm:rocm6.4.1_vllm_0.10.0_20250812``
|
||||
* - ``rocm/vllm:rocm6.4.1_vllm_0.10.1_20250909``
|
||||
(latest)
|
||||
-
|
||||
-
|
||||
* ROCm 6.4.1
|
||||
* vLLM 0.10.1
|
||||
* PyTorch 2.7.0
|
||||
-
|
||||
* :doc:`Documentation <../vllm>`
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.10.1_20250909/images/sha256-1113268572e26d59b205792047bea0e61e018e79aeadceba118b7bf23cb3715c>`__
|
||||
|
||||
* - ``rocm/vllm:rocm6.4.1_vllm_0.10.0_20250812``
|
||||
-
|
||||
* ROCm 6.4.1
|
||||
* vLLM 0.10.0
|
||||
* PyTorch 2.7.0
|
||||
-
|
||||
* :doc:`Documentation <../vllm>`
|
||||
-
|
||||
* :doc:`Documentation <vllm-0.10.0-20250812>`
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.10.0_20250812/images/sha256-4c277ad39af3a8c9feac9b30bf78d439c74d9b4728e788a419d3f1d0c30cacaa>`__
|
||||
|
||||
* - ``rocm/vllm:rocm6.4.1_vllm_0.9.1_20250715``
|
||||
-
|
||||
-
|
||||
* ROCm 6.4.1
|
||||
* vLLM 0.9.1
|
||||
* PyTorch 2.7.0
|
||||
-
|
||||
-
|
||||
* :doc:`Documentation <vllm-0.9.1-20250715>`
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.9.1_20250715/images/sha256-4a429705fa95a58f6d20aceab43b1b76fa769d57f32d5d28bd3f4e030e2a78ea>`__
|
||||
|
||||
* - ``rocm/vllm:rocm6.4.1_vllm_0.9.1_20250702``
|
||||
-
|
||||
-
|
||||
* ROCm 6.4.1
|
||||
* vLLM 0.9.1
|
||||
* PyTorch 2.7.0
|
||||
-
|
||||
-
|
||||
* :doc:`Documentation <vllm-0.9.1-20250702>`
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.9.1_20250702/images/sha256-45068a2079cb8df554ed777141bf0c67d6627c470a897256e60c9f262677faab>`__
|
||||
|
||||
* - ``rocm/vllm:rocm6.4.1_vllm_0.9.0.1_20250605``
|
||||
-
|
||||
-
|
||||
* ROCm 6.4.1
|
||||
* vLLM 0.9.0.1
|
||||
* PyTorch 2.7.0
|
||||
-
|
||||
-
|
||||
* :doc:`Documentation <vllm-0.9.0.1-20250605>`
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.9.0.1_20250605/images/sha256-f48beeb3d72663a93c77211eb45273d564451447c097e060befa713d565fa36c>`__
|
||||
|
||||
* - ``rocm/vllm:rocm6.3.1_vllm_0.8.5_20250521``
|
||||
-
|
||||
-
|
||||
* ROCm 6.3.1
|
||||
* 0.8.5 vLLM (0.8.6.dev)
|
||||
* PyTorch 2.7.0
|
||||
-
|
||||
-
|
||||
* :doc:`Documentation <vllm-0.8.5-20250521>`
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_vllm_0.8.5_20250521/images/sha256-38410c51af7208897cd8b737c9bdfc126e9bc8952d4aa6b88c85482f03092a11>`__
|
||||
|
||||
* - ``rocm/vllm:rocm6.3.1_vllm_0.8.5_20250513``
|
||||
-
|
||||
-
|
||||
* ROCm 6.3.1
|
||||
* vLLM 0.8.5
|
||||
* PyTorch 2.7.0
|
||||
-
|
||||
-
|
||||
* :doc:`Documentation <vllm-0.8.5-20250513>`
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_vllm_0.8.5_20250513/images/sha256-5c8b4436dd0464119d9df2b44c745fadf81512f18ffb2f4b5dc235c71ebe26b4>`__
|
||||
|
||||
* - ``rocm/vllm:rocm6.3.1_instinct_vllm0.8.3_20250415``
|
||||
-
|
||||
-
|
||||
* ROCm 6.3.1
|
||||
* vLLM 0.8.3
|
||||
* PyTorch 2.7.0
|
||||
-
|
||||
-
|
||||
* :doc:`Documentation <vllm-0.8.3-20250415>`
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.8.3_20250415/images/sha256-ad9062dea3483d59dedb17c67f7c49f30eebd6eb37c3fac0a171fb19696cc845>`__
|
||||
|
||||
* - ``rocm/vllm:rocm6.3.1_instinct_vllm0.7.3_20250325``
|
||||
-
|
||||
-
|
||||
* ROCm 6.3.1
|
||||
* vLLM 0.7.3
|
||||
* PyTorch 2.7.0
|
||||
-
|
||||
-
|
||||
* :doc:`Documentation <vllm-0.7.3-20250325>`
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_instinct_vllm0.7.3_20250325/images/sha256-25245924f61750b19be6dcd8e787e46088a496c1fe17ee9b9e397f3d84d35640>`__
|
||||
|
||||
* - ``rocm/vllm:rocm6.3.1_mi300_ubuntu22.04_py3.12_vllm_0.6.6``
|
||||
-
|
||||
-
|
||||
* ROCm 6.3.1
|
||||
* vLLM 0.6.6
|
||||
* PyTorch 2.7.0
|
||||
-
|
||||
-
|
||||
* :doc:`Documentation <vllm-0.6.6>`
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_mi300_ubuntu22.04_py3.12_vllm_0.6.6/images/sha256-9a12ef62bbbeb5a4c30a01f702c8e025061f575aa129f291a49fbd02d6b4d6c9>`__
|
||||
|
||||
* - ``rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4``
|
||||
-
|
||||
-
|
||||
* ROCm 6.2.1
|
||||
* vLLM 0.6.4
|
||||
* PyTorch 2.5.0
|
||||
-
|
||||
-
|
||||
* :doc:`Documentation <vllm-0.6.4>`
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4/images/sha256-ccbb74cc9e7adecb8f7bdab9555f7ac6fc73adb580836c2a35ca96ff471890d8>`__
|
||||
|
||||
* - ``rocm/vllm:rocm6.2_mi300_ubuntu22.04_py3.9_vllm_7c5fd50``
|
||||
-
|
||||
-
|
||||
* ROCm 6.2.0
|
||||
* vLLM 0.4.3
|
||||
* PyTorch 2.4.0
|
||||
-
|
||||
-
|
||||
* :doc:`Documentation <vllm-0.4.3>`
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.2_mi300_ubuntu22.04_py3.9_vllm_7c5fd50/images/sha256-9e4dd4788a794c3d346d7d0ba452ae5e92d39b8dfac438b2af8efdc7f15d22c0>`__
|
||||
|
||||
|
||||
@@ -0,0 +1,257 @@
|
||||
.. meta::
|
||||
:description: SGLang multi-node disaggregated distributed inference using Mooncake
|
||||
:keywords: model, sglang, mooncake, disagg, disaggregated, distributed, multi-node, docker
|
||||
|
||||
******************************************
|
||||
SGLang distributed inference with Mooncake
|
||||
******************************************
|
||||
|
||||
As LLM inference increasingly demands handling massive models and dynamic workloads, efficient
|
||||
distributed inference becomes essential. Traditional co-located architectures face bottlenecks due
|
||||
to tightly coupled memory and compute resources, which limits scalability and flexibility.
|
||||
Disaggregated inference refers to the process of splitting the inference of LLMs into distinct
|
||||
phases. This architecture, facilitated by libraries like Mooncake, uses high-bandwidth
|
||||
RDMA to transfer the Key-Value (KV) cache between prefill and decode nodes.
|
||||
This allows for independent resource scaling and optimization, resulting in
|
||||
improved efficiency and throughput.
|
||||
|
||||
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/sglang-distributed-benchmark-models.yaml
|
||||
|
||||
{% set docker = data.dockers[0] %}
|
||||
|
||||
`SGLang <https://docs.sglang.ai>`__ is a high-performance inference and
|
||||
serving engine for large language models (LLMs) and vision models. The
|
||||
ROCm-enabled `SGLang base Docker image <{{ docker.docker_hub_url }}>`__
|
||||
bundles SGLang with PyTorch, which is optimized for AMD Instinct MI300X series
|
||||
accelerators. It includes the following software components:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Software component
|
||||
- Version
|
||||
|
||||
{% for component_name, component_version in docker.components.items() %}
|
||||
* - {{ component_name }}
|
||||
- {{ component_version }}
|
||||
{% endfor %}
|
||||
|
||||
The following guides on setting up and running SGLang and Mooncake for disaggregated
|
||||
distributed inference on a Slurm cluster using AMD Instinct MI300X series accelerators backed by
|
||||
Mellanox CX-7 NICs.
|
||||
|
||||
Prerequisites
|
||||
=============
|
||||
|
||||
Before starting, ensure you have:
|
||||
|
||||
* A Slurm cluster with at least three nodes: one for the proxy, one for prefill (``xP``), and one for decode (``yD``).
|
||||
|
||||
``Nodes -> xP + yD + 1``
|
||||
|
||||
* A Dockerized environment with SGLang, Mooncake, etcd, and NIC drivers built in. See :ref:`sglang-disagg-inf-build-docker-image` for instructions.
|
||||
|
||||
* A shared filesystem for storing models, scripts, and logs (cluster-specific).
|
||||
|
||||
Supported models
|
||||
================
|
||||
|
||||
The following models are supported for SGLang disaggregated prefill/decode
|
||||
inference. Some instructions, commands, and recommendations in this
|
||||
documentation might vary by selected model.
|
||||
|
||||
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/sglang-distributed-benchmark-models.yaml
|
||||
|
||||
{% set model_groups = data.model_groups %}
|
||||
.. raw:: html
|
||||
|
||||
<div id="vllm-benchmark-ud-params-picker" class="container-fluid">
|
||||
<div class="row gx-0">
|
||||
<div class="col-2 me-1 px-2 model-param-head">Model type</div>
|
||||
<div class="row col-10 pe-0">
|
||||
{% for model_group in model_groups %}
|
||||
<div class="col-6 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row gx-0 pt-1">
|
||||
<div class="col-2 me-1 px-2 model-param-head">Model</div>
|
||||
<div class="row col-10 pe-0">
|
||||
{% for model_group in model_groups %}
|
||||
{% set models = model_group.models %}
|
||||
{% for model in models %}
|
||||
{% if models|length % 3 == 0 %}
|
||||
<div class="col-4 px-2 model-param" data-param-k="model" data-param-v="{{ model.model_repo | lower }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
|
||||
{% else %}
|
||||
<div class="col-6 px-2 model-param" data-param-k="model" data-param-v="{{ model.model_repo | lower }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% for model_group in model_groups %}
|
||||
{% for model in model_group.models %}
|
||||
|
||||
.. container:: model-doc {{ model.model_repo }}
|
||||
|
||||
.. note::
|
||||
|
||||
See the `{{ model.model }} model card on Hugging Face <{{ model.url }}>`__ to learn more about this model.
|
||||
Some models require access authorization prior to use through an external license agreement with a third party.
|
||||
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
.. _sglang-disagg-inf-build-docker-image:
|
||||
|
||||
Build the Docker image
|
||||
----------------------
|
||||
|
||||
Get the Dockerfile located in
|
||||
`<https://github.com/ROCm/MAD/blob/develop/docker/sglang_dissag_inference.ubuntu.amd.Dockerfile>`__.
|
||||
It uses `lmsysorg/sglang:v0.5.2rc1-rocm700-mi30x
|
||||
<https://hub.docker.com/layers/lmsysorg/sglang/v0.4.9.post1-rocm630/images/sha256-2f6b1748e4bcc70717875a7da76c87795fd8aa46a9646e08d38aa7232fc78538>`__
|
||||
as the base Docker image and installs the necessary components for Mooncake, etcd, and Mellanox network
|
||||
drivers.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
git clone https://github.com/ROCm/MAD.git
|
||||
cd MAD/docker
|
||||
docker build \
|
||||
-t sglang_disagg_pd_image \
|
||||
-f sglang_disagg_inference.ubuntu.amd.Dockerfile .
|
||||
|
||||
Benchmarking
|
||||
============
|
||||
|
||||
The `<https://github.com/ROCm/MAD/tree/develop/scripts/sglang_dissag>`__
|
||||
repository contains scripts to launch SGLang inference with prefill/decode
|
||||
disaggregation via Mooncake for supported models.
|
||||
|
||||
* `scripts/sglang_dissag/run_xPyD_models.slurm <https://github.com/ROCm/MAD/blob/develop/scripts/sglang_disagg/run_xPyD_models.slurm>`__
|
||||
-- the main Slurm batch script to launch Docker containers on all nodes using ``sbatch`` or ``salloc``.
|
||||
|
||||
* `scripts/sglang_dissag/sglang_disagg_server.sh <https://github.com/ROCm/MAD/blob/develop/scripts/sglang_disagg/sglang_disagg_server.sh>`__
|
||||
-- the entrypoint script that runs inside each container to start the correct service -- proxy, prefill, or decode.
|
||||
|
||||
* `scripts/sglang_dissag/benchmark_xPyD.sh <https://github.com/ROCm/MAD/blob/develop/scripts/sglang_disagg/benchmark_xPyD.sh>`__
|
||||
-- the benchmark script to run the GSM8K accuracy benchmark and the SGLang benchmarking tool for performance measurement.
|
||||
|
||||
* `scripts/sglang_dissag/benchmark_parser.py <https://github.com/ROCm/MAD/blob/develop/scripts/sglang_disagg/benchmark_parser.py>`__
|
||||
-- the log parser script to be run on the concurrency benchmark log file to generate tabulated data.
|
||||
|
||||
Launch the service
|
||||
------------------
|
||||
|
||||
The service is deployed using a Slurm batch script that orchestrates the containers across the
|
||||
allocated nodes.
|
||||
|
||||
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/sglang-distributed-benchmark-models.yaml
|
||||
|
||||
{% set model_groups = data.model_groups %}
|
||||
{% for model_group in model_groups %}
|
||||
{% for model in model_group.models %}
|
||||
|
||||
.. container:: model-doc {{ model.model_repo }}
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# Clone the MAD repo if you haven't already and
|
||||
# navigate to the scripts directory
|
||||
git clone https://github.com/ROCm/MAD.git
|
||||
cd MAD/scripts/sglang_disagg/
|
||||
|
||||
# Slurm sbatch run command
|
||||
export DOCKER_IMAGE_NAME=sglang_disagg_pd_image
|
||||
export xP=<num_prefill_nodes>
|
||||
export yD=<num_decode_nodes>
|
||||
export MODEL_NAME={{ model.model_repo }}
|
||||
# num_nodes = xP + yD + 1
|
||||
sbatch -N <num_nodes> -n <num_nodes> --nodelist=<Nodes> run_xPyD_models.slurm
|
||||
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
Post-run logs and testing
|
||||
-------------------------
|
||||
|
||||
Logs are stored in your shared filesystem in the directory specified by the ``LOG_PATH`` variable in the Slurm script.
|
||||
A new directory named after the Slurm job ID is created for each run.
|
||||
|
||||
Inside that directory, you can access various logs:
|
||||
|
||||
* ``pd_sglang_bench_serving.sh_NODE<...>.log`` -- the main log for each server node.
|
||||
|
||||
* ``etcd_NODE<...>.log`` -- logs for etcd services.
|
||||
|
||||
* ``prefill_NODE<...>.log`` -- logs for the prefill services.
|
||||
|
||||
* ``decode_NODE<...>.log`` -- logs for the decode services.
|
||||
|
||||
Use the benchmark parser script for concurrency logs to tabulate different data.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
python3 benchmark_parser.py <log_path/benchmark_XXX_CONCURRENCY.log>
|
||||
|
||||
To verify the service is responsive, you can try sending a ``curl`` request to test the launched
|
||||
server from the Docker container on the proxy node. For example:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
curl -X POST http://127.0.0.1:30000/generate \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{ "text": "Let me tell you a story ", "sampling_params": { "temperature": 0.3 } }'
|
||||
|
||||
Known issues
|
||||
============
|
||||
|
||||
When running larger models, such as DeepSeek-V3 and Llama-3.1-405B-Instruct-FP8-KV, at
|
||||
higher concurrency levels (512+), the following error might occur:
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
<TransferEncodingError: 400, message:
|
||||
Not enough data to satisfy transfer length header.
|
||||
|
||||
The above exception was the direct cause of the following exception:
|
||||
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
|
||||
This leads to dropping requests and lower throughput.
|
||||
|
||||
Further reading
|
||||
===============
|
||||
|
||||
- To learn about Mooncake, see `Welcome to Mooncake <https://kvcache-ai.github.io/Mooncake/>`__.
|
||||
|
||||
- To learn more about the options for latency and throughput benchmark scripts,
|
||||
see `<https://github.com/sgl-project/sglang/tree/main/benchmark/blog_v0_2>`__.
|
||||
|
||||
- See the base upstream Docker image on `Docker Hub <https://hub.docker.com/layers/lmsysorg/sglang/v0.5.2rc1-rocm700-mi30x/images/sha256-10c4ee502ddba44dd8c13325e6e03868bfe7f43d23d0a44780a8ee8b393f4729>`__.
|
||||
|
||||
- To learn more about system settings and management practices to configure your system for
|
||||
MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`__.
|
||||
|
||||
- For application performance optimization strategies for HPC and AI workloads,
|
||||
including inference with vLLM, see :doc:`/how-to/rocm-for-ai/inference-optimization/workload`.
|
||||
|
||||
- To learn how to run community models from Hugging Face on AMD GPUs, see
|
||||
:doc:`Running models from Hugging Face </how-to/rocm-for-ai/inference/hugging-face-models>`.
|
||||
|
||||
- To learn how to fine-tune LLMs and optimize inference, see
|
||||
:doc:`Fine-tuning LLMs and inference optimization </how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference>`.
|
||||
|
||||
- For a list of other ready-made Docker images for AI with ROCm, see
|
||||
`AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.
|
||||
|
||||
Previous versions
|
||||
=================
|
||||
|
||||
See :doc:`previous-versions/sglang-history` to find documentation for previous releases
|
||||
of SGLang inference performance testing.
|
||||
@@ -1,6 +1,5 @@
|
||||
.. meta::
|
||||
:description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the
|
||||
ROCm vLLM Docker image.
|
||||
:description: Learn how to validate LLM inference performance on MI300X accelerators using AMD MAD and the ROCm vLLM Docker image.
|
||||
:keywords: model, MAD, automation, dashboarding, validate
|
||||
|
||||
**********************************
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
:keywords: ROCm, AI, LLM, train, jax, torch, Llama, flux, tutorial, docker
|
||||
|
||||
******************************************
|
||||
Training a model with JAX MaxText for ROCm
|
||||
Training a model with JAX MaxText on ROCm
|
||||
******************************************
|
||||
|
||||
MaxText is a high-performance, open-source framework built on the Google JAX
|
||||
@@ -406,8 +406,6 @@ benchmark results:
|
||||
Further reading
|
||||
===============
|
||||
|
||||
- See the ROCm/maxtext benchmarking README at `<https://github.com/ROCm/maxtext/blob/main/benchmarks/gpu-rocm/readme.md>`__.
|
||||
|
||||
- To learn more about MAD and the ``madengine`` CLI, see the `MAD usage guide <https://github.com/ROCm/MAD?tab=readme-ov-file#usage-guide>`__.
|
||||
|
||||
- To learn more about system settings and management practices to configure your system for
|
||||
|
||||
@@ -5,15 +5,13 @@
|
||||
:keywords: ROCm, AI, LLM, train, Megatron-LM, megatron, Llama, tutorial, docker, torch
|
||||
|
||||
******************************************
|
||||
Training a model with Megatron-LM for ROCm
|
||||
Training a model with Megatron-LM on ROCm
|
||||
******************************************
|
||||
|
||||
.. caution::
|
||||
|
||||
The ROCm Megatron-LM framework now has limited support with this Docker
|
||||
environment; it now focuses on Primus with Megatron-Core. See :doc:`primus-megatron`.
|
||||
|
||||
To learn how to migrate your existing workloads to Primus with Megatron-Core,
|
||||
Primus with Megatron supersedes this ROCm Megatron-LM training workflow.
|
||||
To learn how to migrate workloads from Megatron-LM to Primus with Megatron,
|
||||
see :doc:`previous-versions/megatron-lm-primus-migration-guide`.
|
||||
|
||||
The `Megatron-LM framework for ROCm <https://github.com/ROCm/Megatron-LM>`_ is
|
||||
@@ -807,9 +805,16 @@ Single node training
|
||||
AC=none \
|
||||
SEQ_LEN=4096 \
|
||||
PAD_LEN=4096 \
|
||||
TRAIN_ITERS=50 \
|
||||
TRAIN_ITERS=20 \
|
||||
bash examples/deepseek_v2/train_deepseekv2.sh
|
||||
|
||||
.. note::
|
||||
|
||||
Note that DeepSeek-V2-Lite is experiencing instability due to GPU memory access fault
|
||||
for large iterations.
|
||||
For stability, it's recommended to use Primus for this workload.
|
||||
See :doc:`primus-megatron`.
|
||||
|
||||
.. container:: model-doc pyt_megatron_lm_train_mixtral-8x7b
|
||||
|
||||
To run training on a single node for Mixtral 8x7B (MoE with expert parallel),
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
:keywords: ROCm, AI, LLM, train, PyTorch, torch, Llama, flux, tutorial, docker
|
||||
|
||||
******************************************
|
||||
Training MPT-30B with LLM Foundry and ROCm
|
||||
Training MPT-30B with LLM Foundry on ROCm
|
||||
******************************************
|
||||
|
||||
MPT-30B is a 30-billion parameter decoder-style transformer-based model from
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
PyTorch training performance testing version history
|
||||
****************************************************
|
||||
|
||||
This table lists previous versions of the ROCm Megatron-LM training Docker image for
|
||||
This table lists previous versions of the ROCm PyTorch training Docker image for
|
||||
inference performance testing. For detailed information about available models
|
||||
for benchmarking, see the version-specific documentation. You can find tagged
|
||||
previous releases of the ``ROCm/pytorch-training`` Docker image on `Docker Hub <https://hub.docker.com/r/rocm/pytorch-training/tags>`_.
|
||||
@@ -16,12 +16,21 @@ previous releases of the ``ROCm/pytorch-training`` Docker image on `Docker Hub <
|
||||
- Components
|
||||
- Resources
|
||||
|
||||
* - v25.8 (latest)
|
||||
-
|
||||
* ROCm 6.4.3
|
||||
* PyTorch 2.8.0a0+gitd06a406
|
||||
-
|
||||
* :doc:`Primus PyTorch Training documentation <../primus-pytorch>`
|
||||
* :doc:`PyTorch training (legacy) documentation <../pytorch-training>`
|
||||
* `Docker Hub <https://hub.docker.com/r/rocm/pytorch-training/tags>`__
|
||||
|
||||
* - v25.7
|
||||
-
|
||||
* ROCm 6.4.2
|
||||
* PyTorch 2.8.0a0+gitd06a406
|
||||
-
|
||||
* :doc:`Documentation <../pytorch-training>`
|
||||
* :doc:`Documentation <pytorch-training-v25.7>`
|
||||
* `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-training/v25.7/images/sha256-cc6fd840ab89cb81d926fc29eca6d075aee9875a55a522675a4b9231c9a0a712>`__
|
||||
|
||||
* - v25.6
|
||||
|
||||
@@ -0,0 +1,567 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: How to train a model using PyTorch for ROCm.
|
||||
:keywords: ROCm, AI, LLM, train, PyTorch, torch, Llama, flux, tutorial, docker
|
||||
|
||||
**************************************
|
||||
Training a model with PyTorch for ROCm
|
||||
**************************************
|
||||
|
||||
.. caution::
|
||||
|
||||
This documentation does not reflect the latest version of ROCm vLLM
|
||||
performance benchmark documentation. See :doc:`../pytorch-training` for the latest version.
|
||||
|
||||
PyTorch is an open-source machine learning framework that is widely used for
|
||||
model training with GPU-optimized components for transformer-based models.
|
||||
|
||||
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/previous-versions/pytorch-training-v25.7-benchmark-models.yaml
|
||||
|
||||
{% set dockers = data.dockers %}
|
||||
{% set docker = dockers[0] %}
|
||||
The `PyTorch for ROCm training Docker <{{ docker.docker_hub_url }}>`__
|
||||
(``{{ docker.pull_tag }}``) image provides a prebuilt optimized environment for fine-tuning and pretraining a
|
||||
model on AMD Instinct MI325X and MI300X accelerators. It includes the following software components to accelerate
|
||||
training workloads:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Software component
|
||||
- Version
|
||||
|
||||
{% for component_name, component_version in docker.components.items() %}
|
||||
* - {{ component_name }}
|
||||
- {{ component_version }}
|
||||
{% endfor %}
|
||||
|
||||
.. _amd-pytorch-training-model-support-v257:
|
||||
|
||||
Supported models
|
||||
================
|
||||
|
||||
The following models are pre-optimized for performance on the AMD Instinct MI325X and MI300X accelerators.
|
||||
Some instructions, commands, and training recommendations in this documentation might
|
||||
vary by model -- select one to get started.
|
||||
|
||||
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/previous-versions/pytorch-training-v25.7-benchmark-models.yaml
|
||||
|
||||
{% set unified_docker = data.dockers[0] %}
|
||||
{% set model_groups = data.model_groups %}
|
||||
.. raw:: html
|
||||
|
||||
<div id="vllm-benchmark-ud-params-picker" class="container-fluid">
|
||||
<div class="row gx-0">
|
||||
<div class="col-2 me-1 px-2 model-param-head">Model</div>
|
||||
<div class="row col-10 pe-0">
|
||||
{% for model_group in model_groups %}
|
||||
<div class="col-3 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row gx-0 pt-1">
|
||||
<div class="col-2 me-1 px-2 model-param-head">Variant</div>
|
||||
<div class="row col-10 pe-0">
|
||||
{% for model_group in model_groups %}
|
||||
{% set models = model_group.models %}
|
||||
{% for model in models %}
|
||||
{% if models|length % 3 == 0 %}
|
||||
<div class="col-4 px-2 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
|
||||
{% else %}
|
||||
<div class="col-6 px-2 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
.. _amd-pytorch-training-supported-training-modes-v257:
|
||||
|
||||
The following table lists supported training modes per model.
|
||||
|
||||
.. dropdown:: Supported training modes
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Model
|
||||
- Supported training modes
|
||||
|
||||
{% for model_group in model_groups %}
|
||||
{% set models = model_group.models %}
|
||||
{% for model in models %}
|
||||
* - {{ model.model }}
|
||||
- ``{{ model.training_modes | join('``, ``') }}``
|
||||
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
.. note::
|
||||
|
||||
Some model and fine-tuning combinations are not listed. This is
|
||||
because the `upstream torchtune repository <https://github.com/pytorch/torchtune>`__
|
||||
doesn't provide default YAML configurations for them.
|
||||
For advanced usage, you can create a custom configuration to enable
|
||||
unlisted fine-tuning methods by using an existing file in the
|
||||
``/workspace/torchtune/recipes/configs`` directory as a template.
|
||||
|
||||
.. _amd-pytorch-training-performance-measurements-v257:
|
||||
|
||||
Performance measurements
|
||||
========================
|
||||
|
||||
To evaluate performance, the
|
||||
`Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8deaeb413-item-21cea50186-tab>`_
|
||||
page provides reference throughput and latency measurements for training
|
||||
popular AI models.
|
||||
|
||||
.. note::
|
||||
|
||||
The performance data presented in
|
||||
`Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8deaeb413-item-21cea50186-tab>`_
|
||||
should not be interpreted as the peak performance achievable by AMD
|
||||
Instinct MI325X and MI300X accelerators or ROCm software.
|
||||
|
||||
System validation
|
||||
=================
|
||||
|
||||
Before running AI workloads, it's important to validate that your AMD hardware is configured
|
||||
correctly and performing optimally.
|
||||
|
||||
If you have already validated your system settings, including aspects like NUMA auto-balancing, you
|
||||
can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
|
||||
optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
|
||||
before starting training.
|
||||
|
||||
To test for optimal performance, consult the recommended :ref:`System health benchmarks
|
||||
<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
|
||||
system's configuration.
|
||||
|
||||
This Docker image is optimized for specific model configurations outlined
|
||||
below. Performance can vary for other training workloads, as AMD
|
||||
doesn’t test configurations and run conditions outside those described.
|
||||
|
||||
Run training
|
||||
============
|
||||
|
||||
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/previous-versions/pytorch-training-v25.7-benchmark-models.yaml
|
||||
|
||||
{% set unified_docker = data.dockers[0] %}
|
||||
{% set model_groups = data.model_groups %}
|
||||
|
||||
Once the setup is complete, choose between two options to start benchmarking training:
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: MAD-integrated benchmarking
|
||||
|
||||
1. Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
|
||||
directory and install the required packages on the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD
|
||||
pip install -r requirements.txt
|
||||
|
||||
{% for model_group in model_groups %}
|
||||
{% for model in model_group.models %}
|
||||
|
||||
.. container:: model-doc {{ model.mad_tag }}
|
||||
|
||||
2. For example, use this command to run the performance benchmark test on the {{ model.model }} model
|
||||
using one node with the {{ model.precision }} data type on the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
|
||||
madengine run \
|
||||
--tags {{ model.mad_tag }} \
|
||||
--keep-model-dir \
|
||||
--live-output \
|
||||
--timeout 28800
|
||||
|
||||
MAD launches a Docker container with the name
|
||||
``container_ci-{{ model.mad_tag }}``. The latency and throughput reports of the
|
||||
model are collected in ``~/MAD/perf.csv``.
|
||||
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
.. tab-item:: Standalone benchmarking
|
||||
|
||||
.. rubric:: Download the Docker image and required packages
|
||||
|
||||
1. Use the following command to pull the Docker image from Docker Hub.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull {{ unified_docker.pull_tag }}
|
||||
|
||||
2. Run the Docker container.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--device /dev/dri \
|
||||
--device /dev/kfd \
|
||||
--network host \
|
||||
--ipc host \
|
||||
--group-add video \
|
||||
--cap-add SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
--privileged \
|
||||
-v $HOME:$HOME \
|
||||
-v $HOME/.ssh:/root/.ssh \
|
||||
--shm-size 64G \
|
||||
--name training_env \
|
||||
{{ unified_docker.pull_tag }}
|
||||
|
||||
Use these commands if you exit the ``training_env`` container and need to return to it.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker start training_env
|
||||
docker exec -it training_env bash
|
||||
|
||||
3. In the Docker container, clone the `<https://github.com/ROCm/MAD>`__
|
||||
repository and navigate to the benchmark scripts directory
|
||||
``/workspace/MAD/scripts/pytorch_train``.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD/scripts/pytorch_train
|
||||
|
||||
.. rubric:: Prepare training datasets and dependencies
|
||||
|
||||
1. The following benchmarking examples require downloading models and datasets
|
||||
from Hugging Face. To ensure successful access to gated repos, set your
|
||||
``HF_TOKEN``.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export HF_TOKEN=$your_personal_hugging_face_access_token
|
||||
|
||||
2. Run the setup script to install libraries and datasets needed for benchmarking.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./pytorch_benchmark_setup.sh
|
||||
|
||||
.. container:: model-doc pyt_train_llama-3.1-8b
|
||||
|
||||
``pytorch_benchmark_setup.sh`` installs the following libraries for Llama 3.1 8B:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Library
|
||||
- Reference
|
||||
|
||||
* - ``accelerate``
|
||||
- `Hugging Face Accelerate <https://huggingface.co/docs/accelerate/en/index>`_
|
||||
|
||||
* - ``datasets``
|
||||
- `Hugging Face Datasets <https://huggingface.co/docs/datasets/v3.2.0/en/index>`_ 3.2.0
|
||||
|
||||
.. container:: model-doc pyt_train_llama-3.1-70b
|
||||
|
||||
``pytorch_benchmark_setup.sh`` installs the following libraries for Llama 3.1 70B:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Library
|
||||
- Reference
|
||||
|
||||
* - ``datasets``
|
||||
- `Hugging Face Datasets <https://huggingface.co/docs/datasets/v3.2.0/en/index>`_ 3.2.0
|
||||
|
||||
* - ``torchdata``
|
||||
- `TorchData <https://pytorch.org/data/beta/index.html>`_
|
||||
|
||||
* - ``tomli``
|
||||
- `Tomli <https://pypi.org/project/tomli/>`_
|
||||
|
||||
* - ``tiktoken``
|
||||
- `tiktoken <https://github.com/openai/tiktoken>`_
|
||||
|
||||
* - ``blobfile``
|
||||
- `blobfile <https://pypi.org/project/blobfile/>`_
|
||||
|
||||
* - ``tabulate``
|
||||
- `tabulate <https://pypi.org/project/tabulate/>`_
|
||||
|
||||
* - ``wandb``
|
||||
- `Weights & Biases <https://github.com/wandb/wandb>`_
|
||||
|
||||
* - ``sentencepiece``
|
||||
- `SentencePiece <https://github.com/google/sentencepiece>`_ 0.2.0
|
||||
|
||||
* - ``tensorboard``
|
||||
- `TensorBoard <https://www.tensorflow.org/tensorboard>`_ 2.18.0
|
||||
|
||||
.. container:: model-doc pyt_train_flux
|
||||
|
||||
``pytorch_benchmark_setup.sh`` installs the following libraries for FLUX:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Library
|
||||
- Reference
|
||||
|
||||
* - ``accelerate``
|
||||
- `Hugging Face Accelerate <https://huggingface.co/docs/accelerate/en/index>`_
|
||||
|
||||
* - ``datasets``
|
||||
- `Hugging Face Datasets <https://huggingface.co/docs/datasets/v3.2.0/en/index>`_ 3.2.0
|
||||
|
||||
* - ``sentencepiece``
|
||||
- `SentencePiece <https://github.com/google/sentencepiece>`_ 0.2.0
|
||||
|
||||
* - ``tensorboard``
|
||||
- `TensorBoard <https://www.tensorflow.org/tensorboard>`_ 2.18.0
|
||||
|
||||
* - ``csvkit``
|
||||
- `csvkit <https://csvkit.readthedocs.io/en/latest/>`_ 2.0.1
|
||||
|
||||
* - ``deepspeed``
|
||||
- `DeepSpeed <https://github.com/deepspeedai/DeepSpeed>`_ 0.16.2
|
||||
|
||||
* - ``diffusers``
|
||||
- `Hugging Face Diffusers <https://huggingface.co/docs/diffusers/en/index>`_ 0.31.0
|
||||
|
||||
* - ``GitPython``
|
||||
- `GitPython <https://github.com/gitpython-developers/GitPython>`_ 3.1.44
|
||||
|
||||
* - ``opencv-python-headless``
|
||||
- `opencv-python-headless <https://pypi.org/project/opencv-python-headless/>`_ 4.10.0.84
|
||||
|
||||
* - ``peft``
|
||||
- `PEFT <https://huggingface.co/docs/peft/en/index>`_ 0.14.0
|
||||
|
||||
* - ``protobuf``
|
||||
- `Protocol Buffers <https://github.com/protocolbuffers/protobuf>`_ 5.29.2
|
||||
|
||||
* - ``pytest``
|
||||
- `PyTest <https://docs.pytest.org/en/stable/>`_ 8.3.4
|
||||
|
||||
* - ``python-dotenv``
|
||||
- `python-dotenv <https://pypi.org/project/python-dotenv/>`_ 1.0.1
|
||||
|
||||
* - ``seaborn``
|
||||
- `Seaborn <https://seaborn.pydata.org/>`_ 0.13.2
|
||||
|
||||
* - ``transformers``
|
||||
- `Transformers <https://huggingface.co/docs/transformers/en/index>`_ 4.47.0
|
||||
|
||||
``pytorch_benchmark_setup.sh`` downloads the following datasets from Hugging Face:
|
||||
|
||||
* `bghira/pseudo-camera-10k <https://huggingface.co/datasets/bghira/pseudo-camera-10k>`_
|
||||
|
||||
{% for model_group in model_groups %}
|
||||
{% for model in model_group.models %}
|
||||
{% set training_modes = model.training_modes %}
|
||||
{% set training_mode_descs = {
|
||||
"pretrain": "Benchmark pre-training.",
|
||||
"HF_pretrain": "Llama 3.1 8B pre-training with FP8 precision."
|
||||
} %}
|
||||
{% set available_modes = training_modes | select("in", ["pretrain", "HF_pretrain"]) | list %}
|
||||
{% if available_modes %}
|
||||
|
||||
.. container:: model-doc {{ model.mad_tag }}
|
||||
|
||||
.. rubric:: Pre-training
|
||||
|
||||
To start the pre-training benchmark, use the following command with the
|
||||
appropriate options. See the following list of options and their descriptions.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./pytorch_benchmark_report.sh -t {% if available_modes | length == 1 %}{{ available_modes[0] }}{% else %}$training_mode{% endif %} \
|
||||
-m {{ model.model_repo }} \
|
||||
-p $datatype \
|
||||
-s $sequence_length
|
||||
|
||||
{% if model.mad_tag == "pyt_train_flux" %}
|
||||
.. container:: model-doc {{ model.mad_tag }}
|
||||
|
||||
.. note::
|
||||
|
||||
Currently, FLUX models are not supported out-of-the-box on {{ unified_docker.pull_tag }}.
|
||||
To use FLUX, refer to the previous version of the ``pytorch-training`` Docker: :doc:`pytorch-training-v25.6`
|
||||
|
||||
Occasionally, downloading the Flux dataset might fail. In the event of this
|
||||
error, manually download it from Hugging Face at
|
||||
`black-forest-labs/FLUX.1-dev <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
|
||||
and save it to `/workspace/FluxBenchmark`. This ensures that the test script can access
|
||||
the required dataset.
|
||||
{% endif %}
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Name
|
||||
- Options
|
||||
- Description
|
||||
|
||||
{% for mode in available_modes %}
|
||||
* - {% if loop.first %}``$training_mode``{% endif %}
|
||||
- ``{{ mode }}``
|
||||
- {{ training_mode_descs[mode] }}
|
||||
{% endfor %}
|
||||
|
||||
* - ``$datatype``
|
||||
- ``BF16``{% if model.mad_tag == "pyt_train_llama-3.1-8b" %} or ``FP8``{% endif %}
|
||||
- Only Llama 3.1 8B supports FP8 precision.
|
||||
|
||||
* - ``$sequence_length``
|
||||
- Sequence length for the language model.
|
||||
- Between 2048 and 8192. 8192 by default.
|
||||
{% endif %}
|
||||
|
||||
{% set training_mode_descs = {
|
||||
"finetune_fw": "Full weight fine-tuning (BF16 and FP8 supported).",
|
||||
"finetune_lora": "LoRA fine-tuning (BF16 supported).",
|
||||
"finetune_qlora": "QLoRA fine-tuning (BF16 supported).",
|
||||
"HF_finetune_lora": "LoRA fine-tuning with Hugging Face PEFT.",
|
||||
} %}
|
||||
{% set available_modes = training_modes | select("in", ["finetune_fw", "finetune_lora", "finetune_qlora", "HF_finetune_lora"]) | list %}
|
||||
{% if available_modes %}
|
||||
.. container:: model-doc {{ model.mad_tag }}
|
||||
|
||||
.. rubric:: Fine-tuning
|
||||
|
||||
To start the fine-tuning benchmark, use the following command with the
|
||||
appropriate options. See the following list of options and their descriptions.
|
||||
See :ref:`supported training modes <amd-pytorch-training-supported-training-modes-v257>`.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./pytorch_benchmark_report.sh -t $training_mode \
|
||||
-m {{ model.model_repo }} \
|
||||
-p $datatype \
|
||||
-s $sequence_length
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Name
|
||||
- Options
|
||||
- Description
|
||||
|
||||
{% for mode in available_modes %}
|
||||
* - {% if loop.first %}``$training_mode``{% endif %}
|
||||
- ``{{ mode }}``
|
||||
- {{ training_mode_descs[mode] }}
|
||||
{% endfor %}
|
||||
|
||||
* - ``$datatype``
|
||||
- ``BF16``{% if "finetune_fw" in available_modes %} or ``FP8``{% endif %}
|
||||
- All models support BF16.{% if "finetune_fw" in available_modes %} FP8 is only available for full weight fine-tuning.{% endif %}
|
||||
|
||||
* - ``$sequence_length``
|
||||
- Between 2048 and 16384.
|
||||
- Sequence length for the language model.
|
||||
|
||||
{% if model.mad_tag in ["pyt_train_llama3.2-vision-11b", "pyt_train_llama-3.2-vision-90b"] %}
|
||||
.. note::
|
||||
|
||||
For LoRA and QLoRA support with vision models (Llama 3.2 11B and 90B),
|
||||
use the following torchtune commit for compatibility:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
git checkout 48192e23188b1fc524dd6d127725ceb2348e7f0e
|
||||
|
||||
{% elif model.mad_tag in ["pyt_train_llama-2-7b", "pyt_train_llama-2-13b", "pyt_train_llama-2-70b"] %}
|
||||
.. note::
|
||||
|
||||
You might encounter the following error with Llama 2: ``ValueError: seq_len (16384) of
|
||||
input tensor should be smaller than max_seq_len (4096)``.
|
||||
This error indicates that an input sequence is longer than the model's maximum context window.
|
||||
|
||||
Ensure your tokenized input does not exceed the model's ``max_seq_len`` (4096
|
||||
tokens in this case). You can resolve this by truncating the input or splitting
|
||||
it into smaller chunks before passing it to the model.
|
||||
|
||||
Note on reproducibility: The results in this guide are based on
|
||||
commit ``b4c98ac`` from the upstream
|
||||
`<https://github.com/pytorch/torchtune>`__ repository. For the
|
||||
latest updates, you can use the main branch.
|
||||
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
.. rubric:: Benchmarking examples
|
||||
|
||||
For examples of benchmarking commands, see `<https://github.com/ROCm/MAD/tree/develop/benchmark/pytorch_train#benchmarking-examples>`__.
|
||||
|
||||
Multi-node training
|
||||
-------------------
|
||||
|
||||
Pre-training
|
||||
~~~~~~~~~~~~
|
||||
|
||||
Multi-node training with torchtitan is supported. The provided SLURM script is pre-configured for Llama 3 70B.
|
||||
|
||||
To launch the training job on a SLURM cluster for Llama 3 70B, run the following commands from the MAD repository.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# In the MAD repository
|
||||
cd scripts/pytorch_train
|
||||
sbatch run_slurm_train.sh
|
||||
|
||||
Fine-tuning
|
||||
~~~~~~~~~~~
|
||||
|
||||
Multi-node training with torchtune is supported. The provided SLURM script is pre-configured for Llama 3.3 70B.
|
||||
|
||||
To launch the training job on a SLURM cluster for Llama 3.3 70B, run the following commands from the MAD repository.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
huggingface-cli login # Get access to HF Llama model space
|
||||
huggingface-cli download meta-llama/Llama-3.3-70B-Instruct --local-dir ./models/Llama-3.3-70B-Instruct # Download the Llama 3.3 model locally
|
||||
# In the MAD repository
|
||||
cd scripts/pytorch_train
|
||||
sbatch Torchtune_Multinode.sh
|
||||
|
||||
.. note::
|
||||
|
||||
Information regarding benchmark setup:
|
||||
|
||||
* By default, Llama 3.3 70B is fine-tuned using ``alpaca_dataset``.
|
||||
* You can adjust the torchtune `YAML configuration file
|
||||
<https://github.com/pytorch/torchtune/blob/main/recipes/configs/llama3_3/70B_full_multinode.yaml>`__
|
||||
if you're using a different model.
|
||||
* The number of nodes and other parameters can be tuned in the SLURM script ``Torchtune_Multinode.sh``.
|
||||
* Set the ``mounting_paths`` inside the SLURM script.
|
||||
|
||||
Once the run is finished, you can find the log files in the ``result_torchtune/`` directory.
|
||||
|
||||
Further reading
|
||||
===============
|
||||
|
||||
- To learn more about MAD and the ``madengine`` CLI, see the `MAD usage guide <https://github.com/ROCm/MAD?tab=readme-ov-file#usage-guide>`__.
|
||||
|
||||
- To learn more about system settings and management practices to configure your system for
|
||||
AMD Instinct MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
|
||||
|
||||
- For a list of other ready-made Docker images for AI with ROCm, see
|
||||
`AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.
|
||||
|
||||
Previous versions
|
||||
=================
|
||||
|
||||
See :doc:`pytorch-training-history` to find documentation for previous releases
|
||||
of the ``ROCm/pytorch-training`` Docker image.
|
||||
@@ -0,0 +1,305 @@
|
||||
.. meta::
|
||||
:description: How to train a model using PyTorch for ROCm.
|
||||
:keywords: ROCm, AI, LLM, train, PyTorch, torch, Llama, flux, tutorial, docker
|
||||
|
||||
****************************************
|
||||
Training a model with Primus and PyTorch
|
||||
****************************************
|
||||
|
||||
`Primus <https://github.com/AMD-AGI/Primus>`__ is a unified and flexible
|
||||
LLM training framework designed to streamline training. It streamlines LLM
|
||||
training on AMD Instinct accelerators using a modular, reproducible configuration paradigm.
|
||||
Primus now supports the PyTorch torchtitan backend.
|
||||
|
||||
.. note::
|
||||
|
||||
Primus with the PyTorch torchtitan backend is intended to supersede the :doc:`ROCm PyTorch training <pytorch-training>` workflow.
|
||||
See :doc:`pytorch-training` to see steps to run workloads without Primus.
|
||||
|
||||
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/primus-pytorch-benchmark-models.yaml
|
||||
|
||||
{% set dockers = data.dockers %}
|
||||
{% set docker = dockers[0] %}
|
||||
For ease of use, AMD provides a ready-to-use Docker image -- ``{{
|
||||
docker.pull_tag }}`` -- for MI300X series accelerators containing essential
|
||||
components for Primus and PyTorch training with
|
||||
Primus Turbo optimizations.
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Software component
|
||||
- Version
|
||||
|
||||
{% for component_name, component_version in docker.components.items() %}
|
||||
* - {{ component_name }}
|
||||
- {{ component_version }}
|
||||
{% endfor %}
|
||||
|
||||
.. _amd-primus-pytorch-model-support-v258:
|
||||
|
||||
Supported models
|
||||
================
|
||||
|
||||
The following models are pre-optimized for performance on the AMD Instinct MI325X and MI300X accelerators.
|
||||
Some instructions, commands, and training recommendations in this documentation might
|
||||
vary by model -- select one to get started.
|
||||
|
||||
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/primus-pytorch-benchmark-models.yaml
|
||||
|
||||
{% set unified_docker = data.dockers[0] %}
|
||||
{% set model_groups = data.model_groups %}
|
||||
.. raw:: html
|
||||
|
||||
<div id="vllm-benchmark-ud-params-picker" class="container-fluid">
|
||||
<div class="row gx-0" style="display: none;">
|
||||
<div class="col-2 me-1 px-2 model-param-head">Model</div>
|
||||
<div class="row col-10 pe-0">
|
||||
{% for model_group in model_groups %}
|
||||
<div class="col-3 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row gx-0 pt-1">
|
||||
<div class="col-2 me-1 px-2 model-param-head">Model</div>
|
||||
<div class="row col-10 pe-0">
|
||||
{% for model_group in model_groups %}
|
||||
{% set models = model_group.models %}
|
||||
{% for model in models %}
|
||||
{% if models|length % 3 == 0 %}
|
||||
<div class="col-4 px-2 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
|
||||
{% else %}
|
||||
<div class="col-6 px-2 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
.. seealso::
|
||||
|
||||
For additional workloads, including Llama 3.3, Llama 3.2, Llama 2, GPT OSS, Qwen, and Flux models,
|
||||
see the documentation :doc:`pytorch-training` (without Primus)
|
||||
|
||||
.. _amd-primus-pytorch-performance-measurements-v258:
|
||||
|
||||
System validation
|
||||
=================
|
||||
|
||||
Before running AI workloads, it's important to validate that your AMD hardware is configured
|
||||
correctly and performing optimally.
|
||||
|
||||
If you have already validated your system settings, including aspects like NUMA auto-balancing, you
|
||||
can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
|
||||
optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
|
||||
before starting training.
|
||||
|
||||
To test for optimal performance, consult the recommended :ref:`System health benchmarks
|
||||
<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
|
||||
system's configuration.
|
||||
|
||||
This Docker image is optimized for specific model configurations outlined
|
||||
below. Performance can vary for other training workloads, as AMD
|
||||
doesn’t test configurations and run conditions outside those described.
|
||||
|
||||
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/primus-pytorch-benchmark-models.yaml
|
||||
|
||||
{% set unified_docker = data.dockers[0] %}
|
||||
|
||||
Pull the Docker image
|
||||
=====================
|
||||
|
||||
Use the following command to pull the `Docker image <{{ unified_docker.docker_hub_url }}>`_ from Docker Hub.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull {{ unified_docker.pull_tag }}
|
||||
|
||||
Run training
|
||||
============
|
||||
|
||||
{% set model_groups = data.model_groups %}
|
||||
|
||||
Once the setup is complete, choose between the following two workflows to start benchmarking training.
|
||||
For fine-tuning workloads and multi-node training examples, see :doc:`pytorch-training` (without Primus).
|
||||
|
||||
.. tab-set::
|
||||
|
||||
.. tab-item:: MAD-integrated benchmarking
|
||||
|
||||
{% for model_group in model_groups %}
|
||||
{% for model in model_group.models %}
|
||||
|
||||
.. container:: model-doc {{ model.mad_tag }}
|
||||
|
||||
The following run command is tailored to {{ model.model }}.
|
||||
See :ref:`amd-primus-pytorch-model-support-v258` to switch to another available model.
|
||||
|
||||
1. Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
|
||||
directory and install the required packages on the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD
|
||||
pip install -r requirements.txt
|
||||
|
||||
2. For example, use this command to run the performance benchmark test on the {{ model.model }} model
|
||||
using one node with the {{ model.precision }} data type on the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
|
||||
madengine run \
|
||||
--tags {{ model.mad_tag }} \
|
||||
--keep-model-dir \
|
||||
--live-output \
|
||||
--timeout 28800
|
||||
|
||||
MAD launches a Docker container with the name
|
||||
``container_ci-{{ model.mad_tag }}``. The latency and throughput reports of the
|
||||
model are collected in ``~/MAD/perf.csv``.
|
||||
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
.. tab-item:: Standalone benchmarking
|
||||
|
||||
{% for model_group in model_groups %}
|
||||
{% for model in model_group.models %}
|
||||
|
||||
.. container:: model-doc {{ model.mad_tag }}
|
||||
|
||||
The following run commands are tailored to {{ model.model }}.
|
||||
See :ref:`amd-primus-pytorch-model-support-v258` to switch to another available model.
|
||||
|
||||
.. rubric:: Download the Docker image and required packages
|
||||
|
||||
1. Use the following command to pull the Docker image from Docker Hub.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker pull {{ unified_docker.pull_tag }}
|
||||
|
||||
2. Run the Docker container.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker run -it \
|
||||
--device /dev/dri \
|
||||
--device /dev/kfd \
|
||||
--network host \
|
||||
--ipc host \
|
||||
--group-add video \
|
||||
--cap-add SYS_PTRACE \
|
||||
--security-opt seccomp=unconfined \
|
||||
--privileged \
|
||||
-v $HOME:$HOME \
|
||||
-v $HOME/.ssh:/root/.ssh \
|
||||
--shm-size 64G \
|
||||
--name training_env \
|
||||
{{ unified_docker.pull_tag }}
|
||||
|
||||
Use these commands if you exit the ``training_env`` container and need to return to it.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
docker start training_env
|
||||
docker exec -it training_env bash
|
||||
|
||||
3. In the Docker container, clone the `<https://github.com/ROCm/MAD>`__
|
||||
repository and navigate to the benchmark scripts directory
|
||||
``/workspace/MAD/scripts/pytorch_train``.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD/scripts/pytorch_train
|
||||
|
||||
.. rubric:: Prepare training datasets and dependencies
|
||||
|
||||
1. The following benchmarking examples require downloading models and datasets
|
||||
from Hugging Face. To ensure successful access to gated repos, set your
|
||||
``HF_TOKEN``.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export HF_TOKEN=$your_personal_hugging_face_access_token
|
||||
|
||||
2. Run the setup script to install libraries and datasets needed for benchmarking.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./pytorch_benchmark_setup.sh
|
||||
|
||||
.. rubric:: Pretraining
|
||||
|
||||
To start the pretraining benchmark, use the following command with the
|
||||
appropriate options. See the following list of options and their descriptions.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./pytorch_benchmark_report.sh -t pretrain \
|
||||
-m {{ model.model_repo }} \
|
||||
-p $datatype \
|
||||
-s $sequence_length
|
||||
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Name
|
||||
- Options
|
||||
- Description
|
||||
|
||||
{% for mode in available_modes %}
|
||||
* - {% if loop.first %}``$training_mode``{% endif %}
|
||||
- ``{{ mode }}``
|
||||
- {{ training_mode_descs[mode] }}
|
||||
{% endfor %}
|
||||
|
||||
* - ``$datatype``
|
||||
- ``BF16``{% if model.mad_tag == "primus_pyt_train_llama-3.1-8b" %} or ``FP8``{% endif %}
|
||||
- Currently, only Llama 3.1 8B supports FP8 precision.
|
||||
|
||||
* - ``$sequence_length``
|
||||
- Sequence length for the language model.
|
||||
- Between 2048 and 8192. 8192 by default.
|
||||
|
||||
.. rubric:: Benchmarking examples
|
||||
|
||||
Use the following command to run train {{ model.model }} with BF16 precision using Primus torchtitan.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./pytorch_benchmark_report.sh -m {{ model.model_repo }}
|
||||
|
||||
To train {{ model.model }} with FP8 precision, use the following command.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
./pytorch_benchmark_report.sh -m {{ model.model_repo }} -p FP8
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
Further reading
|
||||
===============
|
||||
|
||||
- For an introduction to Primus, see `Primus: A Lightweight, Unified Training
|
||||
Framework for Large Models on AMD GPUs <https://rocm.blogs.amd.com/software-tools-optimization/primus/README.html>`__.
|
||||
|
||||
- To learn more about MAD and the ``madengine`` CLI, see the `MAD usage guide <https://github.com/ROCm/MAD?tab=readme-ov-file#usage-guide>`__.
|
||||
|
||||
- To learn more about system settings and management practices to configure your system for
|
||||
AMD Instinct MI300X series accelerators, see `AMD Instinct MI300X system optimization <https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/system-optimization/mi300x.html>`_.
|
||||
|
||||
- For a list of other ready-made Docker images for AI with ROCm, see
|
||||
`AMD Infinity Hub <https://www.amd.com/en/developer/resources/infinity-hub.html#f-amd_hub_category=AI%20%26%20ML%20Models>`_.
|
||||
|
||||
Previous versions
|
||||
=================
|
||||
|
||||
See :doc:`previous-versions/pytorch-training-history` to find documentation for previous releases
|
||||
of the ``ROCm/pytorch-training`` Docker image.
|
||||
@@ -1,11 +1,18 @@
|
||||
:orphan:
|
||||
|
||||
.. meta::
|
||||
:description: How to train a model using PyTorch for ROCm.
|
||||
:keywords: ROCm, AI, LLM, train, PyTorch, torch, Llama, flux, tutorial, docker
|
||||
|
||||
**************************************
|
||||
Training a model with PyTorch for ROCm
|
||||
Training a model with PyTorch on ROCm
|
||||
**************************************
|
||||
|
||||
.. note::
|
||||
|
||||
Primus with the PyTorch torchtitan backend is intended to supersede the :doc:`ROCm PyTorch training <pytorch-training>` workflow.
|
||||
See :doc:`primus-pytorch` for details.
|
||||
|
||||
PyTorch is an open-source machine learning framework that is widely used for
|
||||
model training with GPU-optimized components for transformer-based models.
|
||||
|
||||
@@ -49,7 +56,7 @@ vary by model -- select one to get started.
|
||||
<div class="col-2 me-1 px-2 model-param-head">Model</div>
|
||||
<div class="row col-10 pe-0">
|
||||
{% for model_group in model_groups %}
|
||||
<div class="col-3 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
|
||||
<div class="col-4 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
@@ -87,9 +94,11 @@ vary by model -- select one to get started.
|
||||
{% for model_group in model_groups %}
|
||||
{% set models = model_group.models %}
|
||||
{% for model in models %}
|
||||
{% if model.training_modes %}
|
||||
* - {{ model.model }}
|
||||
- ``{{ model.training_modes | join('``, ``') }}``
|
||||
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
@@ -152,20 +161,23 @@ Run training
|
||||
|
||||
.. tab-item:: MAD-integrated benchmarking
|
||||
|
||||
1. Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
|
||||
directory and install the required packages on the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD
|
||||
pip install -r requirements.txt
|
||||
|
||||
{% for model_group in model_groups %}
|
||||
{% for model in model_group.models %}
|
||||
|
||||
.. container:: model-doc {{ model.mad_tag }}
|
||||
|
||||
The following run command is tailored to {{ model.model }}.
|
||||
See :ref:`amd-pytorch-training-model-support` to switch to another available model.
|
||||
|
||||
1. Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
|
||||
directory and install the required packages on the host machine.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD
|
||||
pip install -r requirements.txt
|
||||
|
||||
2. For example, use this command to run the performance benchmark test on the {{ model.model }} model
|
||||
using one node with the {{ model.precision }} data type on the host machine.
|
||||
|
||||
@@ -187,6 +199,17 @@ Run training
|
||||
|
||||
.. tab-item:: Standalone benchmarking
|
||||
|
||||
{% for model_group in model_groups %}
|
||||
{% for model in model_group.models %}
|
||||
|
||||
.. container:: model-doc {{ model.mad_tag }}
|
||||
|
||||
The following commands are tailored to {{ model.model }}.
|
||||
See :ref:`amd-pytorch-training-model-support` to switch to another available model.
|
||||
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
.. rubric:: Download the Docker image and required packages
|
||||
|
||||
1. Use the following command to pull the Docker image from Docker Hub.
|
||||
@@ -388,7 +411,7 @@ Run training
|
||||
.. note::
|
||||
|
||||
Currently, FLUX models are not supported out-of-the-box on {{ unified_docker.pull_tag }}.
|
||||
To use FLUX, refer to the previous version of the ``pytorch-training`` Docker: :doc:`previous-versions/pytorch-training-v25.6`
|
||||
To use FLUX, refer to ``rocm/pytorch-training`` Docker: :doc:`previous-versions/pytorch-training-v25.6`
|
||||
|
||||
Occasionally, downloading the Flux dataset might fail. In the event of this
|
||||
error, manually download it from Hugging Face at
|
||||
|
||||
@@ -65,7 +65,7 @@ ROCm documentation is organized into the following categories:
|
||||
* [ROCm libraries](./reference/api-libraries.md)
|
||||
* [ROCm tools, compilers, and runtimes](./reference/rocm-tools.md)
|
||||
* [Accelerator and GPU hardware specifications](./reference/gpu-arch-specs.rst)
|
||||
* [Precision support](./reference/precision-support.rst)
|
||||
* [Data types and precision support](./reference/precision-support.rst)
|
||||
* [Graph safe support](./reference/graph-safe-support.rst)
|
||||
<!-- markdownlint-enable MD051 -->
|
||||
:::
|
||||
|
||||
@@ -34,6 +34,40 @@ For more information about ROCm hardware compatibility, see the ROCm `Compatibil
|
||||
- SGPR File (KiB)
|
||||
- GFXIP Major version
|
||||
- GFXIP Minor version
|
||||
*
|
||||
- MI355X
|
||||
- CDNA4
|
||||
- gfx950
|
||||
- 288
|
||||
- 256 (32 per XCD)
|
||||
- 64
|
||||
- 160
|
||||
- 256
|
||||
- 32 (4 per XCD)
|
||||
- 32
|
||||
- 16 per 2 CUs
|
||||
- 64 per 2 CUs
|
||||
- 512
|
||||
- 12.5
|
||||
- 9
|
||||
- 5
|
||||
*
|
||||
- MI350X
|
||||
- CDNA4
|
||||
- gfx950
|
||||
- 288
|
||||
- 256 (32 per XCD)
|
||||
- 64
|
||||
- 160
|
||||
- 256
|
||||
- 32 (4 per XCD)
|
||||
- 32
|
||||
- 16 per 2 CUs
|
||||
- 64 per 2 CUs
|
||||
- 512
|
||||
- 12.5
|
||||
- 9
|
||||
- 5
|
||||
*
|
||||
- MI325X
|
||||
- CDNA3
|
||||
|
||||
@@ -14,16 +14,26 @@ completed as an indivisible unit, preventing race conditions where simultaneous
|
||||
access to the same memory location could lead to incorrect or undefined
|
||||
behavior.
|
||||
|
||||
This document details the various support of atomic read-modify-write
|
||||
(atomicRMW) operations on gfx9, gfx10, gfx11, gfx12, MI100, MI200 and MI300 AMD
|
||||
GPUs. The atomics operation type behavior effected by the memory locations,
|
||||
memory granularity or scope of operations.
|
||||
This topic summarizes the support of atomic read-modify-write
|
||||
(atomicRMW) operations on AMD GPUs and accelerators. This includes gfx9, gfx10,
|
||||
gfx11, and gfx12 targets and the following series of Instinct™ series:
|
||||
|
||||
- MI100
|
||||
|
||||
- MI200
|
||||
|
||||
- MI300
|
||||
|
||||
- MI350
|
||||
|
||||
The atomics operation type behavior is affected by the memory locations, memory
|
||||
granularity, and scope of operations.
|
||||
|
||||
Memory locations:
|
||||
|
||||
- :ref:`Device memory <hip:device_memory>`, i.e. VRAM, the RAM on a discrete GPU
|
||||
device or in framebuffer carveout for APUs. This includes peer-device memory
|
||||
within an Infinity Fabric™ hive.
|
||||
- :ref:`Device memory <hip:device_memory>`, that is, VRAM, the RAM on a discrete
|
||||
GPU device or in framebuffer carveout for APUs. This includes peer-device
|
||||
memory within an Infinity Fabric™ hive.
|
||||
|
||||
- :ref:`Host memory <hip:host_memory>`: in DRAM associated with the CPU (or
|
||||
peer device memory using PCIe® (PCI Express) peer-to-peer). This can be two sub-types:
|
||||
@@ -69,10 +79,10 @@ Scopes of operations:
|
||||
Support summary
|
||||
================================================================================
|
||||
|
||||
AMD Instinct™ accelerators
|
||||
AMD Instinct accelerators
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
**MI300**
|
||||
**MI300 and MI350 series**
|
||||
|
||||
- All atomicRMW operations are forwarded out to the Infinity Fabric.
|
||||
- Infinity Fabric supports common integer and bitwise atomics, FP32 atomic add,
|
||||
@@ -85,7 +95,7 @@ AMD Instinct™ accelerators
|
||||
It will seem like atomics to the wave, but the CPU sees it as a non-atomic
|
||||
load-op-store sequence. This downgrades system-scope atomics to device-scope.
|
||||
|
||||
**MI200**
|
||||
**MI200 series**
|
||||
|
||||
- L2 cache and Infinity Fabric both support common integer and bitwise atomics.
|
||||
- L2 cache supports FP32 atomic add, packed-FP16 atomic add, and FP64 add,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -10,6 +10,8 @@
|
||||
|
||||
| Version | Release date |
|
||||
| ------- | ------------ |
|
||||
| [7.0.1](https://rocm.docs.amd.com/en/docs-7.0.1/) | September 17, 2025 |
|
||||
| [7.0.0](https://rocm.docs.amd.com/en/docs-7.0.0/) | September 16, 2025 |
|
||||
| [6.4.3](https://rocm.docs.amd.com/en/docs-6.4.3/) | August 7, 2025 |
|
||||
| [6.4.2](https://rocm.docs.amd.com/en/docs-6.4.2/) | July 21, 2025 |
|
||||
| [6.4.1](https://rocm.docs.amd.com/en/docs-6.4.1/) | May 21, 2025 |
|
||||
|
||||
@@ -67,9 +67,9 @@ subtrees:
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: how-to/rocm-for-ai/training/benchmark-docker/primus-megatron.rst
|
||||
title: Train a model with Primus and Megatron-Core
|
||||
- file: how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.rst
|
||||
title: Train a model with PyTorch
|
||||
title: Train a model with Primus and Megatron-LM
|
||||
- file: how-to/rocm-for-ai/training/benchmark-docker/primus-pytorch.rst
|
||||
title: Train a model with Primus and PyTorch
|
||||
- file: how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.rst
|
||||
title: Train a model with JAX MaxText
|
||||
- file: how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry
|
||||
@@ -106,6 +106,8 @@ subtrees:
|
||||
title: PyTorch inference performance testing
|
||||
- file: how-to/rocm-for-ai/inference/benchmark-docker/sglang.rst
|
||||
title: SGLang inference performance testing
|
||||
- file: how-to/rocm-for-ai/inference/benchmark-docker/sglang-distributed.rst
|
||||
title: SGLang distributed inference with Mooncake
|
||||
- file: how-to/rocm-for-ai/inference/deploy-your-model.rst
|
||||
title: Deploy your model
|
||||
|
||||
@@ -147,7 +149,7 @@ subtrees:
|
||||
- file: how-to/setting-cus
|
||||
title: Set the number of CUs
|
||||
- file: how-to/Bar-Memory.rst
|
||||
title: Troubleshoot BAR access limitation
|
||||
title: Troubleshoot BAR access limitation
|
||||
- url: https://github.com/amd/rocm-examples
|
||||
title: ROCm examples
|
||||
|
||||
@@ -167,7 +169,9 @@ subtrees:
|
||||
- url: https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf
|
||||
title: White paper
|
||||
- file: conceptual/gpu-arch/mi300-mi200-performance-counters.rst
|
||||
title: MI300 and MI200 Performance counter
|
||||
title: MI300 and MI200 performance counters
|
||||
- file: conceptual/gpu-arch/mi350-performance-counters.rst
|
||||
title: MI350 series performance counters
|
||||
- file: conceptual/gpu-arch/mi250.md
|
||||
title: MI250 microarchitecture
|
||||
subtrees:
|
||||
@@ -202,7 +206,7 @@ subtrees:
|
||||
- file: reference/gpu-arch-specs.rst
|
||||
- file: reference/gpu-atomics-operation.rst
|
||||
- file: reference/precision-support.rst
|
||||
title: Precision support
|
||||
title: Data types and precision support
|
||||
- file: reference/graph-safe-support.rst
|
||||
title: Graph safe support
|
||||
|
||||
|
||||
@@ -60,7 +60,7 @@ div[data-param-k="model-group"][data-param-state="disabled"] {
|
||||
.model-param-head {
|
||||
background-color: var(--compat-head-color);
|
||||
padding: 0.15rem 0.15rem 0.15rem 0.67rem;
|
||||
border-right: solid 4px var(--compat-accent-color);
|
||||
border-right: solid 3px var(--compat-accent-color);
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ ROCm is a software stack, composed primarily of open-source software, that
|
||||
provides the tools for programming AMD Graphics Processing Units (GPUs), from
|
||||
low-level kernels to high-level end-user applications.
|
||||
|
||||
.. image:: data/rocm-software-stack-6_4_0.jpg
|
||||
.. image:: data/rocm-software-stack-7_0_0.jpg
|
||||
:width: 800
|
||||
:alt: AMD's ROCm software stack and enabling technologies.
|
||||
:align: center
|
||||
@@ -45,6 +45,10 @@ Machine Learning & Computer Vision
|
||||
":doc:`rocJPEG <rocjpeg:index>`", "Library for decoding JPG images on AMD GPUs"
|
||||
":doc:`rocPyDecode <rocpydecode:index>`", "Provides access to rocDecode APIs in both Python and C/C++ languages"
|
||||
|
||||
.. note::
|
||||
|
||||
`rocCV <https://rocm.docs.amd.com/projects/rocCV/en/latest/index.html>`_ is an efficient GPU-accelerated library for image pre- and post-processing. rocCV is in an early access state. Using it on production workloads is not recommended.
|
||||
|
||||
Communication
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
||||
70
tools/rocm-build/rocm-7.0.0.xml
Normal file
70
tools/rocm-build/rocm-7.0.0.xml
Normal file
@@ -0,0 +1,70 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<manifest>
|
||||
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
|
||||
<default revision="refs/tags/rocm-7.0.0"
|
||||
remote="rocm-org"
|
||||
sync-c="true"
|
||||
sync-j="4" />
|
||||
<!--list of projects for ROCm-->
|
||||
<project name="ROCm" revision="roc-7.0.x" />
|
||||
<project name="ROCK-Kernel-Driver" />
|
||||
<project name="ROCR-Runtime" />
|
||||
<project name="amdsmi" />
|
||||
<project name="aqlprofile" />
|
||||
<project name="rdc" />
|
||||
<project name="rocm_bandwidth_test" />
|
||||
<project name="rocm_smi_lib" />
|
||||
<project name="rocm-core" />
|
||||
<project name="rocm-examples" />
|
||||
<project name="rocminfo" />
|
||||
<project name="rocprofiler" />
|
||||
<project name="rocprofiler-register" />
|
||||
<project name="rocprofiler-sdk" />
|
||||
<project name="rocprofiler-compute" />
|
||||
<project name="rocprofiler-systems" />
|
||||
<project name="roctracer" />
|
||||
<!--HIP Projects-->
|
||||
<project name="hip" />
|
||||
<project name="hip-tests" />
|
||||
<project name="HIPIFY" />
|
||||
<project name="clr" />
|
||||
<project name="hipother" />
|
||||
<!-- The following projects are all associated with the AMDGPU LLVM compiler -->
|
||||
<project name="half" />
|
||||
<project name="llvm-project" />
|
||||
<project name="spirv-llvm-translator" />
|
||||
<!-- gdb projects -->
|
||||
<project name="ROCdbgapi" />
|
||||
<project name="ROCgdb" />
|
||||
<project name="rocr_debug_agent" />
|
||||
<!-- ROCm Libraries -->
|
||||
<project groups="mathlibs" name="AMDMIGraphX" />
|
||||
<project groups="mathlibs" name="MIVisionX" />
|
||||
<project groups="mathlibs" name="ROCmValidationSuite" />
|
||||
<project groups="mathlibs" name="composable_kernel" />
|
||||
<project groups="mathlibs" name="hipSOLVER" />
|
||||
<project groups="mathlibs" name="hipTensor" />
|
||||
<project groups="mathlibs" name="hipfort" />
|
||||
<project groups="mathlibs" name="rccl" />
|
||||
<project groups="mathlibs" name="rocAL" />
|
||||
<project groups="mathlibs" name="rocALUTION" />
|
||||
<project groups="mathlibs" name="rocDecode" />
|
||||
<project groups="mathlibs" name="rocJPEG" />
|
||||
<!-- The following components have been migrated to rocm-libraries:
|
||||
hipBLAS-common hipBLAS hipBLASLt hipCUB
|
||||
hipFFT hipRAND hipSPARSE hipSPARSELt
|
||||
MIOpen rocBLAS rocFFT rocPRIM rocRAND
|
||||
rocSPARSE rocThrust Tensile -->
|
||||
<project groups="mathlibs" name="rocm-libraries" />
|
||||
<project groups="mathlibs" name="rocPyDecode" />
|
||||
<project groups="mathlibs" name="rocSHMEM" />
|
||||
<project groups="mathlibs" name="rocSOLVER" />
|
||||
<project groups="mathlibs" name="rocWMMA" />
|
||||
<project groups="mathlibs" name="rocm-cmake" />
|
||||
<project groups="mathlibs" name="rpp" />
|
||||
<project groups="mathlibs" name="TransferBench" />
|
||||
<!-- Projects for OpenMP-Extras -->
|
||||
<project name="aomp" path="openmp-extras/aomp" />
|
||||
<project name="aomp-extras" path="openmp-extras/aomp-extras" />
|
||||
<project name="flang" path="openmp-extras/flang" />
|
||||
</manifest>
|
||||
70
tools/rocm-build/rocm-7.0.1.xml
Normal file
70
tools/rocm-build/rocm-7.0.1.xml
Normal file
@@ -0,0 +1,70 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<manifest>
|
||||
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
|
||||
<default revision="refs/tags/rocm-7.0.1"
|
||||
remote="rocm-org"
|
||||
sync-c="true"
|
||||
sync-j="4" />
|
||||
<!--list of projects for ROCm-->
|
||||
<project name="ROCm" revision="roc-7.0.x" />
|
||||
<project name="ROCK-Kernel-Driver" />
|
||||
<project name="ROCR-Runtime" />
|
||||
<project name="amdsmi" />
|
||||
<project name="aqlprofile" />
|
||||
<project name="rdc" />
|
||||
<project name="rocm_bandwidth_test" />
|
||||
<project name="rocm_smi_lib" />
|
||||
<project name="rocm-core" />
|
||||
<project name="rocm-examples" />
|
||||
<project name="rocminfo" />
|
||||
<project name="rocprofiler" />
|
||||
<project name="rocprofiler-register" />
|
||||
<project name="rocprofiler-sdk" />
|
||||
<project name="rocprofiler-compute" />
|
||||
<project name="rocprofiler-systems" />
|
||||
<project name="roctracer" />
|
||||
<!--HIP Projects-->
|
||||
<project name="hip" />
|
||||
<project name="hip-tests" />
|
||||
<project name="HIPIFY" />
|
||||
<project name="clr" />
|
||||
<project name="hipother" />
|
||||
<!-- The following projects are all associated with the AMDGPU LLVM compiler -->
|
||||
<project name="half" />
|
||||
<project name="llvm-project" />
|
||||
<project name="spirv-llvm-translator" />
|
||||
<!-- gdb projects -->
|
||||
<project name="ROCdbgapi" />
|
||||
<project name="ROCgdb" />
|
||||
<project name="rocr_debug_agent" />
|
||||
<!-- ROCm Libraries -->
|
||||
<project groups="mathlibs" name="AMDMIGraphX" />
|
||||
<project groups="mathlibs" name="MIVisionX" />
|
||||
<project groups="mathlibs" name="ROCmValidationSuite" />
|
||||
<project groups="mathlibs" name="composable_kernel" />
|
||||
<project groups="mathlibs" name="hipSOLVER" />
|
||||
<project groups="mathlibs" name="hipTensor" />
|
||||
<project groups="mathlibs" name="hipfort" />
|
||||
<project groups="mathlibs" name="rccl" />
|
||||
<project groups="mathlibs" name="rocAL" />
|
||||
<project groups="mathlibs" name="rocALUTION" />
|
||||
<project groups="mathlibs" name="rocDecode" />
|
||||
<project groups="mathlibs" name="rocJPEG" />
|
||||
<!-- The following components have been migrated to rocm-libraries:
|
||||
hipBLAS-common hipBLAS hipBLASLt hipCUB
|
||||
hipFFT hipRAND hipSPARSE hipSPARSELt
|
||||
MIOpen rocBLAS rocFFT rocPRIM rocRAND
|
||||
rocSPARSE rocThrust Tensile -->
|
||||
<project groups="mathlibs" name="rocm-libraries" />
|
||||
<project groups="mathlibs" name="rocPyDecode" />
|
||||
<project groups="mathlibs" name="rocSHMEM" />
|
||||
<project groups="mathlibs" name="rocSOLVER" />
|
||||
<project groups="mathlibs" name="rocWMMA" />
|
||||
<project groups="mathlibs" name="rocm-cmake" />
|
||||
<project groups="mathlibs" name="rpp" />
|
||||
<project groups="mathlibs" name="TransferBench" />
|
||||
<!-- Projects for OpenMP-Extras -->
|
||||
<project name="aomp" path="openmp-extras/aomp" />
|
||||
<project name="aomp-extras" path="openmp-extras/aomp-extras" />
|
||||
<project name="flang" path="openmp-extras/flang" />
|
||||
</manifest>
|
||||
Reference in New Issue
Block a user