From 635838e7efbe7f15f6a86d772a127e6e22b80624 Mon Sep 17 00:00:00 2001 From: Istvan Kiss Date: Wed, 12 Mar 2025 20:36:57 +0100 Subject: [PATCH] Add atomics operation support page --- .gitignore | 1 + .wordlist.txt | 19 + docs/conf.py | 13 +- .../cas-atomics_nopcie_gfx.csv | 325 +++++++ .../cas-atomics_nopcie_instinct.csv | 325 +++++++ .../cas-atomics_pcie_gfx.csv | 325 +++++++ .../cas-atomics_pcie_instinct.csv | 325 +++++++ .../hw-atomics_nopcie_gfx.csv | 349 ++++++++ .../hw-atomics_nopcie_instinct.csv | 325 +++++++ .../hw-atomics_pcie_gfx.csv | 349 ++++++++ .../hw-atomics_pcie_instinct.csv | 325 +++++++ docs/extension/csv-to-list-table.py | 138 +++ docs/reference/gpu-atomics-operation.rst | 791 ++++++++++++++++++ docs/reference/precision-support.rst | 19 +- docs/sphinx/_toc.yml.in | 20 +- 15 files changed, 3631 insertions(+), 18 deletions(-) create mode 100644 docs/data/reference/gpu-atomics-operation/cas-atomics_nopcie_gfx.csv create mode 100644 docs/data/reference/gpu-atomics-operation/cas-atomics_nopcie_instinct.csv create mode 100644 docs/data/reference/gpu-atomics-operation/cas-atomics_pcie_gfx.csv create mode 100644 docs/data/reference/gpu-atomics-operation/cas-atomics_pcie_instinct.csv create mode 100644 docs/data/reference/gpu-atomics-operation/hw-atomics_nopcie_gfx.csv create mode 100644 docs/data/reference/gpu-atomics-operation/hw-atomics_nopcie_instinct.csv create mode 100644 docs/data/reference/gpu-atomics-operation/hw-atomics_pcie_gfx.csv create mode 100644 docs/data/reference/gpu-atomics-operation/hw-atomics_pcie_instinct.csv create mode 100644 docs/extension/csv-to-list-table.py create mode 100644 docs/reference/gpu-atomics-operation.rst diff --git a/.gitignore b/.gitignore index 28fd470eb..38cff813b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ build # documentation artifacts _build/ _images/ +__pycache__/ _static/ _templates/ _toc.yml diff --git a/.wordlist.txt b/.wordlist.txt index a784fd483..50f37fd40 100644 --- a/.wordlist.txt +++ 
b/.wordlist.txt @@ -18,12 +18,14 @@ APBDIS APIC APIs APU +APUs ASIC ASICs ASan ASAN ASm ATI +atomicRMW AddressSanitizer AlexNet Andrej @@ -36,6 +38,7 @@ Blit Blockwise Bluefield Bootloader +CAS CCD CDNA CHTML @@ -78,6 +81,8 @@ DBRX DDR DF DGEMM +dGPU +dGPUs DIMM DKMS DL @@ -98,6 +103,7 @@ DeepSpeed Dependabot Deprecations DevCap +DirectX Dockerfile Doxygen ELMo @@ -177,6 +183,7 @@ IDE IDEs IFWI IMDb +IncDec IOMMU IOP IOPM @@ -272,6 +279,7 @@ NaN Nano Navi Noncoherently +NoReturn NousResearch's NumPy OAM @@ -503,15 +511,19 @@ bfloat bilinear bitcode bitsandbytes +bitwise +Bitwise blit bootloader boson bosons br +BrainFloat buildable bursty bzip cacheable +carveout cd centos centric @@ -590,6 +602,7 @@ ffmpeg filesystem fortran fp +framebuffer gRPC galb gcc @@ -672,6 +685,7 @@ macOS matchers microarchitecture migraphx +migratable miopen miopengemm mivisionx @@ -684,6 +698,9 @@ mvffr namespace namespaces nanoGPT +NCS +NOP +NVLink num numref ocl @@ -835,6 +852,7 @@ tracebacks txt uarch uncached +uncacheable uncorrectable underoptimized unhandled @@ -879,6 +897,7 @@ wrreq wzo xargs xGMI +xPacked xz yaml ysvmadyb diff --git a/docs/conf.py b/docs/conf.py index 03b1a72a3..d2517d013 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,6 +30,7 @@ if os.environ.get("READTHEDOCS", "") == "True": # configurations for PDF output by Read the Docs project = "ROCm Documentation" +project_path = os.path.abspath(".").replace("\\", "/") author = "Advanced Micro Devices, Inc." copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved." 
version = "6.4.0" @@ -94,7 +95,7 @@ external_toc_path = "./sphinx/_toc.yml" # Add the _extensions directory to Python's search path sys.path.append(str(Path(__file__).parent / 'extension')) -extensions = ["rocm_docs", "sphinx_reredirects", "sphinx_sitemap", "sphinxcontrib.datatemplates", "version-ref"] +extensions = ["rocm_docs", "sphinx_reredirects", "sphinx_sitemap", "sphinxcontrib.datatemplates", "version-ref", "csv-to-list-table"] compatibility_matrix_file = str(Path(__file__).parent / 'compatibility/compatibility-matrix-historical-6.0.csv') @@ -122,3 +123,13 @@ html_theme_options = {"link_main_doc": False} redirects = {"reference/openmp/openmp": "../../about/compatibility/openmp.html"} numfig = False + +html_context = { + "project_path" : {project_path}, + "gpu_type" : [('AMD Instinct accelerators', 'intrinsic'), ('AMD gfx families', 'gfx'), ('NVIDIA families', 'nvidia') ], + "atomics_type" : [('HW atomics', 'hw-atomics'), ('CAS emulation', 'cas-atomics')], + "pcie_type" : [('No PCIe atomics', 'nopcie'), ('PCIe atomics', 'pcie')], + "memory_type" : [('Device DRAM', 'device-dram'), ('Migratable Host DRAM', 'migratable-host-dram'), ('Pinned Host DRAM', 'pinned-host-dram')], + "granularity_type" : [('Coarse-grained', 'coarse-grained'), ('Fine-grained', 'fine-grained')], + "scope_type" : [('Device', 'device'), ('System', 'system')] +} diff --git a/docs/data/reference/gpu-atomics-operation/cas-atomics_nopcie_gfx.csv b/docs/data/reference/gpu-atomics-operation/cas-atomics_nopcie_gfx.csv new file mode 100644 index 000000000..3fc3385ab --- /dev/null +++ b/docs/data/reference/gpu-atomics-operation/cas-atomics_nopcie_gfx.csv @@ -0,0 +1,325 @@ +Atomic,gfx9 dGPU,gfx9 APU,gfx10 dGPU,gfx11 dGPU,gfx11 APU,gfx12 dGPU +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ 
CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ 
CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ 
CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ 
CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ 
CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicSub,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicInc,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicDec,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit float atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit float atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit float atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit float atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit float atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit float atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atoimcExch,❌ 
NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicCAS,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicAnd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicOr,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicXor,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicExch,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicCAS,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicAnd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicOr,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicXor,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicSub,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicInc,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicDec,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit float atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit float atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit float atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit float atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit float atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit float atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +16bx2 half2 atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +16bx2 bfloat162 atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atoimcExch,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicCAS,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade - CAS +32 bit 
atomicAnd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicOr,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicXor,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit atomicExch,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit atomicCAS,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade - CAS +64 bit atomicAnd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit atomicOr,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit atomicXor,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ 
CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicSub,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicInc,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicDec,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS 
+64 bit atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit float atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit float atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit float atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit float atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit float atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit float atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atoimcExch,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicCAS,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicAnd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicOr,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicXor,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicExch,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicCAS,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicAnd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicOr,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicXor,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicSub,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicInc,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicDec,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit float atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit float atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ 
Scope Downgrade - CAS +32 bit float atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit float atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit float atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit float atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +16bx2 half2 atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +16bx2 bfloat162 atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atoimcExch,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicCAS,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade - CAS +32 bit atomicAnd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicOr,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +32 bit atomicXor,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit atomicExch,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit atomicCAS,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicAnd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit atomicOr,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit atomicXor,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS diff --git a/docs/data/reference/gpu-atomics-operation/cas-atomics_nopcie_instinct.csv b/docs/data/reference/gpu-atomics-operation/cas-atomics_nopcie_instinct.csv new file mode 100644 index 000000000..cb909bbb0 --- /dev/null +++ b/docs/data/reference/gpu-atomics-operation/cas-atomics_nopcie_instinct.csv @@ -0,0 +1,325 @@ +Atomic,MI100,MI200 PCIe,MI200 A+A,MI300X,MI300A +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit 
atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ 
CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ 
CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit 
atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicSub,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicMin,❌ NOP,❌ 
NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicInc,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicDec,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atoimcExch,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicExch,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicSub,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicInc,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicDec,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit float 
atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atoimcExch,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ Native +32 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit atomicExch,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ Native +64 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ 
CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicSub,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ 
CAS +32 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicInc,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicDec,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atoimcExch,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ CAS +32 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicExch,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ CAS +64 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,✅ CAS,✅ CAS +32 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicSub,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicInc,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicDec,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit float atomicMax,❌ NOP,❌ NOP,❌ 
NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atoimcExch,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ Native +32 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit atomicExch,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit atomicCAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +64 bit atomicAnd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit atomicOr,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit atomicXor,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade - CAS,✅ CAS diff --git a/docs/data/reference/gpu-atomics-operation/cas-atomics_pcie_gfx.csv b/docs/data/reference/gpu-atomics-operation/cas-atomics_pcie_gfx.csv new file mode 100644 index 000000000..7bd152ccd --- /dev/null +++ b/docs/data/reference/gpu-atomics-operation/cas-atomics_pcie_gfx.csv @@ -0,0 +1,325 @@ +Atomic,gfx9 dGPU,gfx9 APU,gfx10 dGPU,gfx11 dGPU,gfx11 APU,gfx12 dGPU +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit 
float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ 
CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ 
CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ 
CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ 
CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ 
CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ 
CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ 
CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit 
atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS diff --git 
a/docs/data/reference/gpu-atomics-operation/cas-atomics_pcie_instinct.csv b/docs/data/reference/gpu-atomics-operation/cas-atomics_pcie_instinct.csv new file mode 100644 index 000000000..74bbfed10 --- /dev/null +++ b/docs/data/reference/gpu-atomics-operation/cas-atomics_pcie_instinct.csv @@ -0,0 +1,325 @@ +Atomic,MI100,MI200 PCIe,MI200 A+A,MI300X,MI300A +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ 
CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ 
CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ 
CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS 
+16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 
bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ 
CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit 
atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicSub,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicInc,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicDec,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS 
+32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atoimcExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicExch,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicOr,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit atomicXor,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS diff --git a/docs/data/reference/gpu-atomics-operation/hw-atomics_nopcie_gfx.csv b/docs/data/reference/gpu-atomics-operation/hw-atomics_nopcie_gfx.csv new file mode 100644 index 000000000..da2039d10 --- /dev/null +++ b/docs/data/reference/gpu-atomics-operation/hw-atomics_nopcie_gfx.csv @@ -0,0 +1,349 @@ +Atomic,gfx9 dGPU,gfx9 APU,gfx10 dGPU,gfx11 dGPU,gfx11 APU,gfx12 dGPU +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native +32 bit float 
atomicMin,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ 
CAS,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,❌ NOP,❌ NOP,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ 
Native +32 bit float atomicMax,✅ CAS,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,❌ NOP,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,❌ NOP,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,❌ NOP,❌ NOP,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,❌ 
NOP,❌ NOP,❌ NOP,✅ Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,❌ NOP,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,❌ NOP,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ 
Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit 
float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicSub,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicMin,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicMax,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicInc,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicDec,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicAdd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicMin,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicMax,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit float atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ Native +32 bit float atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ Native +32 bit float atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ Native +64 bit float atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit float atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit 
float atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ Native +32 bit atoimcExch,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicCAS,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicAnd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicOr,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicXor,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicExch,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicCAS,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicAnd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicOr,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicXor,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicSub,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicMin,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicMax,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicInc,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicDec,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicAdd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicMin,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicMax,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit float atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade +32 bit float atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade +32 bit float atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade +64 bit float atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit float atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit float atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope 
Downgrade - CAS +16bx2 half2 atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade +16bx2 bfloat162 atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade +32 bit atoimcExch,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicCAS,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicAnd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicOr,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicXor,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicExch,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicCAS,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicAnd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicOr,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicXor,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ 
CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,n/a,n/a,n/a,n/a,n/a,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ 
CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicSub,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicMin,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicMax,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicInc,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicDec,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicAdd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicMin,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicMax,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit float atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ Native +32 bit float atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ Native +32 bit float atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ Native +64 bit float atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit float atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +64 bit float atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,✅ 
Native +32 bit atoimcExch,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicCAS,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicAnd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicOr,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicXor,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicExch,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicCAS,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicAnd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicOr,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicXor,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicSub,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicMin,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicMax,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicInc,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicDec,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicAdd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicMin,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicMax,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit float atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade +32 bit float atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade +32 bit float atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade +64 bit float atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit float atomicMin,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +64 bit float atomicMax,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS +16bx2 half2 atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade +16bx2 bfloat162 atomicAdd,❌ NOP,✅ CAS,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade +32 bit 
atoimcExch,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicCAS,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicAnd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicOr,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicXor,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicExch,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicCAS,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicAnd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicOr,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicXor,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a diff --git a/docs/data/reference/gpu-atomics-operation/hw-atomics_nopcie_instinct.csv b/docs/data/reference/gpu-atomics-operation/hw-atomics_nopcie_instinct.csv new file mode 100644 index 000000000..18f0bf55c --- /dev/null +++ b/docs/data/reference/gpu-atomics-operation/hw-atomics_nopcie_instinct.csv @@ -0,0 +1,325 @@ +Atomic,MI100,MI200 PCIe,MI200 A+A,MI300X,MI300A +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit 
float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atomicExch,✅ Native,✅ 
Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ 
Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native 
+32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ 
Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit float atomicMin,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS +32 bit float 
atomicMax,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,✅ CAS,✅ Native,✅ Native +32 bit atomicExch,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicExch,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +32 bit float atomicMin,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit float atomicMax,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope 
Downgrade,✅ Native +16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade,✅ Native +32 bit atomicExch,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicExch,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ 
Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 
bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit float atomicMin,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,❌ NOP,❌ NOP,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,✅ CAS,✅ Native,✅ Native +32 bit atomicExch,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicExch,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ 
Native +32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicAdd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +32 bit float atomicMin,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS,✅ CAS +32 bit float atomicMax,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade - CAS,✅ CAS +64 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +64 bit float atomicMin,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +64 bit float atomicMax,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +16bx2 bfloat162 atomicAdd,❌ NOP,❌ NOP,✅ CAS,⚠️ Scope Downgrade,✅ Native +32 bit atomicExch,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicExch,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicCAS,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native diff --git a/docs/data/reference/gpu-atomics-operation/hw-atomics_pcie_gfx.csv b/docs/data/reference/gpu-atomics-operation/hw-atomics_pcie_gfx.csv new file mode 100644 index 000000000..0c2cc65de --- /dev/null +++ b/docs/data/reference/gpu-atomics-operation/hw-atomics_pcie_gfx.csv @@ -0,0 +1,349 @@ +Atomic,gfx9 dGPU,gfx9 APU,gfx10 dGPU,gfx11 dGPU,gfx11 APU,gfx12 dGPU +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ 
Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ 
Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit 
atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,❌ NOP,❌ NOP,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,❌ NOP,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,❌ NOP,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ 
Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,❌ NOP,❌ NOP,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,❌ NOP,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,❌ NOP,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ 
Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ 
Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicMin,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit 
atomicMax,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicInc,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicDec,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicMax,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,❌ NOP,❌ NOP,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,❌ NOP,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,❌ NOP,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicOr,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicXor,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicOr,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicXor,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicMin,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicMax,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicInc,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 
bit atomicDec,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicMax,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade +32 bit float atomicMin,✅ CAS,✅ CAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade +32 bit float atomicMax,✅ CAS,✅ CAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,❌ NOP,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,❌ NOP,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,⚠️ Scope Downgrade +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,⚠️ Scope Downgrade +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicOr,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicXor,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicOr,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicXor,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ 
Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit 
atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,✅ Native,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicMin,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicMax,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicInc,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicDec,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ 
Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicMax,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,❌ NOP,❌ NOP,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ Native +32 bit float atomicMax,✅ CAS,✅ CAS,❌ NOP,❌ NOP,❌ NOP,✅ Native +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,❌ NOP,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,❌ NOP,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicOr,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit atomicXor,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicOr,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit atomicXor,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,✅ Native +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicMin,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicMax,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicInc,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicDec,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,❌ NOP,✅ Native,❌ 
NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicMax,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade +32 bit float atomicMin,✅ CAS,✅ CAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade +32 bit float atomicMax,✅ CAS,✅ CAS,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade +64 bit float atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMin,✅ CAS,✅ CAS,❌ NOP,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicMax,✅ CAS,✅ CAS,❌ NOP,✅ CAS,✅ CAS,✅ CAS +16bx2 half2 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,⚠️ Scope Downgrade +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS,⚠️ Scope Downgrade +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicOr,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +32 bit atomicXor,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicOr,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +64 bit atomicXor,❌ NOP,✅ Native,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade +128 bit atomicCAS,n/a,n/a,n/a,n/a,n/a,n/a +128 bit atomicExch,n/a,n/a,n/a,n/a,n/a,n/a diff --git a/docs/data/reference/gpu-atomics-operation/hw-atomics_pcie_instinct.csv b/docs/data/reference/gpu-atomics-operation/hw-atomics_pcie_instinct.csv new file mode 100644 index 000000000..cf4136864 --- /dev/null +++ b/docs/data/reference/gpu-atomics-operation/hw-atomics_pcie_instinct.csv @@ -0,0 +1,325 @@ +Atomic,MI100,MI200 PCIe,MI200 A+A,MI300X,MI300A +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ 
Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit 
atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 
bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atoimcExch,✅ Native,✅ 
Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit 
atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ 
Native,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ 
Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,⚠️ Scope Downgrade,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ NoReturn,✅ 
Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicInc,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicDec,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMax,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +64 bit float 
atomicMax,✅ CAS,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 half2 atomicAdd,✅ NoReturn,✅ Native,✅ Native,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicOr,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicXor,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,✅ Native,✅ Native +16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,✅ Native,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,✅ Native,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 
bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,✅ Native,✅ Native +32 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicSub,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicInc,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicDec,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicAdd,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicMin,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicMax,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit float atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +32 bit float atomicMin,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +32 bit float atomicMax,✅ CAS,✅ CAS,✅ CAS,✅ CAS,✅ CAS +64 bit float atomicAdd,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +64 bit float atomicMin,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +64 bit float atomicMax,✅ CAS,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +16bx2 half2 atomicAdd,❌ NOP,❌ NOP,❌ NOP,⚠️ Scope Downgrade,✅ Native +16bx2 bfloat162 atomicAdd,✅ CAS,✅ CAS,✅ CAS,⚠️ Scope Downgrade,✅ Native +32 bit atoimcExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +32 bit atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +32 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicExch,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit atomicCAS,✅ Native,✅ Native,✅ Native,✅ Native,✅ Native +64 bit 
atomicAnd,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicOr,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native +64 bit atomicXor,❌ NOP,❌ NOP,✅ Native,⚠️ Scope Downgrade,✅ Native diff --git a/docs/extension/csv-to-list-table.py b/docs/extension/csv-to-list-table.py new file mode 100644 index 000000000..1ad87ac3d --- /dev/null +++ b/docs/extension/csv-to-list-table.py @@ -0,0 +1,138 @@ +import os +import csv +import re +from docutils.parsers.rst import Directive + +class CsvToListTable(Directive): + required_arguments = 0 + optional_arguments = 1 + final_argument_whitespace = True + option_spec = { + 'file': str, + 'include-header': lambda x: x.lower() == 'true', # Boolean option + 'rows': str, # Comma-separated list of row ranges and indices + 'widths': lambda x: [int(i) for i in x.split(',')], + 'columns': lambda x: [int(i) for i in x.split(',')] # Columns to include (by index) + } + + def run(self): + env = self.state.document.settings.env + src_dir = os.path.abspath(env.srcdir) + + # Get options + file_path = self.options.get('file') + if not file_path: + raise self.error("The :file: option is required.") + + full_file_path = os.path.join(src_dir, file_path) + + # Check if the file exists + if not os.path.exists(full_file_path): + raise self.error(f"CSV file {full_file_path} does not exist.") + + include_header = self.options.get('include-header', True) + rows_option = self.options.get('rows', '') + widths = self.options.get('widths', []) + columns = self.options.get('columns', []) + + # Parse the `:rows:` option + selected_rows = self.parse_rows_option(rows_option) + + # Read CSV and process rows + with open(full_file_path, newline='', encoding='utf-8') as csvfile: + reader = csv.reader(csvfile) + data = list(reader) + + if not data: + raise self.error(f"CSV file {full_file_path} is empty or could not be read.") + + # Include the header if specified + if include_header: + headers = data[0] + table_data = [data[i] for i in selected_rows if 0 <= i < 
len(data)] + else: + headers = [] + table_data = [data[i] for i in selected_rows if 0 <= i < len(data)] + + # If columns are specified, filter the columns + if columns: + headers = [headers[i] for i in columns] if headers else [] + table_data = [[row[i] for i in columns] for row in table_data] + + # Generate the list-table RST content + list_table_rst = self.generate_list_table(headers, table_data, widths) + + # Parse the generated RST content and return the nodes + self.state_machine.insert_input(list_table_rst.splitlines(), full_file_path) + return [] + + def parse_rows_option(self, rows_option): + """ + Parse the `:rows:` option and return a list of selected row indices. + """ + if not rows_option: + return [] + + row_indices = set() + ranges = rows_option.split(',') + + for r in ranges: + if '-' in r: + start, end = map(int, r.split('-')) + row_indices.update(range(start - 1, end)) # Convert to 0-based indexing + else: + row_indices.add(int(r) - 1) # Convert to 0-based indexing + + return sorted(row_indices) + + def generate_list_table(self, headers, table_data, widths): + """Generate RST list-table content from CSV data.""" + rows = [] + rows.extend([headers] if headers else []) + rows.extend(table_data) + + # Start the list-table directive + list_table_lines = [".. list-table::"] + + # Add widths if specified + if widths: + widths_str = ", ".join(str(w) for w in widths) + list_table_lines.append(f" :widths: {widths_str}") + + # Add header rows if there's a header + if headers: + list_table_lines.append(f" :header-rows: 1") + + list_table_lines.append("") # Blank line after options + + # Add the rows + for row in rows: + row_line = " * - | " + self.format_cell(row[0]) # First cell + for cell in row[1:]: + row_line += f"\n - | {self.format_cell(cell)}" + list_table_lines.append(row_line) + + return "\n".join(list_table_lines) + + def format_cell(self, cell): + """ + Format a cell's content for multi-line text handling, including automatic line break detection. 
+ """ + # Replace common line-break markers with actual line breaks + for marker in ["|br|", "\\n", "|"]: + cell = cell.replace(marker, "\n") + + # Split the cell content into lines + lines = cell.splitlines() + if len(lines) > 1: + # For multi-line content, indent subsequent lines + return f"\n | ".join(lines) + return cell + + +def setup(app): + app.add_directive('csv-to-list-table', CsvToListTable) + return { + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/docs/reference/gpu-atomics-operation.rst b/docs/reference/gpu-atomics-operation.rst new file mode 100644 index 000000000..3f6e2f7c6 --- /dev/null +++ b/docs/reference/gpu-atomics-operation.rst @@ -0,0 +1,791 @@ +.. meta:: + :description: AMD Instinct accelerator, AMD Radeon PRO, and AMD Radeon GPU + atomics operations information + :keywords: Atomics operations, atomic bitwise functions, atomics add, atomics + subtraction, atomics exchange, atomics min, atomics max + +.. _hw_atomics_operation_support: + +Hardware atomics operation support +================================================================================ + +:ref:`Atomic operations ` guarantee that the operation is +completed as an indivisible unit, preventing race conditions where simultaneous +access to the same memory location could lead to incorrect or undefined +behavior. + +This document details the support for atomic read-modify-write +(atomicRMW) operations on gfx9, gfx10, gfx11, gfx12, MI100, MI200 and MI300 AMD +GPUs. The atomics operation type behavior is affected by the memory locations, +memory granularity, or scope of operations. + +Memory locations: + +- :ref:`Device memory `, i.e. VRAM, the RAM on a discrete GPU + device or in framebuffer carveout for APUs. This includes peer-device memory + within an Infinity Fabric™ hive. + +- :ref:`Host memory `: in DRAM associated with the CPU (or + peer device memory using PCIe® (PCI Express) peer-to-peer).
This can be one of two sub-types: + + - Migratable memory: memory that is currently residing in host DRAM, but + which can be migrated back to device memory. For example, + ``hipMallocManaged()`` or :ref:`unified memory ` + allocations. + + - :ref:`Pinned memory `: memory that is in host memory + and cannot be migrated to the device (not necessarily pinned to a particular + physical address, but can't be moved to device memory). ``hipHostMalloc()``, + for example. + +Memory granularity or :ref:`coherence `: + +- Coarse-grained memory + + - This memory can be used for device-scope synchronization during the + execution of a single GPU kernel. Any system-scope atomics sent to this type + of memory will not achieve system-scope coherency and will instead be + downgraded to device-scope as per the programming model. + + - This type of memory is only available on AMD GPUs. + +- Fine-grained memory + + - This memory can be used for device and system-scope synchronization during + the execution of a single GPU kernel. + +Scopes of operations: + +- Device-scope or agent-scope + + - This atomic should happen atomically from the point of view of every thread + within the device that the atomic-executing thread is in. + +- System-scope + + - This atomic should happen atomically from the point of view of every thread + in all devices and in the CPUs. + +Support summary +================================================================================ + +AMD Instinct™ accelerators +-------------------------------------------------------------------------------- + +**MI300** + +- All atomicRMW operations are forwarded out to the Infinity Fabric. +- Infinity Fabric supports common integer and bitwise atomics, FP32 atomic add, + packed-FP16 atomic add, packed-BF16 atomic add, and FP64 add, min, and max. +- In discrete GPUs (dGPUs), if the data is stored in host memory, the atomic + will be forwarded from the Infinity Fabric to PCIe.
+- If the PCIe bus does not support the requested atomic, the GPU's PCIe + controller changes it into a load-op-store sequence. All waves on the chip + submitting atomics to that address will stall waiting for the load-op-store. + It will seem like atomics to the wave, but the CPU sees it as a non-atomic + load-op-store sequence. This downgrades system-scope atomics to device-scope. + +**MI200** + +- L2 cache and Infinity Fabric both support common integer and bitwise atomics. +- L2 cache supports FP32 atomic add, packed-FP16 atomic add, and FP64 add, + min, and max. +- The Infinity Fabric does not support FP32 atomic add, packed-FP16 atomic add, + and FP64 add, min, and max atomics and these commands cannot be sent to the + Infinity Fabric. +- Coarse-grained memory is marked as cacheable, and atomic operations will be + processed in the L2 cache. +- Fine-grained memory is marked write-uncacheable through the page tables. +- Atomics that hit write-uncached memory are forwarded to the Infinity Fabric. +- If the uncached data is stored in host memory on a PCIe system, the atomic + will be forwarded from Infinity Fabric to PCIe. Any atomic not supported by + the PCIe bus will be a NOP and give an incorrect result. +- If the uncached data is stored in host memory on an A+A system (system with + AMD CPU and AMD GPU connected via Infinity Fabric), the atomic operation will + be forwarded to the remote location and will succeed if supported by Infinity + Fabric. +- If the float atomics access write-uncached memory, they cannot be forwarded to + the Infinity Fabric, resulting in a NOP and an incorrect outcome. + +**MI100** + +- L2 cache and Infinity Fabric both support common integer and bitwise atomics. +- L2 cache supports no-return (NoReturn) versions of packed-FP16 and FP32 + atomic adds, which cannot return data. +- The Infinity Fabric does not support packed-FP16 or FP32 atomic adds, + preventing these commands from being transmitted through it.
+- Coarse-grained memory is marked as cacheable, and atomic operations will be + processed in the L2 cache. +- Fine-grained memory is marked uncacheable through the page tables. +- Atomics that hit uncached memory are forwarded to the Infinity Fabric. +- If the uncached data is stored in host memory, the atomic will be forwarded + from Infinity Fabric to PCIe. Any atomic not supported by the PCIe bus will + be a NOP and give an incorrect result. +- If a float atomic add hits uncached memory, it cannot be forwarded to the + Infinity Fabric, so it will be a NOP and give an incorrect result. + +AMD gfx generic targets +-------------------------------------------------------------------------------- + +**gfx9** + +- L2 cache and Infinity Fabric both support common integer and bitwise atomics. +- Coarse-grained memory is marked as cacheable, and atomic operations will be + processed in the L2 cache. +- Fine-grained memory is marked uncacheable through the page tables. +- Atomics that hit uncached memory are forwarded to the Infinity Fabric. +- In a dGPU: if the uncached data is stored in host memory, the atomic will be + forwarded from Infinity Fabric to PCIe. Any atomic not supported by the PCIe + bus will be a NOP and give an incorrect result. + +**gfx10** + +- L2 cache and Infinity Fabric both support common integer and bitwise atomics. +- Coarse-grained memory is marked as cacheable, and atomic operations will be + processed in the L2 cache. +- Fine-grained memory is marked uncacheable through the page tables. +- Atomics that hit uncached memory are forwarded to the Infinity Fabric. +- In a dGPU: if the uncached data is stored in host memory, the atomic will be + forwarded from Infinity Fabric to PCIe. Any atomic not supported by the PCIe + bus will be a NOP and give an incorrect result. +- Supports floating-point atomic min/max. +- The Infinity Fabric does not support floating-point atomic min/max atomics + and these commands cannot be sent to the Infinity Fabric.
+- If the floating-point atomics hit uncached memory, they cannot be forwarded to + the Infinity Fabric, so they will be a NOP and give an incorrect result. + +**gfx11** + +- L2 cache and Infinity Fabric both support common integer and bitwise atomics. +- L2 cache supports FP32 atomic add, min and max. +- The Infinity Fabric does not support FP32 atomic add, min and max atomics and + these commands cannot be sent to the Infinity Fabric. +- Coarse-grained memory is marked as cacheable, and atomic operations will be + processed in the L2 cache. +- Fine-grained memory is marked uncacheable through the page tables. +- Atomics that hit write-uncached memory are forwarded to the Infinity Fabric. +- In a dGPU: if the uncached data is stored in host memory, the atomic will be + forwarded from Infinity Fabric to PCIe. Any atomic not supported by the PCIe + bus will be a NOP and give an incorrect result. +- If the float atomics hit uncached memory, they cannot be forwarded to the + Infinity Fabric, so they will be a NOP and give an incorrect result. + +**gfx12** + +- L2 cache and Infinity Fabric both support common integer and bitwise atomics. + +- L2 cache and Infinity Fabric both also support FP32 atomic add, min and max, + packed-FP16 atomic add, and packed-BF16 atomic add. + +- Coarse-grained memory is marked as cacheable, and atomic operations will be + processed in the L2 cache. + +- Fine-grained device memory is marked uncacheable through the page tables. + + - Atomics that hit write-uncached memory are forwarded to the Infinity Fabric. + +- Fine-grained system memory is marked as cacheable through the page tables. + + - Device-scope atomic operations will be processed in the L2 cache. + + - System-scope atomic operations will bypass the L2 cache and be forwarded to + the Infinity Fabric. + +- Atomics that hit write-uncached memory are forwarded to the Infinity Fabric. + +- In dGPUs, if the data is stored in host memory, the atomic will be forwarded + from the Infinity Fabric to PCIe.
+ +- If the PCIe bus does not support the requested atomic, the GPU's PCIe + controller changes it into a load-op-store sequence. All waves on the chip + submitting atomics to that address will stall waiting for the load-op-store. + It will seem like atomics to the wave, but the CPU sees it as a non-atomic + load-op-store sequence. This downgrades system-scope atomics to device-scope. + +GPUs atomics support +================================================================================ + +This section presents a series of tables that show the level of atomic +operations support for the different hardware devices described above, and +different datatypes, different operations and different scopes. + +Hardware atomics support refers to the ability of GPUs to natively perform +atomic operations—special low-level operations that ensure data consistency when +multiple threads access and modify memory concurrently. + +CAS (Compare-and-Swap) atomic support refers to the hardware or software +capability to perform an atomic Compare-and-Swap operation. + +PCIe atomics are a feature of the PCIe interface that enables +atomic operations between devices and hosts across the PCIe bus. For further +information, please check :doc:`../conceptual/pcie-atomics`. + +The tables that follow show the correctness of atomics operations on the +hardware using the following notations: + +- ✅: Produces the correct answer. + +- ⚠️: Produces the correct answer, but works only at a weaker scope. + +- ❌: The atomics operation fails. + +The tables show the different types of atomic operations used by specific +devices: + +- Native: Computes the correct result using a hardware-native atomic + instruction. + +- CAS: Generates the correct result, but the atomic operation is implemented by + the compiler for this ISA using a compare-and-swap emulation loop. + +- ✅ NoReturn: Produces the correct result but does not precisely + conform to the atomic API.
+ +- Scope Downgrade: Generates the correct result but operates at a weaker scope + than requested. For example, if a user specifies a system-scope atomic, the + operation may only function at the device scope. + +- NOP: The atomic operation is not executed on the target location, and the + requesting thread receives back 0 as a return value. + +- n/a: The atomic type is not supported and cannot be executed on the specific + hardware. + +The tables selectors or options are the following: + +- Highest level option: + + - "HW atomics", where software attempts to use hardware atomics. + + - "CAS emulation", where software attempts to use CAS emulation. + +- Second-level option: + + - "No PCIe atomics" means the system does not support PCIe atomics between + the accelerator and peer/host-memory. + + - "PCIe atomics" means the system supports PCIe atomics between the + accelerator and peer/host-memory. + +- The third-level option is the memory granularity of the memory target. + +- The final option is the scope of atomic access. + +Integer atomics operations +-------------------------------------------------------------------------------- + +The integer type atomic operations that are supported by different hardware. + +- 32 bit integer + + - Add + + - Subtract + + - Min + + - Max + + - IncDec + +- 64 bit integer + + - Add + + - Min + + - Max + +AMD Instinct accelerators +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The integer type atomic operations that are supported by different AMD +Instinct accelerators listed in the following table. + +.. + +.. The relative path not working in datatemplate, that's why we also need the absolute path of docs folder. + +.. datatemplate:nodata:: + + {% set ns = namespace(offset=2, previous_csv='') %} + + .. tab-set:: + {% for (atomics_type_text, atomics_type_key) in config.html_context['atomics_type'] %} + .. tab-item:: {{ atomics_type_text }} + :sync: {{ atomics_type_key }} + + .. 
tab-set:: + {% for (pcie_type_text, pcie_type_key) in config.html_context['pcie_type'] %} + .. tab-item:: {{ pcie_type_text }} + :sync: {{ pcie_type_key }} + + .. tab-set:: + {% for (memory_type_text, memory_type_key) in config.html_context['memory_type'] %} + .. tab-item:: {{ memory_type_text }} + :sync: {{ memory_type_key }} + + .. tab-set:: + {% for (granularity_type_text, granularity_type_key) in config.html_context['granularity_type'] %} + .. tab-item:: {{ granularity_type_text }} + :sync: {{ granularity_type_key }} + + .. tab-set:: + {% for (scope_type_text, scope_type_key) in config.html_context['scope_type'] %} + .. tab-item:: {{ scope_type_text }} + :sync: {{ scope_type_key }} + + {# Build the CSV file path for this branch #} + {% set current_csv = "data/reference/gpu-atomics-operation/" + ~ atomics_type_key ~ "_" ~ pcie_type_key ~ "_instinct.csv" %} + {# If we have a new CSV file, reset the offset #} + {% if current_csv != ns.previous_csv %} + {% set ns.offset = 2 %} + {% endif %} + {% set ns.previous_csv = current_csv %} + + {# Compute the row numbers for this leaf #} + {% set start = ns.offset %} + {% set end = ns.offset + 8 %} + + .. csv-to-list-table:: + :file: {{ current_csv }} + :rows: {{ start }}-{{ end }} + + {# Update the offset: block (9 rows) plus gap (18 rows) #} + {% set ns.offset = ns.offset + 9 + 18 %} + + {% endfor %} + {% endfor %} + {% endfor %} + {% endfor %} + {% endfor %} + +.. + +AMD gfx generic targets +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The integer type atomic operations that are supported by different gfx generic +targets listed in the following table. + +.. + +.. The relative path not working in datatemplate, that's why we also need the absolute path of docs folder. + +.. datatemplate:nodata:: + + {% set ns = namespace(offset=2, previous_csv='') %} + + .. tab-set:: + {% for (atomics_type_text, atomics_type_key) in config.html_context['atomics_type'] %} + .. 
tab-item:: {{ atomics_type_text }} + :sync: {{ atomics_type_key }} + + .. tab-set:: + {% for (pcie_type_text, pcie_type_key) in config.html_context['pcie_type'] %} + .. tab-item:: {{ pcie_type_text }} + :sync: {{ pcie_type_key }} + + .. tab-set:: + {% for (memory_type_text, memory_type_key) in config.html_context['memory_type'] %} + .. tab-item:: {{ memory_type_text }} + :sync: {{ memory_type_key }} + + .. tab-set:: + {% for (granularity_type_text, granularity_type_key) in config.html_context['granularity_type'] %} + .. tab-item:: {{ granularity_type_text }} + :sync: {{ granularity_type_key }} + + .. tab-set:: + {% for (scope_type_text, scope_type_key) in config.html_context['scope_type'] %} + .. tab-item:: {{ scope_type_text }} + :sync: {{ scope_type_key }} + + {# Build the CSV file path for this branch #} + {% set current_csv = "data/reference/gpu-atomics-operation/" + ~ atomics_type_key ~ "_" ~ pcie_type_key ~ "_gfx.csv" %} + {# If we switch CSV files, reset the offset to 2 (to skip the header row) #} + {% if current_csv != ns.previous_csv %} + {% set ns.offset = 2 %} + {% endif %} + {% set ns.previous_csv = current_csv %} + + {# Determine the increment based on atomics_type_key #} + {% if atomics_type_key == "hw-atomics" %} + {% set increment = 20 %} + {% elif atomics_type_key == "cas-atomics" %} + {% set increment = 18 %} + {% endif %} + + {# Compute start and end rows (end is inclusive) #} + {% set start = ns.offset %} + {% set end = ns.offset + 8 %} + + .. csv-to-list-table:: + :file: {{ current_csv }} + :rows: {{ start }}-{{ end }} + + {# Update the offset for the next table in this CSV #} + {% set ns.offset = ns.offset + 9 + increment %} + + {% endfor %} + {% endfor %} + {% endfor %} + {% endfor %} + {% endfor %} + +.. + +Bitwise atomics operations +-------------------------------------------------------------------------------- + +The bitwise atomic operations that are supported by different hardware. 
+ +- 32 bit bitwise + + - Exchange + + - Compare-and-Swap (CAS) + + - AND + + - OR + + - XOR + +- 64 bit bitwise + + - Exchange + + - CAS + + - AND + + - OR + + - XOR + + +.. note:: + + 128-bit bitwise Exchange and CAS are not supported on AMD GPUs + +AMD Instinct accelerators +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The bitwise atomic operations that are supported by different AMD Instinct +accelerators listed in the following table. + +.. + +.. The relative path not working in datatemplate, that's why we also need the absolute path of docs folder. + +.. datatemplate:nodata:: + + {% set ns = namespace(offset=19, previous_csv='') %} + + .. tab-set:: + {% for (atomics_type_text, atomics_type_key) in config.html_context['atomics_type'] %} + .. tab-item:: {{ atomics_type_text }} + :sync: {{ atomics_type_key }} + + .. tab-set:: + {% for (pcie_type_text, pcie_type_key) in config.html_context['pcie_type'] %} + .. tab-item:: {{ pcie_type_text }} + :sync: {{ pcie_type_key }} + + .. tab-set:: + {% for (memory_type_text, memory_type_key) in config.html_context['memory_type'] %} + .. tab-item:: {{ memory_type_text }} + :sync: {{ memory_type_key }} + + .. tab-set:: + {% for (granularity_type_text, granularity_type_key) in config.html_context['granularity_type'] %} + .. tab-item:: {{ granularity_type_text }} + :sync: {{ granularity_type_key }} + + .. tab-set:: + {% for (scope_type_text, scope_type_key) in config.html_context['scope_type'] %} + .. 
tab-item:: {{ scope_type_text }} + :sync: {{ scope_type_key }} + + {# Build the CSV file path for this branch #} + {% set current_csv = "data/reference/gpu-atomics-operation/" + ~ atomics_type_key ~ "_" ~ pcie_type_key ~ "_instinct.csv" %} + {# If we have a new CSV file, reset the offset #} + {% if current_csv != ns.previous_csv %} + {% set ns.offset = 19 %} + {% endif %} + {% set ns.previous_csv = current_csv %} + + {# Compute the row numbers for this leaf #} + {% set start = ns.offset %} + {% set end = ns.offset + 9 %} + + .. csv-to-list-table:: + :file: {{ current_csv }} + :rows: {{ start }}-{{ end }} + + {# Update the offset: block (10 rows) plus gap (17 rows) #} + {% set ns.offset = ns.offset + 10 + 17 %} + + {% endfor %} + {% endfor %} + {% endfor %} + {% endfor %} + {% endfor %} + +.. + +AMD gfx generic targets +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The bitwise atomic operations that are supported by different AMD gfx generic +targets listed in the following table. + +.. + +.. The relative path not working in datatemplate, that's why we also need the absolute path of docs folder. + +.. datatemplate:nodata:: + + {% set ns = namespace(offset=19, previous_csv='') %} + + .. tab-set:: + {% for (atomics_type_text, atomics_type_key) in config.html_context['atomics_type'] %} + .. tab-item:: {{ atomics_type_text }} + :sync: {{ atomics_type_key }} + + .. tab-set:: + {% for (pcie_type_text, pcie_type_key) in config.html_context['pcie_type'] %} + .. tab-item:: {{ pcie_type_text }} + :sync: {{ pcie_type_key }} + + .. tab-set:: + {% for (memory_type_text, memory_type_key) in config.html_context['memory_type'] %} + .. tab-item:: {{ memory_type_text }} + :sync: {{ memory_type_key }} + + .. tab-set:: + {% for (granularity_type_text, granularity_type_key) in config.html_context['granularity_type'] %} + .. tab-item:: {{ granularity_type_text }} + :sync: {{ granularity_type_key }} + + .. 
tab-set:: + {% for (scope_type_text, scope_type_key) in config.html_context['scope_type'] %} + .. tab-item:: {{ scope_type_text }} + :sync: {{ scope_type_key }} + + {# Build the CSV file path for this branch #} + {% set current_csv = "data/reference/gpu-atomics-operation/" + ~ atomics_type_key ~ "_" ~ pcie_type_key ~ "_gfx.csv" %} + {# If we switch CSV files, reset the offset to 19 (the starting row used for this section) #} + {% if current_csv != ns.previous_csv %} + {% set ns.offset = 19 %} + {% endif %} + {% set ns.previous_csv = current_csv %} + + {# Determine the increment based on atomics_type_key #} + {% if atomics_type_key == "hw-atomics" %} + {% set increment = 19 %} + {% elif atomics_type_key == "cas-atomics" %} + {% set increment = 17 %} + {% endif %} + + {# Compute start and end rows (end is inclusive) #} + {% set start = ns.offset %} + {% set end = ns.offset + 9 %} + + .. csv-to-list-table:: + :file: {{ current_csv }} + :rows: {{ start }}-{{ end }} + + {# Update the offset for the next table in this CSV #} + {% set ns.offset = ns.offset + 10 + increment %} + + {% endfor %} + {% endfor %} + {% endfor %} + {% endfor %} + {% endfor %} + +.. + +Float atomics operations +-------------------------------------------------------------------------------- + +The following float atomic operations are supported by different hardware.
+ +- 32-bit IEEE 754 floating point ('single precision', FP32) + + - Add + + - Min + + - Max + +- 64-bit IEEE 754 floating point ('double precision', FP64) + + - Add + + - Min + + - Max + +- 16-bit IEEE 754 floating point ('half precision', FP16) + + - Add + +- 2xPacked 16-bit IEEE 754 floating point ('half precision', FP16) + + - Add + +- BrainFloat-16 floating point (BF16) + + - Add + +- 2xPacked BrainFloat-16 floating point (BF16) + + - Add + +AMD Instinct accelerators +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The float type atomic operations that are supported by different AMD Instinct +accelerators are listed in the following table. + +.. + +.. The relative path does not work in datatemplate, which is why we also need the absolute path of the docs folder. + +.. datatemplate:nodata:: + + {% set ns = namespace(offset=11, previous_csv='') %} + + .. tab-set:: + {% for (atomics_type_text, atomics_type_key) in config.html_context['atomics_type'] %} + .. tab-item:: {{ atomics_type_text }} + :sync: {{ atomics_type_key }} + + .. tab-set:: + {% for (pcie_type_text, pcie_type_key) in config.html_context['pcie_type'] %} + .. tab-item:: {{ pcie_type_text }} + :sync: {{ pcie_type_key }} + + .. tab-set:: + {% for (memory_type_text, memory_type_key) in config.html_context['memory_type'] %} + .. tab-item:: {{ memory_type_text }} + :sync: {{ memory_type_key }} + + .. tab-set:: + {% for (granularity_type_text, granularity_type_key) in config.html_context['granularity_type'] %} + .. tab-item:: {{ granularity_type_text }} + :sync: {{ granularity_type_key }} + + .. tab-set:: + {% for (scope_type_text, scope_type_key) in config.html_context['scope_type'] %} + ..
tab-item:: {{ scope_type_text }} + :sync: {{ scope_type_key }} + + {# Build the CSV file path for this branch #} + {% set current_csv = "data/reference/gpu-atomics-operation/" + ~ atomics_type_key ~ "_" ~ pcie_type_key ~ "_instinct.csv" %} + {# If we have a new CSV file, reset the offset #} + {% if current_csv != ns.previous_csv %} + {% set ns.offset = 11 %} + {% endif %} + {% set ns.previous_csv = current_csv %} + + {# Compute the row numbers for this leaf #} + {% set start = ns.offset %} + {% set end = ns.offset + 7 %} + + .. csv-to-list-table:: + :file: {{ current_csv }} + :rows: {{ start }}-{{ end }} + + {# Update the offset: block (8 rows) plus gap (19 rows) #} + {% set ns.offset = ns.offset + 8 + 19 %} + + {% endfor %} + {% endfor %} + {% endfor %} + {% endfor %} + {% endfor %} + +.. + +AMD gfx generic targets +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The float types atomic operations that are supported by different AMD gfx +generic targets listed in the following table. + +.. + +.. The relative path not working in datatemplate, that's why we also need the absolute path of docs folder. + +.. datatemplate:nodata:: + + {% set ns = namespace(offset=11, previous_csv='') %} + + .. tab-set:: + {% for (atomics_type_text, atomics_type_key) in config.html_context['atomics_type'] %} + .. tab-item:: {{ atomics_type_text }} + :sync: {{ atomics_type_key }} + + .. tab-set:: + {% for (pcie_type_text, pcie_type_key) in config.html_context['pcie_type'] %} + .. tab-item:: {{ pcie_type_text }} + :sync: {{ pcie_type_key }} + + .. tab-set:: + {% for (memory_type_text, memory_type_key) in config.html_context['memory_type'] %} + .. tab-item:: {{ memory_type_text }} + :sync: {{ memory_type_key }} + + .. tab-set:: + {% for (granularity_type_text, granularity_type_key) in config.html_context['granularity_type'] %} + .. tab-item:: {{ granularity_type_text }} + :sync: {{ granularity_type_key }} + + .. 
tab-set:: + {% for (scope_type_text, scope_type_key) in config.html_context['scope_type'] %} + .. tab-item:: {{ scope_type_text }} + :sync: {{ scope_type_key }} + + {# Build the CSV file path for this branch #} + {% set current_csv = "data/reference/gpu-atomics-operation/" + ~ atomics_type_key ~ "_" ~ pcie_type_key ~ "_gfx.csv" %} + {# If we switch CSV files, reset the offset to 11 (the starting row used for this section) #} + {% if current_csv != ns.previous_csv %} + {% set ns.offset = 11 %} + {% endif %} + {% set ns.previous_csv = current_csv %} + + {# Determine the increment based on atomics_type_key #} + {% if atomics_type_key == "hw-atomics" %} + {% set increment = 21 %} + {% elif atomics_type_key == "cas-atomics" %} + {% set increment = 19 %} + {% endif %} + + {# Compute start and end rows (end is inclusive) #} + {% set start = ns.offset %} + {% set end = ns.offset + 7 %} + + .. csv-to-list-table:: + :file: {{ current_csv }} + :rows: {{ start }}-{{ end }} + + {# Update the offset for the next table in this CSV #} + {% set ns.offset = ns.offset + 8 + increment %} + + {% endfor %} + {% endfor %} + {% endfor %} + {% endfor %} + {% endfor %} + +.. \ No newline at end of file diff --git a/docs/reference/precision-support.rst b/docs/reference/precision-support.rst index 3a96eefd8..e29d3cb63 100644 --- a/docs/reference/precision-support.rst +++ b/docs/reference/precision-support.rst @@ -335,7 +335,12 @@ The following table lists data type support for AMD GPU matrix cores. Atomic operations support ------------------------- -The following table lists data type support for atomic operations. +The following table lists which data types are supported for atomic +operations on AMD GPUs. The atomics operation type behavior is affected by the +memory locations, memory granularity, or scope of operations. For detailed +information on support for atomic read-modify-write (atomicRMW) operations, see +the :ref:`Hardware atomics operation support <hw_atomics_operation_support>` +page. ..
tab-set:: @@ -356,7 +361,7 @@ The following table lists data type support for atomic operations. - ❌ - ❌ - ✅ - - ❌ + - ✅ * - MI200 series - ❌ @@ -380,8 +385,8 @@ The following table lists data type support for atomic operations. - Type name - float8 (E4M3) - float8 (E5M2) - - float16 - - bfloat16 + - 2 x float16 + - 2 x bfloat16 - tensorfloat32 - float32 - float64 @@ -390,7 +395,7 @@ The following table lists data type support for atomic operations. - ❌ - ❌ - ✅ - - ❌ + - ✅ - ❌ - ✅ - ❌ @@ -399,7 +404,7 @@ The following table lists data type support for atomic operations. - ❌ - ❌ - ✅ - - ❌ + - ✅ - ❌ - ✅ - ✅ @@ -408,7 +413,7 @@ The following table lists data type support for atomic operations. - ❌ - ❌ - ✅ - - ❌ + - ✅ - ❌ - ✅ - ✅ diff --git a/docs/sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in index 652bd252f..1f8d00508 100644 --- a/docs/sphinx/_toc.yml.in +++ b/docs/sphinx/_toc.yml.in @@ -181,16 +181,16 @@ subtrees: - caption: Reference entries: - - file: reference/api-libraries.md - title: ROCm libraries - - file: reference/rocm-tools.md - title: ROCm tools, compilers, and runtimes - - file: reference/gpu-arch-specs.rst - title: Hardware specifications - - file: reference/precision-support.rst - title: Precision support - - file: reference/graph-safe-support.rst - title: Graph safe support + - file: reference/api-libraries.md + title: ROCm libraries + - file: reference/rocm-tools.md + title: ROCm tools, compilers, and runtimes + - file: reference/gpu-arch-specs.rst + - file: reference/gpu-atomics-operation.rst + - file: reference/precision-support.rst + title: Precision support + - file: reference/graph-safe-support.rst + title: Graph safe support - caption: Contribute entries: