diff --git a/.wordlist.txt b/.wordlist.txt index 5377d2eef..9473bf389 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -96,6 +96,7 @@ DIMM DKMS DL DMA +DOMContentLoaded DNN DNNL DPM @@ -531,6 +532,7 @@ ZenDNN accuracies activations addr +addEventListener ade ai alloc @@ -564,6 +566,7 @@ boson bosons br BrainFloat +btn buildable bursty bzip @@ -575,6 +578,7 @@ centric changelog checkpointing chiplet +classList cmake cmd coalescable @@ -586,6 +590,7 @@ composable concretization config conformant +const constructible convolutional convolves @@ -649,6 +654,7 @@ exascale executables ffmpeg filesystem +forEach fortran fp framebuffer @@ -657,6 +663,7 @@ galb gcc gdb gemm +getAttribute gfortran gfx githooks @@ -808,6 +815,8 @@ recommenders quantile quantizer quasirandom +querySelector +querySelectorAll queueing qwen radeon @@ -870,9 +879,11 @@ scalability scalable scipy seealso +selectedTag sendmsg seqs serializers +setAttribute sglang shader sharding @@ -899,6 +910,7 @@ symlink symlinks sys tabindex +targetContainer td tensorfloat th diff --git a/docs/data/about/compatibility/floating-point-data-types.png b/docs/data/about/compatibility/floating-point-data-types.png index b59b40be4..87c3afe29 100644 Binary files a/docs/data/about/compatibility/floating-point-data-types.png and b/docs/data/about/compatibility/floating-point-data-types.png differ diff --git a/docs/data/reference/precision-support/precision-support.yaml b/docs/data/reference/precision-support/precision-support.yaml new file mode 100644 index 000000000..6f4773319 --- /dev/null +++ b/docs/data/reference/precision-support/precision-support.yaml @@ -0,0 +1,391 @@ +# rocm-library-support.yaml +library_groups: + - group: "ML & Computer Vision" + tag: "ml-cv" + libraries: + - name: "Composable Kernel" + tag: "composable-kernel" + doc_link: "composable_kernel:reference/Composable_Kernel_supported_scalar_types" + data_types: + - type: "int8" + support: "✅" + - type: "int32" + support: "✅" + - type: "float4" + support: "✅" + - type: "float6 (E2M3)" + support: "✅" + - type: "float6 (E3M2)" + support: "✅" + - type: "float8 (E4M3)" + support: "✅" + - type: "float8 (E5M2)" + support: "✅" + - type: "float16" + support: "✅" + - type: "bfloat16" + support: "✅" + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" + + - name: "MIGraphX" + tag: "migraphx" + doc_link: "amdmigraphx:reference/cpp" + data_types: + - type: "int8" + support: "⚠️" + - type: "int16" + support: "✅" + - type: "int32" + support: "✅" + - type: "int64" + support: "✅" + - type: "float8 (E4M3)" + support: "✅" + - type: "float8 (E5M2)" + support: "✅" + - type: "float16" + support: "✅" + - type: "bfloat16" + support: "✅" + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" + + - name: "MIOpen" + tag: "miopen" + doc_link: "miopen:reference/datatypes" + data_types: + - type: "int8" + support: "⚠️" + - type: "int32" + support: "⚠️" + - type: "float8 (E4M3)" + support: "⚠️" + - type: "float8 (E5M2)" + support: "⚠️" + - type: "float16" + support: "✅" + - type: "bfloat16" + support: "⚠️" + - type: "float32" + support: "✅" + - type: "float64" + support: "⚠️" + + - group: "Communication" + tag: "communication" + libraries: + - name: "RCCL" + tag: "rccl" + doc_link: "rccl:api-reference/library-specification" + data_types: + - type: "int8" + support: "✅" + - type: "int32" + support: "✅" + - type: "int64" + support: "✅" + - type: "float8 (E4M3)" + support: "✅" + - type: "float8 (E5M2)" + support: "✅" + - type: "float16" + support: "✅" + - type: "bfloat16" + 
support: "✅" + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" + + - group: "Math Libraries" + tag: "math-libs" + libraries: + - name: "hipBLAS" + tag: "hipblas" + doc_link: "hipblas:reference/data-type-support" + data_types: + - type: "float16" + support: "⚠️" + - type: "bfloat16" + support: "⚠️" + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" + + - name: "hipBLASLt" + tag: "hipblaslt" + doc_link: "hipblaslt:reference/data-type-support" + data_types: + - type: "int8" + support: "✅" + - type: "float4" + support: "✅" + - type: "float6 (E2M3)" + support: "✅" + - type: "float6 (E3M2)" + support: "✅" + - type: "float8 (E4M3)" + support: "✅" + - type: "float8 (E5M2)" + support: "✅" + - type: "float16" + support: "✅" + - type: "bfloat16" + support: "✅" + - type: "float32" + support: "✅" + + - name: "hipFFT" + tag: "hipfft" + doc_link: "hipfft:reference/fft-api-usage" + data_types: + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" + + - name: "hipRAND" + tag: "hiprand" + doc_link: "hiprand:api-reference/data-type-support" + data_types: + - type: "int8" + support: "Output only" + - type: "int16" + support: "Output only" + - type: "int32" + support: "Output only" + - type: "int64" + support: "Output only" + - type: "float16" + support: "Output only" + - type: "float32" + support: "Output only" + - type: "float64" + support: "Output only" + + - name: "hipSOLVER" + tag: "hipsolver" + doc_link: "hipsolver:reference/precision" + data_types: + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" + + - name: "hipSPARSE" + tag: "hipsparse" + doc_link: "hipsparse:reference/precision" + data_types: + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" + + - name: "hipSPARSELt" + tag: "hipsparselt" + doc_link: "hipsparselt:reference/data-type-support" + data_types: + - type: "int8" + support: "✅" + - type: "float8 (E4M3)" + support: "✅" + - type: "float8 (E5M2)" + support: "✅" + - type: "float16" + support: "✅" + - type: "bfloat16" + support: "✅" + - type: "float32" + support: "✅" + + - name: "rocBLAS" + tag: "rocblas" + doc_link: "rocblas:reference/data-type-support" + data_types: + - type: "float16" + support: "⚠️" + - type: "bfloat16" + support: "⚠️" + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" + + - name: "rocFFT" + tag: "rocfft" + doc_link: "rocfft:reference/api" + data_types: + - type: "float16" + support: "✅" + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" + + - name: "rocRAND" + tag: "rocrand" + doc_link: "rocrand:api-reference/data-type-support" + data_types: + - type: "int8" + support: "Output only" + - type: "int16" + support: "Output only" + - type: "int32" + support: "Output only" + - type: "int64" + support: "Output only" + - type: "float16" + support: "Output only" + - type: "float32" + support: "Output only" + - type: "float64" + support: "Output only" + + - name: "rocSOLVER" + tag: "rocsolver" + doc_link: "rocsolver:reference/precision" + data_types: + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" + + - name: "rocSPARSE" + tag: "rocsparse" + doc_link: "rocsparse:reference/precision" + data_types: + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" + + - name: "rocWMMA" + tag: "rocwmma" + doc_link: "rocwmma:api-reference/api-reference-guide" + data_types: + - type: "int8" + support: "✅" + - type: "int32" + support: "Output only" + - type: "float8 (E4M3)" + support: "Input only" + - type: "float8 (E5M2)" + support: 
"Input only" + - type: "float16" + support: "✅" + - type: "bfloat16" + support: "✅" + - type: "tensorfloat32" + support: "✅" + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" + + - name: "Tensile" + tag: "tensile" + doc_link: "tensile:reference/precision-support" + data_types: + - type: "int8" + support: "✅" + - type: "int32" + support: "✅" + - type: "float8 (E4M3)" + support: "✅" + - type: "float8 (E5M2)" + support: "✅" + - type: "float16" + support: "✅" + - type: "bfloat16" + support: "✅" + - type: "tensorfloat32" + support: "✅" + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" + + - group: "Primitives" + tag: "primitives" + libraries: + - name: "hipCUB" + tag: "hipcub" + doc_link: "hipcub:api-reference/data-type-support" + data_types: + - type: "int8" + support: "✅" + - type: "int16" + support: "✅" + - type: "int32" + support: "✅" + - type: "int64" + support: "✅" + - type: "float16" + support: "✅" + - type: "bfloat16" + support: "✅" + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" + + - name: "hipTensor" + tag: "hiptensor" + doc_link: "hiptensor:api-reference/api-reference" + data_types: + - type: "float16" + support: "✅" + - type: "bfloat16" + support: "✅" + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" + + - name: "rocPRIM" + tag: "rocprim" + doc_link: "rocprim:reference/data-type-support" + data_types: + - type: "int8" + support: "✅" + - type: "int16" + support: "✅" + - type: "int32" + support: "✅" + - type: "int64" + support: "✅" + - type: "float16" + support: "✅" + - type: "bfloat16" + support: "✅" + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" + + - name: "rocThrust" + tag: "rocthrust" + doc_link: "rocthrust:data-type-support" + data_types: + - type: "int8" + support: "✅" + - type: "int16" + support: "✅" + - type: "int32" + support: "✅" + - type: "int64" + support: "✅" + - type: "float16" + support: "⚠️" + - type: "bfloat16" + support: "⚠️" + - type: "float32" + support: "✅" + - type: "float64" + support: "✅" diff --git a/docs/index.md b/docs/index.md index 3a134adb9..5a784c73d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -65,7 +65,7 @@ ROCm documentation is organized into the following categories: * [ROCm libraries](./reference/api-libraries.md) * [ROCm tools, compilers, and runtimes](./reference/rocm-tools.md) * [Accelerator and GPU hardware specifications](./reference/gpu-arch-specs.rst) -* [Precision support](./reference/precision-support.rst) +* [Data types and precision support](./reference/precision-support.rst) * [Graph safe support](./reference/graph-safe-support.rst) ::: diff --git a/docs/reference/precision-support.rst b/docs/reference/precision-support.rst index 4f5be7e33..8ee81e4b3 100644 --- a/docs/reference/precision-support.rst +++ b/docs/reference/precision-support.rst @@ -9,8 +9,8 @@ Data types and precision support ************************************************************* -This topic lists the data types support on AMD GPUs, ROCm libraries along -with corresponding :doc:`HIP ` data types. +This topic summarizes the data types supported on AMD GPUs and accelerators and +ROCm libraries, along with corresponding :doc:`HIP ` data types. Integral types ============== @@ -61,18 +61,38 @@ The floating-point types supported by ROCm are listed in the following table. 
- Type name - HIP type - Description + + * + - float4 (E2M1) + - | ``__hip_fp4_e2m1`` + - A 4-bit floating-point number with **E2M1** bit layout, as described + in :doc:`low precision floating point types page `. + + * + - float6 (E3M2) + - | ``__hip_fp6_e3m2`` + - A 6-bit floating-point number with **E3M2** bit layout, as described + in :doc:`low precision floating point types page `. + + * + - float6 (E2M3) + - | ``__hip_fp6_e2m3`` + - A 6-bit floating-point number with **E2M3** bit layout, as described + in :doc:`low precision floating point types page `. + * - float8 (E4M3) - | ``__hip_fp8_e4m3_fnuz``, | ``__hip_fp8_e4m3`` - - An 8-bit floating-point number with **S1E4M3** bit layout, as described in :doc:`low precision floating point types page `. + - An 8-bit floating-point number with **E4M3** bit layout, as described in :doc:`low precision floating point types page `. The FNUZ variant has expanded range with no infinity or signed zero (NaN represented as negative zero), while the OCP variant follows the Open Compute Project specification. + * - float8 (E5M2) - | ``__hip_fp8_e5m2_fnuz``, | ``__hip_fp8_e5m2`` - - An 8-bit floating-point number with **S1E5M2** bit layout, as described in :doc:`low precision floating point types page `. + - An 8-bit floating-point number with **E5M2** bit layout, as described in :doc:`low precision floating point types page `. The FNUZ variant has expanded range with no infinity or signed zero (NaN represented as negative zero), while the OCP variant follows the Open Compute Project specification. @@ -81,22 +101,26 @@ The floating-point types supported by ROCm are listed in the following table. - ``half`` - A 16-bit floating-point number that conforms to the IEEE 754-2008 half-precision storage format. + * - bfloat16 - ``bfloat16`` - A shortened 16-bit version of the IEEE 754 single-precision storage format. + * - tensorfloat32 - Not available - A floating-point number that occupies 32 bits or less of storage, providing improved range compared to half (16-bit) format, at (potentially) greater throughput than single-precision (32-bit) formats. + * - float32 - ``float`` - A 32-bit floating-point number that conforms to the IEEE 754 single-precision storage format. + * - float64 - ``double`` @@ -108,8 +132,8 @@ The floating-point types supported by ROCm are listed in the following table. * The float8 and tensorfloat32 types are internal types used in calculations in Matrix Cores and can be stored in any type of the same size. - * CNDA3 natively supports FP8 FNUZ (E4M3 and E5M2), which differs from the customised - FP8 format used in NVIDIA's H100 + * CDNA3 natively supports FP8 FNUZ (E4M3 and E5M2), which differs from the customized + FP8 format used with NVIDIA H100 (`FP8 Formats for Deep Learning `_). * In some AMD documents and articles, float8 (E5M2) is referred to as bfloat8. 
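+
+For example, a minimal HIP program can round-trip a value through the OCP
+float8 (E4M3) type. This is a sketch only: it assumes the ``<hip/hip_fp8.h>``
+header and the ``float`` conversion constructor and conversion operator that
+HIP defines for its fp8 types.
+
+.. code-block:: cpp
+
+   #include <hip/hip_fp8.h>
+   #include <cstdio>
+
+   int main() {
+       float original = 1.7f;
+       // Narrow to 8 bits (OCP E4M3): the value rounds to the nearest
+       // representable float8.
+       __hip_fp8_e4m3 narrowed(original);
+       // Widen back to float32 to inspect the rounding step.
+       float roundtrip = static_cast<float>(narrowed);
+       std::printf("%f -> %f\n", original, roundtrip);
+       return 0;
+   }
+
+Compiled with ``hipcc``, the printed pair shows the precision lost by the
+8-bit storage format.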
@@ -168,11 +192,13 @@ Data type support by hardware architecture AMD's GPU lineup spans multiple architecture generations: -* CDNA1 architecture: includes models such as MI100 -* CDNA2 architecture: includes models such as MI210, MI250, and MI250X -* CDNA3 architecture: includes models such as MI300A, MI300X, and MI325X -* RDNA3 architecture: includes models such as RX 7900XT and RX 7900XTX -* RDNA4 architecture: includes models such as RX 9070 and RX 9070XT +* CDNA1 such as MI100 +* CDNA2 such as MI210, MI250, and MI250X +* CDNA3 such as MI300A, MI300X, and MI325X +* CDNA4 such as MI350X and MI355X +* RDNA2 such as PRO W6800 and PRO V620 +* RDNA3 such as RX 7900XT and RX 7900XTX +* RDNA4 such as RX 9070 and RX 9070XT HIP C++ type implementation support ----------------------------------- @@ -188,6 +214,8 @@ following table. - CDNA1 - CDNA2 - CDNA3 + - CDNA4 + - RDNA2 - RDNA3 - RDNA4 @@ -198,6 +226,8 @@ following table. - ✅ - ✅ - ✅ + - ✅ + - ✅ * - ``int16_t``, ``uint16_t`` @@ -206,6 +236,8 @@ following table. - ✅ - ✅ - ✅ + - ✅ + - ✅ * - ``int32_t``, ``uint32_t`` @@ -214,6 +246,8 @@ following table. - ✅ - ✅ - ✅ + - ✅ + - ✅ * - ``int64_t``, ``uint64_t`` @@ -222,6 +256,38 @@ following table. - ✅ - ✅ - ✅ + - ✅ + - ✅ + + * + - ``__hip_fp4_e2m1`` + - ❌ + - ❌ + - ❌ + - ✅ + - ❌ + - ❌ + - ❌ + + * + - ``__hip_fp6_e2m3`` + - ❌ + - ❌ + - ❌ + - ✅ + - ❌ + - ❌ + - ❌ + + * + - ``__hip_fp6_e3m2`` + - ❌ + - ❌ + - ❌ + - ✅ + - ❌ + - ❌ + - ❌ * - ``__hip_fp8_e4m3_fnuz`` @@ -230,6 +296,8 @@ following table. - ✅ - ❌ - ❌ + - ❌ + - ❌ * - ``__hip_fp8_e5m2_fnuz`` @@ -238,12 +306,16 @@ following table. - ✅ - ❌ - ❌ + - ❌ + - ❌ * - ``__hip_fp8_e4m3`` - ❌ - ❌ - ❌ + - ✅ + - ❌ - ❌ - ✅ @@ -252,6 +324,8 @@ following table. - ❌ - ❌ - ❌ + - ✅ + - ❌ - ❌ - ✅ @@ -262,6 +336,8 @@ following table. - ✅ - ✅ - ✅ + - ✅ + - ✅ * - ``bfloat16`` @@ -270,6 +346,8 @@ following table. - ✅ - ✅ - ✅ + - ✅ + - ✅ * - ``float`` @@ -278,6 +356,8 @@ following table. - ✅ - ✅ - ✅ + - ✅ + - ✅ * - ``double`` @@ -286,6 +366,8 @@ following table. - ✅ - ✅ - ✅ + - ✅ + - ✅ .. note:: @@ -314,18 +396,21 @@ The following table lists data type support for compute units. - int16 - int32 - int64 + * - CDNA1 - ✅ - ✅ - ✅ - ✅ + * - CDNA2 - ✅ - ✅ - ✅ - ✅ + * - CDNA3 - ✅ @@ -333,6 +418,20 @@ The following table lists data type support for compute units. - ✅ - ✅ + * + - CDNA4 + - ✅ + - ✅ + - ✅ + - ✅ + + * + - RDNA2 + - ✅ + - ✅ + - ✅ + - ✅ + * - RDNA3 - ✅ @@ -347,53 +446,132 @@ The following table lists data type support for compute units. - ✅ - ✅ - .. tab-item:: Floating-point types - :sync: floating-point-type + .. tab-item:: Low precision floating-point types + :sync: floating-point-type-low .. list-table:: :header-rows: 1 * - Type name + - float4 + - float6 (E2M3) + - float6 (E3M2) - float8 (E4M3) - float8 (E5M2) + + * + - CDNA1 + - ❌ + - ❌ + - ❌ + - ❌ + - ❌ + + * + - CDNA2 + - ❌ + - ❌ + - ❌ + - ❌ + - ❌ + + * + - CDNA3 + - ❌ + - ❌ + - ❌ + - ❌ + - ❌ + + * + - CDNA4 + - ❌ + - ❌ + - ❌ + - ❌ + - ❌ + + * + - RDNA2 + - ❌ + - ❌ + - ❌ + - ❌ + - ❌ + + * + - RDNA3 + - ❌ + - ❌ + - ❌ + - ❌ + - ❌ + + * + - RDNA4 + - ❌ + - ❌ + - ❌ + - ❌ + - ❌ + + .. tab-item:: High precision floating-point types + :sync: floating-point-type-high + + .. 
list-table:: + :header-rows: 1 + + * + - Type name - float16 - bfloat16 - tensorfloat32 - float32 - float64 + * - CDNA1 - - ❌ - - ❌ - ✅ - ✅ - ❌ - ✅ - ✅ + * - CDNA2 - - ❌ - - ❌ - ✅ - ✅ - ❌ - ✅ - ✅ + * - CDNA3 + - ✅ + - ✅ - ❌ + - ✅ + - ✅ + + * + - CDNA4 + - ✅ + - ✅ - ❌ - ✅ - ✅ + + * + - RDNA2 + - ✅ + - ✅ - ❌ - ✅ - ✅ * - RDNA3 - - ❌ - - ❌ - ✅ - ✅ - ❌ @@ -402,8 +580,6 @@ The following table lists data type support for compute units. * - RDNA4 - - ❌ - - ❌ - ✅ - ✅ - ❌ @@ -429,18 +605,21 @@ The following table lists data type support for AMD GPU matrix cores. - int16 - int32 - int64 + * - CDNA1 - ✅ - ❌ - ❌ - ❌ + * - CDNA2 - ✅ - ❌ - ❌ - ❌ + * - CDNA3 - ✅ @@ -448,6 +627,20 @@ The following table lists data type support for AMD GPU matrix cores. - ❌ - ❌ + * + - CDNA4 + - ✅ + - ❌ + - ❌ + - ❌ + + * + - RDNA2 + - ✅ + - ❌ + - ❌ + - ❌ + * - RDNA3 - ✅ @@ -462,53 +655,132 @@ The following table lists data type support for AMD GPU matrix cores. - ❌ - ❌ - .. tab-item:: Floating-point types - :sync: floating-point-type + .. tab-item:: Low precision floating-point types + :sync: floating-point-type-low .. list-table:: :header-rows: 1 * - Type name + - float4 + - float6 (E2M3) + - float6 (E3M2) - float8 (E4M3) - float8 (E5M2) - - float16 - - bfloat16 - - tensorfloat32 - - float32 - - float64 + * - CDNA1 - ❌ - ❌ - - ✅ - - ✅ - ❌ - - ✅ - ❌ + - ❌ + * - CDNA2 - ❌ - ❌ - - ✅ - - ✅ + - ❌ + - ❌ + - ❌ + + * + - CDNA3 + - ❌ + - ❌ - ❌ - ✅ - ✅ + * - - CDNA3 - - ✅ - - ✅ + - CDNA4 - ✅ - ✅ - ✅ - ✅ - ✅ + * + - RDNA2 + - ❌ + - ❌ + - ❌ + - ❌ + - ❌ + * - RDNA3 - ❌ - ❌ + - ❌ + - ❌ + - ❌ + + * + - RDNA4 + - ❌ + - ❌ + - ❌ + - ✅ + - ✅ + + .. tab-item:: High precision floating-point types + :sync: floating-point-type-high + + .. list-table:: + :header-rows: 1 + + * + - Type name + - float16 + - bfloat16 + - tensorfloat32 + - float32 + - float64 + + * + - CDNA1 + - ✅ + - ✅ + - ❌ + - ✅ + - ❌ + + * + - CDNA2 + - ✅ + - ✅ + - ❌ + - ✅ + - ✅ + + * + - CDNA3 + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + + * + - CDNA4 + - ✅ + - ✅ + - ✅ + - ✅ + - ✅ + + * + - RDNA2 + - ✅ + - ✅ + - ❌ + - ❌ + - ❌ + + * + - RDNA3 - ✅ - ✅ - ❌ @@ -519,8 +791,6 @@ The following table lists data type support for AMD GPU matrix cores. - RDNA4 - ✅ - ✅ - - ✅ - - ✅ - ❌ - ❌ - ❌ @@ -582,48 +852,59 @@ page. - ✅ - ✅ - .. tab-item:: Floating-point types - :sync: floating-point-type + .. tab-item:: Low precision floating-point types + :sync: floating-point-type-low .. list-table:: :header-rows: 1 * - Type name + - float4 + - float6 (E2M3) + - float6 (E3M2) - float8 (E4M3) - float8 (E5M2) - - 2 x float16 - - 2 x bfloat16 - - tensorfloat32 - - float32 - - float64 + * - CDNA1 - ❌ - ❌ - - ✅ - - ✅ - ❌ - - ✅ - ❌ + - ❌ + * - CDNA2 - ❌ - ❌ - - ✅ - - ✅ - ❌ - - ✅ - - ✅ + - ❌ + - ❌ + * - CDNA3 - ❌ - ❌ - - ✅ - - ✅ - ❌ - - ✅ - - ✅ + - ❌ + - ❌ + + * + - CDNA4 + - ❌ + - ❌ + - ❌ + - ❌ + - ❌ + + * + - RDNA2 + - ❌ + - ❌ + - ❌ + - ❌ + - ❌ * - RDNA3 @@ -632,13 +913,79 @@ page. - ❌ - ❌ - ❌ - - ✅ - - ❌ * - RDNA4 - ❌ - ❌ + - ❌ + - ❌ + - ❌ + + .. tab-item:: High precision floating-point types + :sync: floating-point-type-high + + .. 
list-table:: + :header-rows: 1 + + * + - Type name + - 2 x float16 + - 2 x bfloat16 + - tensorfloat32 + - float32 + - float64 + + * + - CDNA1 + - ✅ + - ✅ + - ❌ + - ✅ + - ❌ + + * + - CDNA2 + - ✅ + - ✅ + - ❌ + - ✅ + - ✅ + + * + - CDNA3 + - ✅ + - ✅ + - ❌ + - ✅ + - ✅ + + * + - CDNA4 + - ✅ + - ✅ + - ❌ + - ✅ + - ✅ + + * + - RDNA2 + - ❌ + - ❌ + - ❌ + - ✅ + - ❌ + + * + - RDNA3 + - ❌ + - ❌ + - ❌ + - ✅ + - ❌ + + * + - RDNA4 - ✅ - ✅ - ❌ @@ -662,295 +1009,64 @@ Libraries input/output type support ----------------------------------- The following tables list ROCm library support for specific input and output -data types. Refer to the corresponding library data type support page for a -detailed description. +data types. Select a library from the below table to view the supported data +types. -.. tab-set:: +.. datatemplate:yaml:: /data/reference/precision-support/precision-support.yaml - .. tab-item:: Integral types - :sync: integral-type + {% set library_groups = data.library_groups %} - .. list-table:: - :header-rows: 1 + .. raw:: html - * - - Library input/output data type name - - int8 - - int16 - - int32 - - int64 +
+      <div class="filter-container">
+        <div class="filter-label">Category</div>
+        <div class="filter-buttons">
+          {% for group in library_groups %}
+          <button class="btn category-btn" data-tag="{{ group.tag }}">
+            {{ group.group }}
+          </button>
+          {% endfor %}
+        </div>
+      </div>
-   *
-     - :doc:`Composable Kernel `
-     - ✅/✅
-     - ❌/❌
-     - ✅/✅
-     - ❌/❌
+
+      <div class="filter-container">
+        <div class="filter-label">Library</div>
+        <div class="filter-buttons">
+          {% for group in library_groups %}
+          {% for library in group.libraries %}
+          <button class="btn library-btn" data-category="{{ group.tag }}" data-tag="{{ library.tag }}">
+            {{ library.name }}
+          </button>
+          {% endfor %}
+          {% endfor %}
+        </div>
+      </div>
- * - - :doc:`hipCUB ` - - ✅/✅ - - ✅/✅ - - ✅/✅ - - ✅/✅ + {% for group in library_groups %} + {% for library in group.libraries %} - * - - :doc:`hipRAND ` - - NA/✅ - - NA/✅ - - NA/✅ - - NA/✅ + .. container:: model-doc {{ library.tag }} - * - - :doc:`hipSOLVER ` - - ❌/❌ - - ❌/❌ - - ❌/❌ - - ❌/❌ + For more information, please visit :doc:`{{ library.name }} <{{ library.doc_link }}>`. - * - - :doc:`hipSPARSELt ` - - ✅/✅ - - ❌/❌ - - ❌/❌ - - ❌/❌ + .. list-table:: + :header-rows: 1 + :widths: 70, 30 - * - - :doc:`hipTensor ` - - ❌/❌ - - ❌/❌ - - ❌/❌ - - ❌/❌ + * + - Data Type + - Support + {% for data_type in library.data_types %} + * + - {{ data_type.type }} + - {{ data_type.support }} + {% endfor %} - * - - :doc:`MIGraphX ` - - ✅/✅ - - ✅/✅ - - ✅/✅ - - ✅/✅ + {% endfor %} + {% endfor %} - * - - :doc:`MIOpen ` - - ⚠️/⚠️ - - ❌/❌ - - ⚠️/⚠️ - - ❌/❌ +.. note:: - * - - :doc:`RCCL ` - - ✅/✅ - - ❌/❌ - - ✅/✅ - - ✅/✅ - - * - - :doc:`rocFFT ` - - ❌/❌ - - ❌/❌ - - ❌/❌ - - ❌/❌ - - * - - :doc:`rocPRIM ` - - ✅/✅ - - ✅/✅ - - ✅/✅ - - ✅/✅ - - * - - :doc:`rocRAND ` - - NA/✅ - - NA/✅ - - NA/✅ - - NA/✅ - - * - - :doc:`rocSOLVER ` - - ❌/❌ - - ❌/❌ - - ❌/❌ - - ❌/❌ - - * - - :doc:`rocThrust ` - - ✅/✅ - - ✅/✅ - - ✅/✅ - - ✅/✅ - - * - - :doc:`rocWMMA ` - - ✅/✅ - - ❌/❌ - - ❌/✅ - - ❌/❌ - - - .. tab-item:: Floating-point types - :sync: floating-point-type - - .. list-table:: - :header-rows: 1 - - * - - Library input/output data type name - - float8 (E4M3) - - float8 (E5M2) - - float16 - - bfloat16 - - tensorfloat32 - - float32 - - float64 - - * - - :doc:`Composable Kernel ` - - ✅/✅ - - ✅/✅ - - ✅/✅ - - ✅/✅ - - ❌/❌ - - ✅/✅ - - ✅/✅ - - * - - :doc:`hipCUB ` - - ❌/❌ - - ❌/❌ - - ✅/✅ - - ✅/✅ - - ❌/❌ - - ✅/✅ - - ✅/✅ - - * - - :doc:`hipRAND ` - - NA/❌ - - NA/❌ - - NA/✅ - - NA/❌ - - NA/❌ - - NA/✅ - - NA/✅ - - * - - :doc:`hipSOLVER ` - - ❌/❌ - - ❌/❌ - - ❌/❌ - - ❌/❌ - - ❌/❌ - - ✅/✅ - - ✅/✅ - - * - - :doc:`hipSPARSELt ` - - ✅/✅ - - ✅/✅ - - ✅/✅ - - ✅/✅ - - ❌/❌ - - ❌/❌ - - ❌/❌ - - * - - :doc:`hipTensor ` - - ❌/❌ - - ❌/❌ - - ✅/✅ - - ✅/✅ - - ❌/❌ - - ✅/✅ - - ✅/✅ - - * - - :doc:`MIGraphX ` - - ✅/✅ - - ✅/✅ - - ✅/✅ - - ✅/✅ - - ✅/✅ - - ✅/✅ - - ✅/✅ - - * - - :doc:`MIOpen ` - - ⚠️/⚠️ - - ⚠️/⚠️ - - ✅/✅ - - ⚠️/⚠️ - - ❌/❌ - - ✅/✅ - - ⚠️/⚠️ - - * - - :doc:`RCCL ` - - ✅/✅ - - ✅/✅ - - ✅/✅ - - ✅/✅ - - ❌/❌ - - ✅/✅ - - ✅/✅ - - * - - :doc:`rocFFT ` - - ❌/❌ - - ❌/❌ - - ✅/✅ - - ❌/❌ - - ❌/❌ - - ✅/✅ - - ✅/✅ - - * - - :doc:`rocPRIM ` - - ❌/❌ - - ❌/❌ - - ✅/✅ - - ✅/✅ - - ❌/❌ - - ✅/✅ - - ✅/✅ - - * - - :doc:`rocRAND ` - - NA/❌ - - NA/❌ - - NA/✅ - - NA/❌ - - NA/❌ - - NA/✅ - - NA/✅ - - * - - :doc:`rocSOLVER ` - - ❌/❌ - - ❌/❌ - - ❌/❌ - - ❌/❌ - - ❌/❌ - - ✅/✅ - - ✅/✅ - - * - - :doc:`rocThrust ` - - ❌/❌ - - ❌/❌ - - ⚠️/⚠️ - - ⚠️/⚠️ - - ❌/❌ - - ✅/✅ - - ✅/✅ - - * - - :doc:`rocWMMA ` - - ✅/❌ - - ✅/❌ - - ✅/✅ - - ✅/✅ - - ✅/✅ - - ✅/✅ - - ✅/✅ + The meaning of partial support depends on the library. Please refer to the individual + libraries' documentation for more information. .. note:: @@ -958,6 +1074,15 @@ detailed description. data types for the random values they generate, with no need for input data types. +.. note:: + + hipBLASLt supports additional data types as internal compute types, which may + differ from the supported input/output types shown in the tables above. While + TensorFloat32 is not supported as an input or output type in this library, it + is available as an internal compute type. For complete details on supported + compute types, refer to the :doc:`hipBLASLt ` + documentation. 
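+
+   As a schematic illustration of this distinction (and not of hipBLASLt's
+   actual implementation), the following HIP kernel stores its operands as
+   ``float16`` but accumulates in ``float32``, so the accumulator acts as a
+   wider internal compute type than the input/output type:
+
+   .. code-block:: cpp
+
+      #include <hip/hip_fp16.h>
+
+      // Naive single-thread dot product: half-precision storage,
+      // single-precision (float32) accumulation.
+      __global__ void dot_fp16_acc_fp32(const __half* x, const __half* y,
+                                        float* out, int n) {
+          float acc = 0.0f;  // internal compute type wider than the I/O type
+          for (int i = 0; i < n; ++i) {
+              acc += __half2float(x[i]) * __half2float(y[i]);
+          }
+          *out = acc;
+      }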
+

 hipDataType enumeration
 -----------------------

@@ -1049,6 +1174,24 @@ following table with descriptions and values.
      - 29
      - 8-bit real bfloat8 precision floating-point (OCP version).

+   *
+     - ``HIP_R_6F_E2M3``
+     - ``__hip_fp6_e2m3``
+     - 31
+     - 6-bit real float6 precision floating-point.
+
+   *
+     - ``HIP_R_6F_E3M2``
+     - ``__hip_fp6_e3m2``
+     - 32
+     - 6-bit real bfloat6 precision floating-point.
+
+   *
+     - ``HIP_R_4F_E2M1``
+     - ``__hip_fp4_e2m1``
+     - 33
+     - 4-bit real float4 precision floating-point.
+
    *
      - ``HIP_R_8F_E4M3_FNUZ``
      - ``__hip_fp8_e4m3_fnuz``
@@ -1061,4 +1204,4 @@
      - 1001
      - 8-bit real bfloat8 precision floating-point (FNUZ version).

-The full list of the ``hipDataType`` enumeration listed in `library_types.h `_ .
+The full list of the ``hipDataType`` enumeration is listed in `library_types.h `_.
diff --git a/docs/sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in
index 1f56af9a8..b6d1343cf 100644
--- a/docs/sphinx/_toc.yml.in
+++ b/docs/sphinx/_toc.yml.in
@@ -180,7 +180,7 @@ subtrees:
   - file: reference/gpu-arch-specs.rst
   - file: reference/gpu-atomics-operation.rst
   - file: reference/precision-support.rst
-    title: Precision support
+    title: Data types and precision support
   - file: reference/graph-safe-support.rst
     title: Graph safe support