Pitchfork Standard for Docs (#1918)
2
.github/CODEOWNERS
vendored
@@ -1 +1 @@
|
||||
* @saadrahim @Rmalavally @amd-aakash @zhang2amd @jlgreathouse
|
||||
* @saadrahim @Rmalavally @amd-aakash @zhang2amd @jlgreathouse @samjwu
|
||||
|
||||
@@ -4,11 +4,11 @@
|
||||
version: 2
|
||||
|
||||
sphinx:
|
||||
configuration: docs/sphinx/conf.py
|
||||
configuration: docs/conf.py
|
||||
|
||||
formats: all
|
||||
|
||||
python:
|
||||
version: "3.8"
|
||||
install:
|
||||
- requirements: docs/sphinx/requirements.txt
|
||||
- requirements: docs/.sphinx/requirements.txt
|
||||
|
||||
10
README.md
@@ -6,6 +6,16 @@ repositories and the associated commit used to build the current ROCm release.
|
||||
|
||||
The default.xml file uses the repo Manifest format.
|
||||
|
||||
## How to build documentation via Sphinx
|
||||
|
||||
```bash
|
||||
cd docs
|
||||
|
||||
pip3 install -r .sphinx/requirements.txt
|
||||
|
||||
python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html
|
||||
```
|
||||
|
||||
## ROCm v5.4.3 Release Notes
|
||||
|
||||
ROCm v5.4.3 is now released. For ROCm v5.4.3 documentation, refer to
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||
|
||||
import shutil
|
||||
shutil.copy2('../../CHANGELOG.md','./')
|
||||
shutil.copy2('../../RELEASE.md','./')
|
||||
shutil.copy2('../CHANGELOG.md','./')
|
||||
shutil.copy2('../RELEASE.md','./')
|
||||
|
||||
from rocm_docs import ROCmDocs
|
||||
|
||||
|
Before Width: | Height: | Size: 163 KiB After Width: | Height: | Size: 163 KiB |
|
Before Width: | Height: | Size: 2.1 KiB After Width: | Height: | Size: 2.1 KiB |
|
Before Width: | Height: | Size: 34 KiB After Width: | Height: | Size: 34 KiB |
|
Before Width: | Height: | Size: 183 KiB After Width: | Height: | Size: 183 KiB |
|
Before Width: | Height: | Size: 40 KiB After Width: | Height: | Size: 40 KiB |
|
Before Width: | Height: | Size: 40 KiB After Width: | Height: | Size: 40 KiB |
|
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
|
Before Width: | Height: | Size: 38 KiB After Width: | Height: | Size: 38 KiB |
|
Before Width: | Height: | Size: 407 KiB After Width: | Height: | Size: 407 KiB |
|
Before Width: | Height: | Size: 465 KiB After Width: | Height: | Size: 465 KiB |
|
Before Width: | Height: | Size: 207 KiB After Width: | Height: | Size: 207 KiB |
|
Before Width: | Height: | Size: 461 KiB After Width: | Height: | Size: 461 KiB |
|
Before Width: | Height: | Size: 461 KiB After Width: | Height: | Size: 461 KiB |
|
Before Width: | Height: | Size: 3.5 KiB After Width: | Height: | Size: 3.5 KiB |
|
Before Width: | Height: | Size: 412 KiB After Width: | Height: | Size: 412 KiB |
|
Before Width: | Height: | Size: 68 KiB After Width: | Height: | Size: 68 KiB |
|
Before Width: | Height: | Size: 88 KiB After Width: | Height: | Size: 88 KiB |
|
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 32 KiB |
@@ -35,15 +35,15 @@ The installer will begin to load and detect your system's configuration and
|
||||
compatibility, as shown in Figure 2. A completely loaded AMD HIP SDK Installer
|
||||
window will appear, as shown in Figure 3.
|
||||
|
||||
|  |
|
||||
|  |
|
||||
|:------------------------------:|
|
||||
| **Figure 1. Setup Icon** |
|
||||
|
||||
|  |
|
||||
|  |
|
||||
|:-------------------------------------------:|
|
||||
| **Figure 2. AMD HIP SDK Loading Window** |
|
||||
|
||||
|  |
|
||||
|  |
|
||||
|:-----------------------------------------------:|
|
||||
| **Figure 3. AMD HIP SDK Installer Window** |
|
||||
|
||||
@@ -76,7 +76,7 @@ in the upper right corner of the installer window, as seen in Figure 3. Figure 4
|
||||
demonstrates the installer window once the installation components are all
|
||||
deselected.
|
||||
|
||||
|  |
|
||||
|  |
|
||||
|:--------------------------------------:|
|
||||
| **Figure 4. Deselect All Selection** |
|
||||
|
||||
@@ -95,23 +95,23 @@ Figures 4 through 8 demonstrate the options available to you when you click
|
||||
**Note** You can manually select installation locations for the HIP SDK Core, as
|
||||
shown in Figure 5.
|
||||
|
||||
|  |
|
||||
|  |
|
||||
|:---------------------------------------:|
|
||||
| **Figure 5. HIP SDK Core Option** |
|
||||
|
||||
|  |
|
||||
|  |
|
||||
|:-----------------------------------------:|
|
||||
| **Figure 6. HIP Libraries Option** |
|
||||
|
||||
|  |
|
||||
|  |
|
||||
|:-------------------------------------------------------:|
|
||||
| **Figure 7. HIP Runtime Compiler Option** |
|
||||
|
||||
|  |
|
||||
|  |
|
||||
|:---------------------------------------------:|
|
||||
| **Figure 8. HIP Ray Tracing** |
|
||||
|
||||
|  |
|
||||
|  |
|
||||
|:-----------------------------------------------:|
|
||||
| **Figure 9. BitCode Profiler** |
|
||||
|
||||
@@ -148,7 +148,7 @@ previously installed drivers.
|
||||
|
||||
Please wait for the installation to complete, as shown in Figure 11.
|
||||
|
||||
|  |
|
||||
|  |
|
||||
|:-------------------------------------:|
|
||||
| **Figure 11. Active Installation** |
|
||||
|
||||
@@ -157,7 +157,7 @@ Please wait for the installation to complete during as shown in Figure 11.
|
||||
Once the installation is complete, the installer window may prompt you for a
|
||||
system restart. Click **Restart** at the lower right corner, shown in Figure 12.
|
||||
|
||||
|  |
|
||||
|  |
|
||||
|:---------------------------------------------------------:|
|
||||
| **Figure 12. Installation Complete** |
|
||||
|
||||
@@ -174,6 +174,6 @@ uninstallation of the HIP SDK Core and drivers repeat the steps in the sections
|
||||
**Note** Selecting **Install** once ROCm has already been installed results in its
|
||||
uninstallation.
|
||||
|
||||
|  |
|
||||
|  |
|
||||
|:----------------------------------------:|
|
||||
| **Figure 13. HIP SDK Uninstalling** |
|
||||
@@ -9,7 +9,7 @@ each framework. Refer to the ROCm Compatible Frameworks Release Notes for each
|
||||
framework's most current release notes at
|
||||
[/bundle/ROCm-Compatible-Frameworks-Release-Notes/page/Framework_Release_Notes.html](/bundle/ROCm-Compatible-Frameworks-Release-Notes/page/Framework_Release_Notes.html).
|
||||
|
||||
| |
|
||||
| |
|
||||
|:--:|
|
||||
| <b>Figure 5. ROCm Compatible Frameworks Flowchart</b>|
|
||||
|
||||
@@ -47,7 +47,7 @@ This will automatically download the image if it does not exist on the host. You
|
||||
PyTorch supports the ROCm platform by providing tested wheels packages. To access this feature, refer to [https://pytorch.org/get-started/locally/](https://pytorch.org/get-started/locally/) and choose the "ROCm" compute platform. Figure 6 is a matrix from pytorch.org that illustrates the installation compatibility between ROCm and the PyTorch build.
|
||||
|
||||
|
||||
| |
|
||||
| |
|
||||
|:--:|
|
||||
| <b>Figure 6. Installation Matrix from Pytorch.org</b>|
|
||||
|
||||
1
docs/reference/compilers.md
Normal file
@@ -0,0 +1 @@
|
||||
# Compilers
|
||||
@@ -1,4 +1,4 @@
|
||||
GPU and OS Support
|
||||
# GPU and OS Support
|
||||
|
||||
## OS Support
|
||||
|
||||
@@ -79,7 +79,7 @@ ROCm software support varies by GPU type and Operating System. ROCm ecosystem pr
|
||||
- HIP enables the use of the HIP Runtime only.
|
||||
|
||||
|
||||
### GPU Support Levels
|
||||
### Support Status
|
||||
|
||||
GPU support levels in ROCm:
|
||||
|
||||
@@ -1,882 +0,0 @@
|
||||
# Changelog
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
## AMD ROCm™ Releases
|
||||
|
||||
### AMD ROCm™ V5.2 Release
|
||||
|
||||
AMD ROCm v5.2 is now released. The release documentation is available at
|
||||
<https://docs.amd.com>.
|
||||
|
||||
### AMD ROCm™ V5.1.3 Release
|
||||
|
||||
AMD ROCm v5.1.3 is now released. The release documentation is available at
|
||||
<https://docs.amd.com>.
|
||||
|
||||
### AMD ROCm™ V5.1.1 Release
|
||||
|
||||
AMD ROCm v5.1.1 is now released. The release documentation is available at
|
||||
<https://docs.amd.com>.
|
||||
|
||||
### AMD ROCm™ V5.1 Release
|
||||
|
||||
AMD ROCm v5.1 is now released. The release documentation is available at
|
||||
<https://docs.amd.com>.
|
||||
|
||||
### AMD ROCm™ v5.0.2 Release Notes
|
||||
|
||||
#### Fixed Defects in This Release
|
||||
|
||||
The following defects are fixed in the ROCm v5.0.2 release.
|
||||
|
||||
##### Issue with hostcall Facility in HIP Runtime
|
||||
|
||||
In ROCm v5.0, when using the `assert()` call in a HIP kernel, the compiler may
|
||||
sometimes fail to emit kernel metadata related to the hostcall facility, which
|
||||
results in incomplete initialization of the hostcall facility in the HIP
|
||||
runtime. This can cause the HIP kernel to crash when it attempts to execute the
|
||||
`assert()` call. The root cause was an incorrect check in the compiler to
|
||||
determine whether the hostcall facility is required by the kernel. This is fixed
|
||||
in the ROCm v5.0.2 release. The resolution includes a compiler change, which
|
||||
emits the required metadata by default, unless the compiler can prove that the
|
||||
hostcall facility is not required by the kernel. This ensures that the
|
||||
`assert()` call never fails.
|
||||
|
||||
**Note**: This fix may lead to breakage in some OpenMP offload use cases, which
|
||||
use print inside a target region and result in an abort in device code.
|
||||
The issue will be fixed in a future release.
|
||||
|
||||
##### Compatibility Matrix Updates to ROCm Deep Learning Guide
|
||||
|
||||
The compatibility matrix in the AMD Deep Learning Guide is updated for ROCm
|
||||
v5.0.2.
|
||||
|
||||
For more information and documentation updates, refer to <https://docs.amd.com>.
|
||||
|
||||
### AMD ROCm™ v5.0.1 Release Notes
|
||||
|
||||
#### Deprecations and Warnings
|
||||
|
||||
##### Refactor of HIPCC/HIPCONFIG
|
||||
|
||||
In prior ROCm releases, by default, the `hipcc`/`hipconfig` Perl scripts were
|
||||
used to identify and set target compiler options, target platform, compiler, and
|
||||
runtime appropriately.
|
||||
|
||||
In ROCm v5.0.1, `hipcc.bin` and `hipconfig.bin` have been added as the compiled
|
||||
binary implementations of the `hipcc` and `hipconfig`. These new binaries are
|
||||
currently a work-in-progress, considered, and marked as experimental. ROCm plans
|
||||
to fully transition to `hipcc.bin` and `hipconfig.bin` in a future ROCm
|
||||
release. The existing `hipcc` and `hipconfig` Perl scripts are renamed to
|
||||
`hipcc.pl` and `hipconfig.pl` respectively. New top-level `hipcc` and
|
||||
`hipconfig` Perl scripts are created, which can switch between the Perl script
|
||||
or the compiled binary based on the environment variable
|
||||
`HIPCC_USE_PERL_SCRIPT`.
|
||||
|
||||
In ROCm 5.0.1, by default, this environment variable is set to use `hipcc` and
|
||||
`hipconfig` through the Perl scripts. Subsequently, Perl scripts will no longer
|
||||
be available in ROCm in a future release.
|
||||
|
||||
#### ROCM DOCUMENTATION UPDATES FOR ROCM 5.0.1
|
||||
|
||||
- ROCm Downloads Guide
|
||||
|
||||
- ROCm Installation Guide
|
||||
|
||||
- ROCm Release Notes
|
||||
|
||||
For more information, see <https://docs.amd.com>.
|
||||
|
||||
### AMD ROCm™ v5.0 Release Notes
|
||||
|
||||
## ROCm Installation Updates
|
||||
|
||||
This document describes the features, fixed issues, and information about
|
||||
downloading and installing the AMD ROCm™ software.
|
||||
|
||||
It also covers known issues and deprecations in this release.
|
||||
|
||||
## Notice for Open-source and Closed-source ROCm Repositories in Future Releases
|
||||
|
||||
To make a distinction between open-source and closed-source components, all ROCm
|
||||
repositories will consist of sub-folders in future releases.
|
||||
|
||||
- All open-source components will be placed in the `base-url/<rocm-ver>/main`
|
||||
sub-folder
|
||||
- All closed-source components will reside in the
|
||||
`base-url/<rocm-ver>/proprietary` sub-folder
|
||||
|
||||
### List of Supported Operating Systems
|
||||
|
||||
The AMD ROCm platform supports the following operating systems:
|
||||
|
||||
| **OS-Version (64-bit)** | **Kernel Versions** |
|
||||
|:-------------------------------:|:-----------------------------:|
|
||||
| CentOS 8.3 | `4.18.0-193.el8` |
|
||||
| CentOS 7.9 | `3.10.0-1127` |
|
||||
| RHEL 8.5 | `4.18.0-348.7.1.el8_5.x86_64` |
|
||||
| RHEL 8.4 | `4.18.0-305.el8.x86_64` |
|
||||
| RHEL 7.9 | `3.10.0-1160.6.1.el7` |
|
||||
| SLES 15 SP3 | `5.3.18-59.16-default` |
|
||||
| Ubuntu 20.04.3 | `5.8.0 LTS / 5.11 HWE` |
|
||||
| Ubuntu 18.04.5 [5.4 HWE kernel] | `5.4.0-71-generic` |
|
||||
|
||||
#### Support for RHEL v8.5
|
||||
|
||||
This release extends support for RHEL v8.5.
|
||||
|
||||
#### Supported GPUs
|
||||
|
||||
##### Radeon Pro V620 and W6800 Workstation GPUs
|
||||
|
||||
This release extends ROCm support for Radeon Pro V620 and W6800 Workstation
|
||||
GPUs.
|
||||
|
||||
- SRIOV virtualization support for Radeon Pro V620
|
||||
- KVM Hypervisor (1VF support only) on Ubuntu Host OS with Ubuntu, CentOS, and
|
||||
RHEL Guest
|
||||
- Support for ROCm-SMI in an SRIOV environment. For more details, refer to the
|
||||
ROCm SMI API documentation.
|
||||
|
||||
**Note:** Radeon Pro v620 is not supported on SLES.
|
||||
|
||||
### ROCm Installation Updates for ROCm v5.0
|
||||
|
||||
This release has the following ROCm installation enhancements.
|
||||
|
||||
#### Support for Kernel Mode Driver
|
||||
|
||||
In this release, users can install the kernel-mode driver using the Installer
|
||||
method. Some of the ROCm-specific use cases that the installer currently
|
||||
supports are:
|
||||
|
||||
- OpenCL (ROCr/KFD based) runtime
|
||||
- HIP runtimes
|
||||
- ROCm libraries and applications
|
||||
- ROCm Compiler and device libraries
|
||||
- ROCr runtime and thunk
|
||||
- Kernel-mode driver
|
||||
|
||||
#### Support for Multi-version ROCm Installation and Uninstallation
|
||||
|
||||
Users now can install multiple ROCm releases simultaneously on a system using
|
||||
the newly introduced installer script and package manager install mechanism.
|
||||
|
||||
Users can also uninstall multi-version ROCm releases using the
|
||||
`amdgpu-uninstall` script and package manager.
|
||||
|
||||
#### Support for Updating Information on Local Repositories
|
||||
|
||||
In this release, the `amdgpu-install` script automates the process of updating
|
||||
local repository information before proceeding to ROCm installation.
|
||||
|
||||
#### Support for Release Upgrades
|
||||
|
||||
Users can now upgrade the existing ROCm installation to specific or latest ROCm
|
||||
releases.
|
||||
|
||||
For more details, refer to the AMD ROCm Installation Guide v5.0.
|
||||
|
||||
## AMD ROCm V5.0 Documentation Updates
|
||||
|
||||
### New AMD ROCm Information Portal – ROCm v4.5 and Above
|
||||
|
||||
Beginning ROCm release v5.0, AMD ROCm documentation has a new portal at
|
||||
<https://docs.amd.com>. This portal consists of ROCm documentation v4.5 and
|
||||
above.
|
||||
|
||||
For documentation prior to ROCm v4.5, you may continue to access
|
||||
<https://rocmdocs.amd.com>.
|
||||
|
||||
### Documentation Updates for ROCm 5.0
|
||||
|
||||
#### Deployment Tools
|
||||
|
||||
##### ROCm Data Center Tool Documentation Updates
|
||||
|
||||
- ROCm Data Center Tool User Guide
|
||||
- ROCm Data Center Tool API Guide
|
||||
|
||||
##### ROCm System Management Interface Updates
|
||||
|
||||
- System Management Interface Guide
|
||||
- System Management Interface API Guide
|
||||
|
||||
##### ROCm Command Line Interface Updates
|
||||
|
||||
- Command Line Interface Guide
|
||||
|
||||
#### Machine Learning/AI Documentation Updates
|
||||
|
||||
- Deep Learning Guide
|
||||
- MIGraphX API Guide
|
||||
- MIOpen API Guide
|
||||
- MIVisionX API Guide
|
||||
|
||||
#### ROCm Libraries Documentation Updates
|
||||
|
||||
- hipSOLVER User Guide
|
||||
- RCCL User Guide
|
||||
- rocALUTION User Guide
|
||||
- rocBLAS User Guide
|
||||
- rocFFT User Guide
|
||||
- rocRAND User Guide
|
||||
- rocSOLVER User Guide
|
||||
- rocSPARSE User Guide
|
||||
- rocThrust User Guide
|
||||
|
||||
#### Compilers and Tools
|
||||
|
||||
##### ROCDebugger Documentation Updates
|
||||
|
||||
- ROCDebugger User Guide
|
||||
- ROCDebugger API Guide
|
||||
|
||||
##### ROCTracer
|
||||
|
||||
- ROCTracer User Guide
|
||||
- ROCTracer API Guide
|
||||
|
||||
##### Compilers
|
||||
|
||||
- AMD Instinct High Performance Computing and Tuning Guide
|
||||
- AMD Compiler Reference Guide
|
||||
|
||||
##### HIPify Documentation
|
||||
|
||||
- HIPify User Guide
|
||||
- HIP Supported CUDA API Reference Guide
|
||||
|
||||
##### ROCm Debug Agent
|
||||
|
||||
- ROCm Debug Agent Guide
|
||||
- System Level Debug Guide
|
||||
- ROCm Validation Suite
|
||||
|
||||
#### Programming Models Documentation
|
||||
|
||||
##### HIP Documentation
|
||||
|
||||
- HIP Programming Guide
|
||||
- HIP API Guide
|
||||
- HIP FAQ Guide
|
||||
|
||||
##### OpenMP Documentation
|
||||
|
||||
- OpenMP Support Guide
|
||||
|
||||
#### ROCm Glossary
|
||||
|
||||
- ROCm Glossary – Terms and Definitions
|
||||
|
||||
### AMD ROCm Legacy Documentation Links – ROCm v4.3 and Prior
|
||||
|
||||
- For AMD ROCm documentation, see <https://rocmdocs.amd.com/en/latest/>
|
||||
|
||||
- For installation instructions on supported platforms, see
|
||||
<https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html>
|
||||
|
||||
- For AMD ROCm binary structure, see
|
||||
<https://rocmdocs.amd.com/en/latest/Installation_Guide/Software-Stack-for-AMD-GPU.html>
|
||||
|
||||
- For AMD ROCm release history, see
|
||||
<https://rocmdocs.amd.com/en/latest/Current_Release_Notes/ROCm-Version-History.html>
|
||||
|
||||
## What's New in This Release
|
||||
|
||||
### HIP Enhancements
|
||||
|
||||
The ROCm v5.0 release consists of the following HIP enhancements.
|
||||
|
||||
#### HIP Installation Guide Updates
|
||||
|
||||
The HIP Installation Guide is updated to include building HIP from source on the
|
||||
NVIDIA platform.
|
||||
|
||||
Refer to the HIP Installation Guide v5.0 for more details.
|
||||
|
||||
#### Managed Memory Allocation
|
||||
|
||||
Managed memory, including the `__managed__` keyword, is now supported in the HIP
|
||||
combined host/device compilation. Through unified memory allocation, managed
|
||||
memory allows data to be shared and accessible to both the CPU and GPU using a
|
||||
single pointer. The allocation is managed by the AMD GPU driver using the Linux
|
||||
Heterogeneous Memory Management (HMM) mechanism. The user can call managed
|
||||
memory API `hipMallocManaged` to allocate a large chunk of HMM memory, execute
|
||||
kernels on a device, and fetch data between the host and device as needed.
|
||||
|
||||
**Note:** In a HIP application, it is recommended to do a capability check
|
||||
before calling the managed memory APIs. For example,
|
||||
|
||||
```cpp
|
||||
int managed_memory = 0;
|
||||
HIPCHECK(hipDeviceGetAttribute(&managed_memory, hipDeviceAttributeManagedMemory, p_gpuDevice));
|
||||
|
||||
if (!managed_memory) {
|
||||
printf ("info: managed memory access not supported on the device %d\n Skipped\n", p_gpuDevice);
|
||||
} else {
|
||||
HIPCHECK(hipSetDevice(p_gpuDevice));
|
||||
HIPCHECK(hipMallocManaged(&Hmm, N * sizeof(T)));
|
||||
. . .
|
||||
}
|
||||
```
|
||||
|
||||
**Note:** The managed memory capability check may not be necessary; however, if
|
||||
HMM is not supported, managed `malloc` will fall back to using system memory.
|
||||
|
||||
Refer to the HIP API documentation for more details on managed memory APIs.
|
||||
|
||||
For the application, see
|
||||
[hipMallocManaged.cpp](https://github.com/ROCm-Developer-Tools/HIP/blob/rocm-4.5.x/tests/src/runtimeApi/memory/hipMallocManaged.cpp)
|
||||
|
||||
### New Environment Variable
|
||||
|
||||
The following new environment variable is added in this release:
|
||||
|
||||
| **Environment Variable** | **Value** | **Description** |
|
||||
|:------------------------:|:---------------------:|:--------------------------------------------------------|
|
||||
| `HSA_COOP_CU_COUNT` | 0 or 1 (default is 0) | Some processors support more compute units than can reliably be used in a cooperative dispatch. Setting the environment variable `HSA_COOP_CU_COUNT` to 1 will cause ROCr to return the correct CU count for cooperative groups through the `HSA_AMD_AGENT_INFO_COOPERATIVE_COMPUTE_UNIT_COUNT` attribute of `hsa_agent_get_info()`. Setting `HSA_COOP_CU_COUNT` to other values, or leaving it unset, will cause ROCr to return the same CU count for the attributes `HSA_AMD_AGENT_INFO_COOPERATIVE_COMPUTE_UNIT_COUNT` and `HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT`. Future ROCm releases will make `HSA_COOP_CU_COUNT = 1` the default. |
|
||||
|
||||
### ROCm Math and Communication Libraries
|
||||
|
||||
| **Library** | **Changes** |
|
||||
|:--------------:|:----------------------------------------------------------------------------------------|
|
||||
| **rocBLAS** | **Added** <ul><li>Added `rocblas_get_version_string_size` convenience function</li><li>Added `rocblas_xtrmm_outofplace`, an out-of-place version of `rocblas_xtrmm`</li><li>Added hpl and trig initialization for `gemm_ex` to `rocblas-bench`</li><li>Added source code gemm. It can be used as an alternative to Tensile for debugging and development</li><li>Added option `ROCM_MATHLIBS_API_USE_HIP_COMPLEX` to opt-in to use `hipFloatComplex` and `hipDoubleComplex`</li></ul> **Optimizations** <ul><li>Improved performance of non-batched and batched single-precision GER for size m > 1024. Performance enhanced by 5-10% measured on a MI100 (gfx908) GPU.</li><li>Improved performance of non-batched and batched HER for all sizes and data types. Performance enhanced by 2-17% measured on a MI100 (gfx908) GPU.</li></ul> **Changed** <ul><li>Instantiate templated rocBLAS functions to reduce size of librocblas.so</li><li>Removed static library dependency on msgpack</li><li>Removed boost dependencies for clients</li></ul> **Fixed** <ul><li>Option to install script to build only rocBLAS clients with a pre-built rocBLAS library</li><li>Correctly set output of `nrm2_batched_ex` and `nrm2_strided_batched_ex` when given bad input</li><li>Fix for dgmm with side == `rocblas_side_left` and a negative incx</li><li>Fixed out-of-bounds read for small trsm</li><li>Fixed numerical checking for `tbmv_strided_batched`</li></ul> |
|
||||
| | |
|
||||
| **hipBLAS** | **Added** <ul><li>Added rocSOLVER functions to hipblas-bench</li><li>Added option `ROCM_MATHLIBS_API_USE_HIP_COMPLEX` to opt-in to use `hipFloatComplex` and `hipDoubleComplex`</li><li>Added compilation warning for future trmm changes</li><li>Added documentation to `hipblas.h`</li><li>Added option to forgo pivoting for getrf and getri when ipiv is `nullptr`</li><li>Added code coverage option</li></ul> **Fixed** <ul><li>Fixed use of incorrect `HIP_PATH` when building from source.</li><li>Fixed windows packaging</li><li>Allowing negative increments in hipblas-bench</li><li>Removed boost dependency</li></ul> |
|
||||
| | |
|
||||
| **rocFFT** | **Changed** <ul><li>Enabled runtime compilation of single FFT kernels > length 1024.</li><li>Re-aligned split device library into 4 roughly equal libraries.</li><li>Implemented the FuseShim framework to replace the original OptimizePlan</li><li>Implemented the generic buffer-assignment framework. The buffer assignment is no longer performed by each node. A generic algorithm is designed to test and pick the best assignment path. With the help of FuseShim, more kernel-fusions are achieved.</li><li>Do not read the imaginary part of the DC and Nyquist modes for even-length complex-to-real transforms.</li></ul> **Optimizations** <ul><li>Optimized twiddle-conjugation; complex-to-complex inverse transforms have similar performance to forward transforms now.</li><li>Improved performance of single-kernel small 2D transforms.</li></ul> |
|
||||
| | |
|
||||
| **hipFFT** | **Fixed** <ul><li>Fixed incorrect reporting of rocFFT version.</li></ul> **Changed** <ul><li>Unconditionally enabled callback functionality. On the CUDA backend, callbacks only run correctly when hipFFT is built as a static library, and is linked against the static cuFFT library.</li></ul> |
|
||||
| | |
|
||||
| **rocSPARSE** | **Added** <ul><li>csrmv, coomv, ellmv, hybmv for (conjugate) transposed matrices</li><li>csrmv for symmetric matrices</li></ul> **Changed** <ul><li>`spmm_ex` is now deprecated and will be removed in the next major release</li></ul> **Improved** <ul><li>Optimization for gtsv</li></ul> |
|
||||
| | |
|
||||
| **hipSPARSE** | **Added** <ul><li>Added (conjugate) transpose support for csrmv, hybmv and spmv routines</li></ul> |
|
||||
| | |
|
||||
| **rocALUTION** | **Changed** <ul><li>Removed deprecated GlobalPairwiseAMG class, please use PairwiseAMG instead.</li></ul> **Improved** <ul><li>Improved documentation</li></ul> |
|
||||
| | |
|
||||
| **rocTHRUST** | **Updates** <ul><li>Updated to match upstream Thrust 1.13.0</li><li>Updated to match upstream Thrust 1.14.0</li><li>Added async scan</li></ul> **Changed** <ul><li>Scan algorithms: `inclusive_scan` now uses the input-type as accumulator-type, `exclusive_scan` uses initial-value-type. This particularly changes behaviour of small-size input types with large-size output types (e.g. `short` input, `int` output). And low-res input with high-res output (e.g. float input, double output)</li></ul> |
|
||||
| | |
|
||||
| **rocSOLVER** | **Added** <ul><li>Symmetric matrix factorizations: <ul><li>LASYF</li><li>SYTF2, SYTRF (with `batched` and `strided_batched` versions)</li></ul><li>Added `rocsolver_get_version_string_size` to help with version string queries</li><li>Added `rocblas_layer_mode_ex` and the ability to print kernel calls in the trace and profile logs</li><li>Expanded batched and `strided_batched` sample programs.</li></ul> **Optimizations** <ul><li>Improved general performance of LU factorization</li><li>Increased parallelism of specialized kernels when compiling from source, reducing build times on multi-core systems.</li></ul> **Changed** <ul><li>The rocsolver-test client now prints the rocSOLVER version used to run the tests, rather than the version used to build them</li><li>The rocsolver-bench client now prints the rocSOLVER version used in the benchmark</li></ul> **Fixed** <ul><li>Added missing `stdint.h` include to `rocsolver.h`</li></ul> |
|
||||
| | |
|
||||
| **hipSOLVER** | **Added** <ul><li>Added SYTRF functions: `hipsolverSsytrf_bufferSize`, `hipsolverDsytrf_bufferSize`, `hipsolverCsytrf_bufferSize`, `hipsolverZsytrf_bufferSize`, `hipsolverSsytrf`, `hipsolverDsytrf`, `hipsolverCsytrf`, `hipsolverZsytrf`</li></ul> **Fixed** <ul><li>Fixed use of incorrect `HIP_PATH` when building from source</li></ul> |
|
||||
| | |
|
||||
| **RCCL** | **Added** <ul><li>Compatibility with NCCL 2.10.3</li></ul> **Known issues** <ul><li>Managed memory is not currently supported for clique-based kernels</li></ul> |
|
||||
| | |
|
||||
| **hipCUB** | **Fixed** <ul><li>Added missing includes to `hipcub.hpp`</li></ul> **Added** <ul><li>Bfloat16 support to test cases (`device_reduce` & `device_radix_sort`)</li><li>Device merge sort</li><li>Block merge sort</li><li>API update to CUB 1.14.0</li></ul> **Changed** <ul><li>The `SetupNVCC.cmake` automatic target selector selects all of the capabilities of all available cards for the NVIDIA backend.</li></ul> |
|
||||
| | |
|
||||
| **rocPRIM** | **Fixed** <ul><li>Enable `bfloat16` tests and reduce threshold for `bfloat16`</li><li>Fix device scan `limit_size` feature</li><li>Non-optimized builds no longer trigger local memory limit errors</li></ul> **Added** <ul><li>Scan size limit feature</li><li>Reduce size limit feature</li><li>Transform size limit feature</li><li>Add `block_load_striped` and `block_store_striped`</li><li>Add `gather_to_blocked` to gather values from other threads into a blocked arrangement</li><li>The block sizes for device merge sorts initial block sort and its merge steps are now separate in its kernel config (the block sort step supports multiple items per thread)</li></ul> **Changed** <ul><li>`size_limit` for scan, reduce and transform can now be set in the config struct instead of a parameter</li><li>`device_scan` and `device_segmented_scan`: `inclusive_scan` now uses the input-type as accumulator-type, `exclusive_scan` uses initial-value-type. This particularly changes behaviour of small-size input types with large-size output types (e.g. `short` input, `int` output) and low-res input with high-res output (e.g. `float` input, `double` output)</li><li>Revert old Fiji workaround, because the issue was solved at compiler side</li><li>Update `README` cmake minimum version number</li><li>Block sort support multiple items per thread. Currently only powers of two block sizes, and items per threads are supported and only for full blocks</li><li>Bumped the minimum required version of CMake to 3.16</li></ul> **Known issues** <ul><li>Unit tests may soft hang on MI200 when running in `hipMallocManaged` mode.</li><li>`device_segmented_radix_sort`, `device_scan` unit tests failing for HIP on `WindowsReduceEmptyInput` cause random failure with `bfloat16`</li><li>Managed memory is not currently supported for clique-based kernels</li></ul> |
|
||||
|
||||
### System Management Interface
|
||||
|
||||
#### Clock Throttling for GPU Events
|
||||
|
||||
This feature lists GPU events as they occur in real-time and can be used with
|
||||
`kfdtest` to produce `vm_fault` events for testing.
|
||||
|
||||
The command can be called with either `-e` or `--showevents` like this:
|
||||
|
||||
```bash
|
||||
-e [EVENT [EVENT ...]], --showevents [EVENT [EVENT ...]] Show event list
|
||||
```
|
||||
|
||||
Where `EVENT` is any list combination of `VM_FAULT`, `THERMAL_THROTTLE`, or
|
||||
`GPU_RESET` and is **NOT** case sensitive.
|
||||
|
||||
**Note:** If no event arguments are passed, all events will be watched by
|
||||
default.
|
||||
|
||||
##### CLI Commands
|
||||
|
||||
```bash
|
||||
$ rocm-smi --showevents vm_fault thermal_throttle gpu_reset
|
||||
|
||||
======================= ROCm System Management Interface =======================
|
||||
================================= Show Events ==================================
|
||||
press 'q' or 'ctrl + c' to quit
|
||||
DEVICE TIME TYPE DESCRIPTION
|
||||
|
||||
============================= End of ROCm SMI Log ==============================
|
||||
```
|
||||
|
||||
(Run `kfdtest` in another window to test for `vm_fault` events.)
|
||||
|
||||
**Note:** Unlike other rocm-smi CLI commands, this command does not quit unless
|
||||
specified by the user. Users may press either `q` or `ctrl + c` to quit.
|
||||
|
||||
#### Display XGMI Bandwidth Between Nodes
|
||||
|
||||
The `rsmi_minmax_bandwidth_get` API reads the HW Topology file and displays
|
||||
bandwidth (min-max) between any two NUMA nodes in a matrix format.
|
||||
|
||||
The Command Line Interface (CLI) command can be called as follows:
|
||||
|
||||
```bash
|
||||
$ rocm-smi --shownodesbw
|
||||
|
||||
======================= ROCm System Management Interface =======================
|
||||
================================== Bandwidth ===================================
|
||||
GPU0 GPU1 GPU2 GPU3 GPU4 GPU5 GPU6 GPU7
|
||||
GPU0 N/A 50000-200000 50000-50000 0-0 0-0 0-0 50000-100000 0-0
|
||||
GPU1 50000-200000 N/A 0-0 50000-50000 0-0 50000-50000 0-0 0-0
|
||||
GPU2 50000-50000 0-0 N/A 50000-200000 50000-100000 0-0 0-0 0-0
|
||||
GPU3 0-0 50000-50000 50000-200000 N/A 0-0 0-0 0-0 50000-50000
|
||||
GPU4 0-0 0-0 50000-100000 0-0 N/A 50000-200000 50000-50000 0-0
|
||||
GPU5 0-0 50000-50000 0-0 0-0 50000-200000 N/A 0-0 50000-50000
|
||||
GPU6 50000-100000 0-0 0-0 0-0 50000-50000 0-0 N/A 50000-200000
|
||||
GPU7 0-0 0-0 0-0 50000-50000 0-0 50000-50000 50000-200000 N/A
|
||||
Format: min-max; Units: mps
|
||||
============================= End of ROCm SMI Log ==============================
|
||||
```
|
||||
|
||||
The sample output above shows the maximum theoretical xgmi bandwidth between 2
|
||||
numa nodes.
|
||||
|
||||
**Note:** "0-0" min-max bandwidth indicates devices are not connected directly.
|
||||
|
||||
#### P2P Connection Status
|
||||
|
||||
The `rsmi_is_p2p_accessible` API returns `True` if P2P can be implemented
|
||||
between two nodes, and returns `False` if P2P cannot be implemented between the
|
||||
two nodes.
|
||||
|
||||
The Command Line Interface command can be called as follows:
|
||||
|
||||
```bash
|
||||
rocm-smi --showtopoaccess
|
||||
```
|
||||
|
||||
Sample Output:
|
||||
|
||||
```bash
|
||||
$ rocm-smi --showtopoaccess
|
||||
======================= ROCm System Management Interface =======================
|
||||
===================== Link accessibility between two GPUs ======================
|
||||
GPU0 GPU1
|
||||
GPU0 True True
|
||||
GPU1 True True
|
||||
============================= End of ROCm SMI Log ==============================
|
||||
```
|
||||
|
||||
## Breaking Changes
|
||||
|
||||
### Runtime Breaking Change
|
||||
|
||||
Re-ordering of the enumerated type in `hip_runtime_api.h` to better match CUDA.
|
||||
See below for the difference in enumerated types.
|
||||
|
||||
ROCm software will be affected if any of the defined enums listed below are used
|
||||
in the code. Applications built with ROCm v5.0 enumerated types will work with a
|
||||
ROCm 4.5.2 driver. However, an undefined behavior error will occur with a ROCm
|
||||
v4.5.2 application that uses these enumerated types with a ROCm 5.0 runtime.
|
||||
|
||||
```c
|
||||
typedef enum hipDeviceAttribute_t {
|
||||
hipDeviceAttributeMaxThreadsPerBlock, // Maximum number of threads per block.
|
||||
hipDeviceAttributeMaxBlockDimX, // Maximum x-dimension of a block.
|
||||
hipDeviceAttributeMaxBlockDimY, // Maximum y-dimension of a block.
|
||||
hipDeviceAttributeMaxBlockDimZ, // Maximum z-dimension of a block.
|
||||
hipDeviceAttributeMaxGridDimX, // Maximum x-dimension of a grid.
|
||||
hipDeviceAttributeMaxGridDimY, // Maximum y-dimension of a grid.
|
||||
hipDeviceAttributeMaxGridDimZ, // Maximum z-dimension of a grid.
|
||||
hipDeviceAttributeMaxSharedMemoryPerBlock, // Maximum shared memory available per block in bytes.
|
||||
hipDeviceAttributeTotalConstantMemory, // Constant memory size in bytes.
|
||||
hipDeviceAttributeWarpSize, // Warp size in threads.
|
||||
hipDeviceAttributeMaxRegistersPerBlock, // Maximum number of 32-bit registers available to a
|
||||
// thread block. This number is shared by all thread
|
||||
// blocks simultaneously resident on a
|
||||
// multiprocessor.
|
||||
hipDeviceAttributeClockRate, // Peak clock frequency in kilohertz.
|
||||
hipDeviceAttributeMemoryClockRate, // Peak memory clock frequency in kilohertz.
|
||||
hipDeviceAttributeMemoryBusWidth, // Global memory bus width in bits.
|
||||
hipDeviceAttributeMultiprocessorCount, // Number of multiprocessors on the device.
|
||||
hipDeviceAttributeComputeMode, // Compute mode that device is currently in.
|
||||
hipDeviceAttributeL2CacheSize, // Size of L2 cache in bytes. 0 if the device doesn't have L2
|
||||
// cache.
|
||||
hipDeviceAttributeMaxThreadsPerMultiProcessor, // Maximum resident threads per
|
||||
// multiprocessor.
|
||||
hipDeviceAttributeComputeCapabilityMajor, // Major compute capability version number.
|
||||
hipDeviceAttributeComputeCapabilityMinor, // Minor compute capability version number.
|
||||
hipDeviceAttributeConcurrentKernels, // Device can possibly execute multiple kernels
|
||||
// concurrently.
|
||||
hipDeviceAttributePciBusId, // PCI Bus ID.
|
||||
hipDeviceAttributePciDeviceId, // PCI Device ID.
|
||||
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, // Maximum Shared Memory Per
|
||||
// Multiprocessor.
|
||||
hipDeviceAttributeIsMultiGpuBoard, // Multiple GPU devices.
|
||||
hipDeviceAttributeIntegrated, // iGPU
|
||||
hipDeviceAttributeCooperativeLaunch, // Support cooperative launch
|
||||
hipDeviceAttributeCooperativeMultiDeviceLaunch, // Support cooperative launch on multiple devices
|
||||
hipDeviceAttributeMaxTexture1DWidth, // Maximum number of elements in 1D images
|
||||
hipDeviceAttributeMaxTexture2DWidth, // Maximum dimension width of 2D images in image elements
|
||||
hipDeviceAttributeMaxTexture2DHeight, // Maximum dimension height of 2D images in image elements
|
||||
hipDeviceAttributeMaxTexture3DWidth, // Maximum dimension width of 3D images in image elements
|
||||
hipDeviceAttributeMaxTexture3DHeight, // Maximum dimensions height of 3D images in image elements
|
||||
hipDeviceAttributeMaxTexture3DDepth, // Maximum dimensions depth of 3D images in image elements
|
||||
hipDeviceAttributeCudaCompatibleBegin = 0,
|
||||
hipDeviceAttributeHdpMemFlushCntl, // Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
|
||||
hipDeviceAttributeHdpRegFlushCntl, // Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
|
||||
hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, // Whether ECC support is enabled.
|
||||
hipDeviceAttributeAccessPolicyMaxWindowSize, // Cuda only. The maximum size of the window policy in bytes.
|
||||
hipDeviceAttributeAsyncEngineCount, // Cuda only. Asynchronous engines number.
|
||||
hipDeviceAttributeCanMapHostMemory, // Whether host memory can be mapped into device address space
|
||||
hipDeviceAttributeCanUseHostPointerForRegisteredMem, // Cuda only. Device can access host registered memory
|
||||
// at the same virtual address as the CPU
|
||||
hipDeviceAttributeClockRate, // Peak clock frequency in kilohertz.
|
||||
hipDeviceAttributeComputeMode, // Compute mode that device is currently in.
|
||||
hipDeviceAttributeComputePreemptionSupported, // Cuda only. Device supports Compute Preemption.
|
||||
hipDeviceAttributeConcurrentKernels, // Device can possibly execute multiple kernels concurrently.
|
||||
hipDeviceAttributeConcurrentManagedAccess, // Device can coherently access managed memory concurrently with the CPU
|
||||
hipDeviceAttributeCooperativeLaunch, // Support cooperative launch
|
||||
hipDeviceAttributeCooperativeMultiDeviceLaunch, // Support cooperative launch on multiple devices
|
||||
hipDeviceAttributeDeviceOverlap, // Cuda only. Device can concurrently copy memory and execute a kernel.
|
||||
// Deprecated. Use instead asyncEngineCount.
|
||||
hipDeviceAttributeDirectManagedMemAccessFromHost, // Host can directly access managed memory on
|
||||
// the device without migration
|
||||
hipDeviceAttributeGlobalL1CacheSupported, // Cuda only. Device supports caching globals in L1
|
||||
hipDeviceAttributeHostNativeAtomicSupported, // Cuda only. Link between the device and the host supports native atomic operations
|
||||
hipDeviceAttributeIntegrated, // Device is integrated GPU
|
||||
hipDeviceAttributeIsMultiGpuBoard, // Multiple GPU devices.
|
||||
hipDeviceAttributeKernelExecTimeout, // Run time limit for kernels executed on the device
|
||||
hipDeviceAttributeL2CacheSize, // Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
|
||||
hipDeviceAttributeLocalL1CacheSupported, // caching locals in L1 is supported
|
||||
hipDeviceAttributeLuid, // Cuda only. 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
|
||||
hipDeviceAttributeLuidDeviceNodeMask, // Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms
|
||||
hipDeviceAttributeComputeCapabilityMajor, // Major compute capability version number.
|
||||
hipDeviceAttributeManagedMemory, // Device supports allocating managed memory on this system
|
||||
hipDeviceAttributeMaxBlocksPerMultiProcessor, // Cuda only. Max block size per multiprocessor
|
||||
hipDeviceAttributeMaxBlockDimX, // Max block size in width.
|
||||
hipDeviceAttributeMaxBlockDimY, // Max block size in height.
|
||||
hipDeviceAttributeMaxBlockDimZ, // Max block size in depth.
|
||||
hipDeviceAttributeMaxGridDimX, // Max grid size in width.
|
||||
hipDeviceAttributeMaxGridDimY, // Max grid size in height.
|
||||
hipDeviceAttributeMaxGridDimZ, // Max grid size in depth.
|
||||
hipDeviceAttributeMaxSurface1D, // Maximum size of 1D surface.
|
||||
hipDeviceAttributeMaxSurface1DLayered, // Cuda only. Maximum dimensions of 1D layered surface.
|
||||
hipDeviceAttributeMaxSurface2D, // Maximum dimension (width, height) of 2D surface.
|
||||
hipDeviceAttributeMaxSurface2DLayered, // Cuda only. Maximum dimensions of 2D layered surface.
|
||||
hipDeviceAttributeMaxSurface3D, // Maximum dimension (width, height, depth) of 3D surface.
|
||||
hipDeviceAttributeMaxSurfaceCubemap, // Cuda only. Maximum dimensions of Cubemap surface.
|
||||
hipDeviceAttributeMaxSurfaceCubemapLayered, // Cuda only. Maximum dimension of Cubemap layered surface.
|
||||
hipDeviceAttributeMaxTexture1DWidth, // Maximum size of 1D texture.
|
||||
hipDeviceAttributeMaxTexture1DLayered, // Cuda only. Maximum dimensions of 1D layered texture.
|
||||
hipDeviceAttributeMaxTexture1DLinear, // Maximum number of elements allocatable in a 1D linear texture.
|
||||
// Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
|
||||
hipDeviceAttributeMaxTexture1DMipmap, // Cuda only. Maximum size of 1D mipmapped texture.
|
||||
hipDeviceAttributeMaxTexture2DWidth, // Maximum dimension width of 2D texture.
|
||||
hipDeviceAttributeMaxTexture2DHeight, // Maximum dimension height of 2D texture.
|
||||
hipDeviceAttributeMaxTexture2DGather, // Cuda only. Maximum dimensions of 2D texture if gather operations performed.
|
||||
hipDeviceAttributeMaxTexture2DLayered, // Cuda only. Maximum dimensions of 2D layered texture.
|
||||
hipDeviceAttributeMaxTexture2DLinear, // Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
|
||||
hipDeviceAttributeMaxTexture2DMipmap, // Cuda only. Maximum dimensions of 2D mipmapped texture.
|
||||
hipDeviceAttributeMaxTexture3DWidth, // Maximum dimension width of 3D texture.
|
||||
hipDeviceAttributeMaxTexture3DHeight, // Maximum dimension height of 3D texture.
|
||||
hipDeviceAttributeMaxTexture3DDepth, // Maximum dimension depth of 3D texture.
|
||||
hipDeviceAttributeMaxTexture3DAlt, // Cuda only. Maximum dimensions of alternate 3D texture.
|
||||
hipDeviceAttributeMaxTextureCubemap, // Cuda only. Maximum dimensions of Cubemap texture
|
||||
hipDeviceAttributeMaxTextureCubemapLayered, // Cuda only. Maximum dimensions of Cubemap layered texture.
|
||||
hipDeviceAttributeMaxThreadsDim, // Maximum dimension of a block
|
||||
hipDeviceAttributeMaxThreadsPerBlock, // Maximum number of threads per block.
|
||||
hipDeviceAttributeMaxThreadsPerMultiProcessor, // Maximum resident threads per multiprocessor.
|
||||
hipDeviceAttributeMaxPitch, // Maximum pitch in bytes allowed by memory copies
|
||||
hipDeviceAttributeMemoryBusWidth, // Global memory bus width in bits.
|
||||
hipDeviceAttributeMemoryClockRate, // Peak memory clock frequency in kilohertz.
|
||||
hipDeviceAttributeComputeCapabilityMinor, // Minor compute capability version number.
|
||||
hipDeviceAttributeMultiGpuBoardGroupID, // Cuda only. Unique ID of device group on the same multi-GPU board
|
||||
hipDeviceAttributeMultiprocessorCount, // Number of multiprocessors on the device.
|
||||
hipDeviceAttributeName, // Device name.
|
||||
hipDeviceAttributePageableMemoryAccess, // Device supports coherently accessing pageable memory
|
||||
// without calling hipHostRegister on it
|
||||
hipDeviceAttributePageableMemoryAccessUsesHostPageTables, // Device accesses pageable memory via the host's page tables
|
||||
hipDeviceAttributePciBusId, // PCI Bus ID.
|
||||
hipDeviceAttributePciDeviceId, // PCI Device ID.
|
||||
hipDeviceAttributePciDomainID, // PCI Domain ID.
|
||||
hipDeviceAttributePersistingL2CacheMaxSize, // Cuda11 only. Maximum l2 persisting lines capacity in bytes
|
||||
hipDeviceAttributeMaxRegistersPerBlock, // 32-bit registers available to a thread block. This number is shared
|
||||
// by all thread blocks simultaneously resident on a multiprocessor.
|
||||
hipDeviceAttributeMaxRegistersPerMultiprocessor, // 32-bit registers available per block.
|
||||
hipDeviceAttributeReservedSharedMemPerBlock, // Cuda11 only. Shared memory reserved by CUDA driver per block.
|
||||
hipDeviceAttributeMaxSharedMemoryPerBlock, // Maximum shared memory available per block in bytes.
|
||||
hipDeviceAttributeSharedMemPerBlockOptin, // Cuda only. Maximum shared memory per block usable by special opt in.
|
||||
hipDeviceAttributeSharedMemPerMultiprocessor, // Cuda only. Shared memory available per multiprocessor.
|
||||
hipDeviceAttributeSingleToDoublePrecisionPerfRatio, // Cuda only. Performance ratio of single precision to double precision.
|
||||
hipDeviceAttributeStreamPrioritiesSupported, // Cuda only. Whether to support stream priorities.
|
||||
hipDeviceAttributeSurfaceAlignment, // Cuda only. Alignment requirement for surfaces
|
||||
hipDeviceAttributeTccDriver, // Cuda only. Whether device is a Tesla device using TCC driver
|
||||
hipDeviceAttributeTextureAlignment, // Alignment requirement for textures
|
||||
hipDeviceAttributeTexturePitchAlignment, // Pitch alignment requirement for 2D texture references bound to pitched memory;
|
||||
hipDeviceAttributeTotalConstantMemory, // Constant memory size in bytes.
|
||||
hipDeviceAttributeTotalGlobalMem, // Global memory available on device.
|
||||
hipDeviceAttributeUnifiedAddressing, // Cuda only. An unified address space shared with the host.
|
||||
hipDeviceAttributeUuid, // Cuda only. Unique ID in 16 byte.
|
||||
hipDeviceAttributeWarpSize, // Warp size in threads.
|
||||
hipDeviceAttributeMaxPitch, // Maximum pitch in bytes allowed by memory copies
|
||||
hipDeviceAttributeTextureAlignment, //Alignment requirement for textures
|
||||
hipDeviceAttributeTexturePitchAlignment, //Pitch alignment requirement for 2D texture references bound to pitched memory;
|
||||
hipDeviceAttributeKernelExecTimeout, //Run time limit for kernels executed on the device
|
||||
hipDeviceAttributeCanMapHostMemory, //Device can map host memory into device address space
|
||||
hipDeviceAttributeEccEnabled, //Device has ECC support enabled
|
||||
hipDeviceAttributeCudaCompatibleEnd = 9999,
|
||||
hipDeviceAttributeAmdSpecificBegin = 10000,
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, // Supports cooperative launch on multiple
|
||||
// devices with unmatched functions
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, // Supports cooperative launch on multiple
|
||||
// devices with unmatched grid dimensions
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, // Supports cooperative launch on multiple
|
||||
// devices with unmatched block dimensions
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, // Supports cooperative launch on multiple
|
||||
// devices with unmatched shared memories
|
||||
hipDeviceAttributeAsicRevision, // Revision of the GPU in this device
|
||||
hipDeviceAttributeManagedMemory, // Device supports allocating managed memory on this system
|
||||
hipDeviceAttributeDirectManagedMemAccessFromHost, // Host can directly access managed memory on
|
||||
// the device without migration
|
||||
hipDeviceAttributeConcurrentManagedAccess, // Device can coherently access managed memory
|
||||
// concurrently with the CPU
|
||||
hipDeviceAttributePageableMemoryAccess, // Device supports coherently accessing pageable memory
|
||||
// without calling hipHostRegister on it
|
||||
hipDeviceAttributePageableMemoryAccessUsesHostPageTables, // Device accesses pageable memory via
|
||||
// the host's page tables
|
||||
hipDeviceAttributeCanUseStreamWaitValue // '1' if Device supports hipStreamWaitValue32() and
|
||||
// hipStreamWaitValue64(), '0' otherwise.
|
||||
hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin, // Frequency in khz of the timer used by the device-side "clock"
|
||||
hipDeviceAttributeArch, // Device architecture
|
||||
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, // Maximum Shared Memory Per Multiprocessor.
|
||||
hipDeviceAttributeGcnArch, // Device gcn architecture
|
||||
hipDeviceAttributeGcnArchName, // Device gcnArch name in 256 bytes
|
||||
hipDeviceAttributeHdpMemFlushCntl, // Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
|
||||
hipDeviceAttributeHdpRegFlushCntl, // Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, // Supports cooperative launch on multiple
|
||||
// devices with unmatched functions
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, // Supports cooperative launch on multiple
|
||||
// devices with unmatched grid dimensions
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, // Supports cooperative launch on multiple
|
||||
// devices with unmatched block dimensions
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, // Supports cooperative launch on multiple
|
||||
// devices with unmatched shared memories
|
||||
hipDeviceAttributeIsLargeBar, // Whether it is LargeBar
|
||||
hipDeviceAttributeAsicRevision, // Revision of the GPU in this device
|
||||
hipDeviceAttributeCanUseStreamWaitValue, // '1' if Device supports hipStreamWaitValue32() and
|
||||
// hipStreamWaitValue64() , '0' otherwise.
|
||||
hipDeviceAttributeAmdSpecificEnd = 19999,
|
||||
hipDeviceAttributeVendorSpecificBegin = 20000, // Extended attributes for vendors
|
||||
} hipDeviceAttribute_t;
|
||||
```
|
||||
|
||||
## Known Issues in This Release
|
||||
|
||||
### Incorrect dGPU Behavior When Using AMDVBFlash Tool
|
||||
|
||||
The AMDVBFlash tool, used for flashing the VBIOS image to dGPU, does not
|
||||
communicate with the ROM Controller specifically when the driver is present.
|
||||
This is because the driver, as part of its runtime power management feature,
|
||||
puts the dGPU to a sleep state.
|
||||
|
||||
As a workaround, users can run `amdgpu.runpm=0`, which temporarily disables the
|
||||
runtime power management feature from the driver and dynamically changes some
|
||||
power control-related sysfs files.
|
||||
|
||||
### Issue with START Timestamp in ROCProfiler
|
||||
|
||||
Users may encounter an issue with the enabled timestamp functionality for
|
||||
monitoring one or multiple counters. ROCProfiler outputs the following four
|
||||
timestamps for each kernel:
|
||||
|
||||
- Dispatch
|
||||
- Start
|
||||
- End
|
||||
- Complete
|
||||
|
||||
#### Issue
|
||||
|
||||
This defect is related to the Start timestamp functionality, which incorrectly
|
||||
shows an earlier time than the Dispatch timestamp.
|
||||
|
||||
To reproduce the issue,
|
||||
|
||||
1. Enable timing using the `--timestamp on` flag.
|
||||
2. Use the `-i` option with the input filename that contains the name of the
|
||||
counter(s) to monitor.
|
||||
3. Run the program.
|
||||
4. Check the output result file.
|
||||
|
||||
##### Current behavior
|
||||
|
||||
`BeginNS` is lower than `DispatchNS`, which is incorrect.
|
||||
|
||||
##### Expected behavior
|
||||
|
||||
The correct order is:
|
||||
|
||||
`Dispatch < Start < End < Complete`
|
||||
|
||||
Users cannot use ROCProfiler to measure the time spent on each kernel because of
|
||||
the incorrect timestamp with counter collection enabled.
|
||||
|
||||
##### Recommended Workaround
|
||||
|
||||
Users are recommended to collect kernel execution timestamps without monitoring
|
||||
counters, as follows:
|
||||
|
||||
1. Enable timing using the `--timestamp on` flag, and run the application.
|
||||
2. Rerun the application using the `-i` option with the input filename that
|
||||
contains the name of the counter(s) to monitor, and save this to a different
|
||||
output file using the `-o` flag.
|
||||
3. Check the output result file from step 1.
|
||||
4. The order of timestamps correctly displays as:
|
||||
|
||||
`DispatchNS < BeginNS < EndNS < CompleteNS`
|
||||
|
||||
5. Users can find the values of the collected counters in the output file
|
||||
generated in step 2.
|
||||
|
||||
### No Support for SMI and ROCDebugger on SRIOV
|
||||
|
||||
System Management Interface (SMI) and ROCDebugger are not supported in the SRIOV
|
||||
environment on any GPU, including the
|
||||
**Radeon Pro V620 and W6800 Workstation GPUs**. For more information, refer to
|
||||
the Systems Management Interface documentation.
|
||||
|
||||
## Deprecations and Warnings in This Release
|
||||
|
||||
### ROCm Libraries Changes – Deprecations and Deprecation Removal
|
||||
|
||||
- The `hipfft.h` header is now provided only by the `hipfft` package. Up to ROCm
|
||||
5.0, users would get `hipfft.h` in the rocfft package too.
|
||||
- The GlobalPairwiseAMG class is now entirely removed; users should use the
|
||||
PairwiseAMG class instead.
|
||||
- The `rocsparse_spmm` signature in 5.0 was changed to match that of
|
||||
`rocsparse_spmm_ex`. In 5.0, `rocsparse_spmm_ex` is still present, but
|
||||
deprecated. Signature diff for `rocsparse_spmm`
|
||||
|
||||
#### `rocsparse_spmm` in 5.0
|
||||
|
||||
```c
|
||||
rocsparse_status rocsparse_spmm(rocsparse_handle handle,
|
||||
rocsparse_operation trans_A,
|
||||
rocsparse_operation trans_B,
|
||||
const void* alpha,
|
||||
const rocsparse_spmat_descr mat_A,
|
||||
const rocsparse_dnmat_descr mat_B,
|
||||
const void* beta,
|
||||
const rocsparse_dnmat_descr mat_C,
|
||||
rocsparse_datatype compute_type,
|
||||
rocsparse_spmm_alg alg,
|
||||
rocsparse_spmm_stage stage,
|
||||
size_t* buffer_size,
|
||||
void* temp_buffer);
|
||||
```
|
||||
|
||||
#### `rocsparse_spmm` in 4.0
|
||||
|
||||
```c
|
||||
rocsparse_status rocsparse_spmm(rocsparse_handle handle,
|
||||
rocsparse_operation trans_A,
|
||||
rocsparse_operation trans_B,
|
||||
const void* alpha,
|
||||
const rocsparse_spmat_descr mat_A,
|
||||
const rocsparse_dnmat_descr mat_B,
|
||||
const void* beta,
|
||||
const rocsparse_dnmat_descr mat_C,
|
||||
rocsparse_datatype compute_type,
|
||||
rocsparse_spmm_alg alg,
|
||||
size_t* buffer_size,
|
||||
void* temp_buffer);
|
||||
```
|
||||
|
||||
### HIP API Deprecations and Warnings
|
||||
|
||||
#### Warning - Arithmetic Operators of HIP Complex and Vector Types
|
||||
|
||||
In this release, arithmetic operators of HIP complex and vector types are
|
||||
deprecated.
|
||||
|
||||
- As alternatives to arithmetic operators of HIP complex types, users can use
|
||||
arithmetic operators of `std::complex` types.
|
||||
- As alternatives to arithmetic operators of HIP vector types, users can use the
|
||||
operators of the native clang vector type associated with the data member of
|
||||
HIP vector types.
|
||||
|
||||
During the deprecation, two macros `__HIP_ENABLE_COMPLEX_OPERATORS` and
|
||||
`__HIP_ENABLE_VECTOR_OPERATORS` are provided to allow users to conditionally
|
||||
enable arithmetic operators of HIP complex or vector types.
|
||||
|
||||
Note, the two macros are mutually exclusive and, by default, set to off.
|
||||
|
||||
The arithmetic operators of HIP complex and vector types will be removed in a
|
||||
future release.
|
||||
|
||||
Refer to the HIP API Guide for more information.
|
||||
|
||||
#### HIPCC/HIPCONFIG Refactoring
|
||||
|
||||
In prior ROCm releases, by default, the `hipcc`/`hipconfig` Perl scripts were
|
||||
used to identify and set target compiler options, target platform, compiler, and
|
||||
runtime appropriately.
|
||||
|
||||
In ROCm v5.0, `hipcc.bin` and `hipconfig.bin` have been added as the compiled
|
||||
binary implementations of the `hipcc` and `hipconfig`. These new binaries are
|
||||
currently a work in progress and are marked as experimental. ROCm plans
|
||||
to fully transition to `hipcc.bin` and `hipconfig.bin` in a future ROCm
|
||||
release. The existing `hipcc` and `hipconfig` Perl scripts are renamed to
|
||||
`hipcc.pl` and `hipconfig.pl` respectively. New top-level `hipcc` and
|
||||
`hipconfig` Perl scripts are created, which can switch between the Perl script
|
||||
or the compiled binary based on the environment variable
|
||||
`HIPCC_USE_PERL_SCRIPT`.
|
||||
|
||||
In ROCm 5.0, by default, this environment variable is set to use `hipcc` and
|
||||
`hipconfig` through the Perl scripts.
|
||||
|
||||
Subsequently, Perl scripts will no longer be available in ROCm in a future
|
||||
release.
|
||||
|
||||
### Warning - Compiler-Generated Code Object Version 4 Deprecation
|
||||
|
||||
Support for loading compiler-generated code object version 4 will be deprecated
|
||||
in a future release with no release announcement and replaced with code object 5
|
||||
as the default version.
|
||||
|
||||
The current default is code object version 4.
|
||||
|
||||
### Warning - MIOpenTensile Deprecation
|
||||
|
||||
MIOpenTensile will be deprecated in a future release.
|
||||
|
||||
## Archived Documentation
|
||||
|
||||
Older ROCm documentation is archived at <https://rocmdocs.amd.com>.
|
||||
|
||||
## Disclaimer
|
||||
|
||||
The information presented in this document is for informational purposes only
|
||||
and may contain technical inaccuracies, omissions, and typographical errors.
|
||||
The information contained herein is subject to change and may be rendered
|
||||
inaccurate for many reasons, including but not limited to product and roadmap
|
||||
changes, component and motherboard version changes, new model and/or product
|
||||
releases, product differences between differing manufacturers, software changes,
|
||||
BIOS flashes, firmware upgrades, or the like. Any computer system has risks of
|
||||
security vulnerabilities that cannot be completely prevented or mitigated.
|
||||
AMD assumes no obligation to update or otherwise correct or revise this
|
||||
information. However, AMD reserves the right to revise this information and to
|
||||
make changes from time to time to the content hereof without obligation of AMD
|
||||
to notify any person of such revisions or changes. THIS INFORMATION IS PROVIDED
|
||||
"AS IS." AMD MAKES NO REPRESENTATIONS OR WARRANTIES WITH RESPECT TO THE CONTENTS
|
||||
HEREOF AND ASSUMES NO RESPONSIBILITY FOR ANY INACCURACIES, ERRORS, OR OMISSIONS
|
||||
THAT MAY APPEAR IN THIS INFORMATION. AMD SPECIFICALLY DISCLAIMS ANY IMPLIED
|
||||
WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR ANY PARTICULAR
|
||||
PURPOSE. IN NO EVENT WILL AMD BE LIABLE TO ANY PERSON FOR ANY RELIANCE, DIRECT,
|
||||
INDIRECT, SPECIAL, OR OTHER CONSEQUENTIAL DAMAGES ARISING FROM THE USE OF ANY
|
||||
INFORMATION CONTAINED HEREIN, EVEN IF AMD IS EXPRESSLY ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES. AMD, the AMD Arrow logo, and combinations thereof
|
||||
are trademarks of Advanced Micro Devices, Inc. Other product names used in this
|
||||
publication are for identification purposes only and may be trademarks of their
|
||||
respective companies. © 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
### Third-party Disclaimer
|
||||
|
||||
Third-party content is licensed to you directly by the third party that owns the
|
||||
content and is not licensed to you by AMD. ALL LINKED THIRD-PARTY CONTENT IS
|
||||
PROVIDED “AS IS” WITHOUT A WARRANTY OF ANY KIND. USE OF SUCH THIRD-PARTY CONTENT
|
||||
IS DONE AT YOUR SOLE DISCRETION AND UNDER NO CIRCUMSTANCES WILL AMD BE LIABLE TO
|
||||
YOU FOR ANY THIRD-PARTY CONTENT. YOU ASSUME ALL RISK AND ARE SOLELY RESPONSIBLE
|
||||
FOR ANY DAMAGES THAT MAY ARISE FROM YOUR USE OF THIRD-PARTY CONTENT.
|
||||
@@ -1,8 +0,0 @@
|
||||
# How to build documentation via Sphinx
|
||||
|
||||
```bash
|
||||
pip3 install -r requirements.txt
|
||||
|
||||
python -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html
|
||||
```
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
# Release Notes
|
||||
@@ -1 +0,0 @@
|
||||
<svg id="Layer_1" data-name="Layer 1" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 139.72 33.32"><defs><style>.cls-1{fill:#fff;}</style></defs><title>AMD-logo-white-v2</title><path class="cls-1" d="M33,31.14H25.21l-2.37-5.72H9.92L7.76,31.14H.14L11.78,2.26h8.34Zm-16.89-22L11.83,20.39h8.89Z" transform="translate(-0.14 -0.03)"/><path class="cls-1" d="M61.1,2.26h6.27V31.14h-7.2v-18l-7.79,9.06h-1.1L43.49,13.1v18h-7.2V2.26h6.27L51.83,13Z" transform="translate(-0.14 -0.03)"/><path class="cls-1" d="M85.61,2.26c10.54,0,16,6.56,16,14.48,0,8.3-5.25,14.4-16.77,14.4H72.86V2.26ZM80.06,25.85h4.7c7.24,0,9.4-4.91,9.4-9.15,0-5-2.67-9.15-9.48-9.15H80.06Z" transform="translate(-0.14 -0.03)"/><polygon class="cls-1" points="130.64 9.08 115.75 9.08 106.68 0 139.72 0 139.72 33.05 130.64 23.97 130.64 9.08"/><polygon class="cls-1" points="115.74 23.98 115.74 10.9 106.4 20.24 106.4 33.33 119.48 33.33 128.82 23.98 115.74 23.98"/></svg>
|
||||
|
Before Width: | Height: | Size: 924 B |
@@ -1,9 +0,0 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" version="1.0" preserveAspectRatio="xMidYMid meet" viewBox="1.16 -0.07 462.14 198.07">
|
||||
|
||||
<g transform="translate(0.000000,480.000000) scale(0.100000,-0.100000)" fill="#000000" stroke="none">
|
||||
<path d="M15 4788 c-3 -7 -4 -452 -3 -988 l3 -975 379 -3 c505 -3 622 11 710 88 77 68 105 188 106 456 0 273 -19 354 -106 433 -36 33 -74 56 -119 72 l-65 23 60 22 c161 58 198 133 188 387 -9 216 -55 324 -176 408 -99 69 -251 89 -683 89 -223 0 -291 -3 -294 -12z m645 -363 c24 -24 25 -31 28 -159 5 -206 -9 -236 -114 -236 l-44 0 0 216 0 217 52 -6 c37 -5 59 -14 78 -32z m-12 -726 c12 -5 27 -20 32 -34 13 -34 13 -406 0 -439 -12 -33 -45 -53 -102 -61 l-48 -7 0 281 0 282 48 -7 c26 -4 57 -11 70 -15z"/>
|
||||
<path d="M1395 4788 c-3 -7 -4 -452 -3 -988 l3 -975 445 0 445 0 3 198 2 197 -190 0 -190 0 0 220 0 220 160 0 160 0 0 179 c0 154 -2 180 -16 185 -9 3 -81 6 -160 6 l-144 0 0 185 0 185 170 0 170 0 0 200 0 200 -425 0 c-331 0 -427 -3 -430 -12z"/>
|
||||
<path d="M2317 4793 c-4 -3 -7 -93 -7 -199 l0 -193 148 -3 147 -3 5 -785 5 -785 258 -3 257 -2 0 790 0 789 153 3 152 3 3 175 c1 96 0 185 -3 198 l-5 22 -554 0 c-304 0 -556 -3 -559 -7z"/>
|
||||
<path d="M3595 4788 c-2 -7 -9 -51 -15 -98 -6 -47 -17 -134 -25 -195 -8 -60 -21 -162 -29 -225 -14 -108 -23 -172 -61 -460 -9 -63 -22 -164 -30 -225 -8 -60 -30 -227 -49 -370 -19 -143 -37 -290 -41 -328 l-7 -67 263 2 264 3 13 145 c7 80 15 160 18 178 l5 32 84 0 c56 0 86 -4 92 -12 4 -7 12 -87 18 -178 l10 -165 264 -3 264 -2 -6 42 c-4 24 -20 135 -37 248 -29 192 -78 522 -100 670 -5 36 -23 157 -40 270 -17 113 -42 279 -55 370 -48 322 -54 361 -60 370 -10 16 -734 13 -740 -2z m411 -630 c7 -90 20 -235 29 -323 9 -88 18 -193 22 -233 l6 -73 -84 3 -84 3 3 60 c2 56 33 337 52 485 27 210 33 250 37 246 3 -3 11 -78 19 -168z"/>
|
||||
</g>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 1.7 KiB |
|
Before Width: | Height: | Size: 10 KiB |
@@ -1,22 +0,0 @@
|
||||
$(document).ready(() => {
|
||||
const copy = async(event) => {
|
||||
return await navigator.clipboard.writeText($(event.target).attr('copydata'));
|
||||
}
|
||||
|
||||
$('.table td code').each( function () {
|
||||
var text = $(this).text()
|
||||
$(this).addClass('hovertext')
|
||||
$(this).attr('copydata', text)
|
||||
$(this).attr('data-hover', "Click to copy.")
|
||||
var new_text = text.replaceAll(/_([^\u200B])/g, '_\u200B$1').replaceAll(/([a-z])([A-Z])/g, '$1\u200B$2')
|
||||
$(this).text(new_text)
|
||||
$(this).click((event) => {
|
||||
copy(event)
|
||||
$(event.target).attr('data-hover', "Copied!")
|
||||
$(event.target).on("mouseleave", () => {
|
||||
$(event.target).attr('data-hover', "Click to copy.")
|
||||
$(event.target).off("mouseleave")
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -1,72 +0,0 @@
|
||||
@import url("theme.css");
|
||||
|
||||
:root {
|
||||
--pst-font-size-base: 11px;
|
||||
}
|
||||
|
||||
div#site-navigation {
|
||||
height: fit-content;
|
||||
min-height: calc(100vh - 190px);
|
||||
}
|
||||
|
||||
div.content-container {
|
||||
overflow-y: clip;
|
||||
}
|
||||
|
||||
.hovertext {
|
||||
position: relative;
|
||||
/* border-bottom: 1px dotted black; */
|
||||
}
|
||||
|
||||
.hovertext:before {
|
||||
content: attr(data-hover);
|
||||
visibility: hidden;
|
||||
opacity: 0;
|
||||
width: 140px;
|
||||
background-color: black;
|
||||
color: #fff;
|
||||
text-align: center;
|
||||
border-radius: 5px;
|
||||
padding: 5px 0;
|
||||
transition: opacity 0.5s ease-in-out;
|
||||
|
||||
position: absolute;
|
||||
z-index: 1;
|
||||
left: 0;
|
||||
top: 110%;
|
||||
}
|
||||
|
||||
.hovertext:hover:before {
|
||||
opacity: 1;
|
||||
visibility: visible;
|
||||
}
|
||||
|
||||
div#rdc-watermark-container {
|
||||
pointer-events: none;
|
||||
position: fixed;
|
||||
height: 100vh;
|
||||
width: 100vw;
|
||||
top: 0;
|
||||
left: 0;
|
||||
z-index: 10000;
|
||||
}
|
||||
|
||||
img#rdc-watermark {
|
||||
pointer-events: none;
|
||||
position: absolute;
|
||||
top: 50%;
|
||||
left: 50%;
|
||||
transform-origin: center;
|
||||
transform: translate(-50%, -50%) rotate(-45deg);
|
||||
opacity: 10%;
|
||||
z-index: 10000;
|
||||
max-width: 100%;
|
||||
max-height: calc(100% - 200px);
|
||||
object-fit: contain;
|
||||
width: 45%;
|
||||
}
|
||||
|
||||
ul.bd-breadcrumbs {
|
||||
margin-bottom: 0;
|
||||
margin-top: 1px;
|
||||
}
|
||||
@@ -1,58 +0,0 @@
|
||||
.rocm-footer {
|
||||
background-color: black;
|
||||
color: white;
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
border-top: 1px solid hsla(216,3%,63%,.5);
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
text-align: center;
|
||||
width: 100%;
|
||||
padding-top: 5px;
|
||||
line-height: 20px;
|
||||
height: 120px;
|
||||
}
|
||||
|
||||
.rocm-footer a, .rocm-footer p {
|
||||
color: white;
|
||||
}
|
||||
|
||||
.rocm-footer>ul {
|
||||
border-bottom: 1px solid hsla(216,3%,63%,.5);
|
||||
justify-content: flex-end;
|
||||
margin-top:15px;
|
||||
}
|
||||
|
||||
.rocm-footer ul {
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
flex-wrap: wrap;
|
||||
font-size: 12px;
|
||||
padding: 0;
|
||||
padding-bottom: 12px;
|
||||
width:98vw;
|
||||
list-style: none inside none;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.rocm-footer div {
|
||||
width: 98vw;
|
||||
}
|
||||
|
||||
.rocm-footer div {
|
||||
text-align: start;
|
||||
}
|
||||
|
||||
.rocm-footer a:hover {
|
||||
color: #e9ecef;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.rocm-footer ul li {
|
||||
margin-right: 5px;
|
||||
}
|
||||
|
||||
.rocm-footer ul li+li {
|
||||
margin-left: 10px;
|
||||
padding-left: 8px;
|
||||
}
|
||||
@@ -1,108 +0,0 @@
|
||||
/* Black header banner that sticks to the top of its scroll container. */
.rocm-header {
    position: -webkit-sticky; /* Safari */
    position: sticky;         /* keep after the prefixed value */
    top: 0;
    width: 100%;
    min-height: 50px;
    height: 70px;
    overflow: hidden;
    background-color: black;
    font-family: 'Noto Sans', sans-serif;
    font-size: 16px;
    text-align: left;
}

/* Header links are plain white text without underline. */
.rocm-header a {
    color: white;
    text-decoration: none;
}

/* Vertical breathing room for the text label inside a header link. */
.rocm-header-link p {
    margin-top: 1em;
    margin-bottom: 1em;
}

/* AMD logo: fixed width with an even margin on all sides. */
.rocm-header img#amd-logo {
    width: 8.25rem;
    margin: 1.5em;
}

/* ROCm logo: no margin, never taller than its link box. */
.rocm-header img#rocm-logo {
    margin: 0;
    max-height: 100%;
}

/* Wrapper around all header links; shrinks to fit its content. */
.rocm-header-buttons {
    display: inline-block;
    width: fit-content;
    max-width: 100%;
    height: fit-content;
    vertical-align: middle;
}

/* Indent the first link from the left edge. */
.rocm-header-link:first-child {
    margin-left: 4em;
}

/* One header link. It is positioned so the absolutely placed glow overlay
   inside it is laid out relative to the link. */
.rocm-header-link {
    position: relative;
    display: inline-block;
    height: fit-content;
    text-align: center;
    vertical-align: middle;
}

/* The last link is taken out of the flow and vertically centered near the
   right edge of the header. */
.rocm-header-link.rocm-header-last {
    position: absolute;
    top: 50%;
    right: 4em;
    height: 100%;
    transform: translate(0, -50%);
}

/* Link boxes and their labels: white, vertically centered. */
.rocm-header-link .rocm-link-box,
.rocm-header-link p {
    color: white;
    vertical-align: middle;
}
|
||||
|
||||
/* Text size for the link boxes in the header.
   The class name is the single token `rocm-link-box`; written with a space
   (`.rocm-link box`) the selector targets a <box> element inside `.rocm-link`
   and matches nothing in the header markup.
   NOTE(review): `.rocm-link-box` also has em-based side margins, which scale
   with this font size - check the header spacing after this change. */
.rocm-header-link .rocm-link-box {
    font-size: x-large;
}
|
||||
|
||||
/* Label text size inside a header link. */
.rocm-header-link p {
    font-size: 16px;
}

/* Images and link boxes: capped height, equal left/right margins.
   Only the side margins are set here; top/bottom are left untouched. */
.rocm-header-link img,
.rocm-header-link .rocm-link-box {
    max-height: 50px;
    margin-left: 2em;
    margin-right: 2em;
}

/* Clipping frame for the glow effect; covers the whole link. */
.rocm-header-link .glow-wrap {
    position: absolute;
    top: 0;
    width: 100%;
    height: 100%;
    overflow: hidden;
}

/* The glow itself: a blurred, skewed, semi-transparent white stripe that
   rests just outside the left edge of the link. */
.rocm-header-link .glow {
    display: block;
    position: absolute;
    top: 0;
    left: 0;
    width: 20%;
    height: 100%;
    background: rgba(255,255,255,.2);
    filter: blur(2px);
    transform-origin: right top;
    transform: translate(-100%, 0) skew(-45deg);
    transition: all .5s cubic-bezier(0.645, 0.045, 0.355, 1);
}

/* On hover the stripe is translated far to the right, sweeping across the
   link; the transition above animates the move. */
.rocm-header-link:hover .glow {
    transform-origin: left bottom;
    transform: translate(1000%, 0) skew(-45deg);
    transition: all .5s cubic-bezier(0.645, 0.045, 0.355, 1);
}
|
||||
@@ -1,11 +0,0 @@
|
||||
{#- Copyright notice. Rendered only when `show_copyright` and `copyright` are
    both set. When a `copyright` document exists the word "Copyright" links to
    it; otherwise plain text is emitted. -#}
{% if show_copyright and copyright %}
<div class="copyright">
  {% if hasdoc('copyright') %}
    {% trans path=pathto('copyright'), copyright=copyright|e %}© <a href="{{ path }}">Copyright</a> {{ copyright }}.{% endtrans %}
  {% else %}
    {% trans copyright=copyright|e %}© Copyright {{ copyright }}.{% endtrans %}
  {% endif %}
  {#- Both variants end with the same line break. #}
  <br/>
</div>
{% endif %}
|
||||
@@ -1,43 +0,0 @@
|
||||
<!-- Copied from pydata-sphinx-theme -->

{#- Render one <li class="nav-item"> containing an icon link.

    url         link target; nothing is rendered when it has 2 characters or fewer
    icon        icon class list, static-file path or image URL, depending on `type`
    name        used for the link title and for the label / alt text
    type        "fontawesome", "local" or "url"; anything else renders an error message
    attributes  optional mapping merged over the default <a> attributes
-#}
{%- macro icon_link_nav_item(url, icon, name, type, attributes='') -%}
  {%- if url | length > 2 %}
    <li class="nav-item">
      {%- set attributesDefault = { "href": url, "title": name, "class": "nav-link", "rel": "noopener", "target": "_blank", "data-bs-toggle": "tooltip", "data-bs-placement": "bottom"} %}
      {#- One update() call merges every override, so no per-key loop is needed.
          The throwaway name must not be `_`: at macro scope that would shadow
          the gettext function used as `_(name)` further down. #}
      {%- if attributes %}{% set _unused = attributesDefault.update(attributes) %}{% endif -%}
      {% set attributeString = [] %}
      {% for key, val in attributesDefault.items() %}
        {%- set _ = attributeString.append('%s="%s"' % (key, val)) %}
      {% endfor %}
      {% set attributeString = attributeString | join(" ") -%}
      <a {{ attributeString }}>
        {%- if type == "fontawesome" -%}
          <span><i class="{{ icon }}"></i></span>
          <label class="sr-only">{{ _(name) }}</label>
        {%- elif type == "local" -%}
          <img src="{{ pathto(icon, 1) }}" class="icon-link-image" alt="{{ _(name) }}"/>
        {%- elif type == "url" -%}
          <img src="{{ icon }}" class="icon-link-image" alt="{{ _(name) }}"/>
        {%- else %}
          <span>Incorrectly configured icon link. Type must be `fontawesome`, `url` or `local`.</span>
        {%- endif -%}
      </a>
    </li>
  {%- endif -%}
{%- endmacro -%}
{#- Social-media links. All targets use https so visitors are never sent over
    plain http. #}
<ul id="navbar-icon-links"
    class="navbar-nav"
    aria-label="{{ _(theme_icon_links_label) }}">
  {%- block icon_link_shortcuts -%}
  {{ icon_link_nav_item("https://www.github.com/AMD", "fab fa-github", "GitHub", "fontawesome") -}}
  {{ icon_link_nav_item("https://www.facebook.com/amd", "fab fa-facebook-f", "Facebook", "fontawesome") -}}
  {{ icon_link_nav_item("https://www.twitter.com/amd", "fab fa-twitter", "Twitter", "fontawesome") -}}
  {{ icon_link_nav_item("https://www.instagram.com/amd", "fab fa-instagram", "Instagram", "fontawesome") -}}
  {{ icon_link_nav_item("https://www.linkedin.com/company/amd", "fab fa-linkedin", "LinkedIn", "fontawesome") -}}
  {{ icon_link_nav_item("https://www.amd.com/en/corporate/subscriptions", "fa fa-envelope", "Mail", "fontawesome") -}}
  {{ icon_link_nav_item("https://www.youtube.com/user/amd?sub_confirmation=1", "fab fa-youtube", "Youtube", "fontawesome") -}}
  {{ icon_link_nav_item("https://www.twitch.tv/amd", "fab fa-twitch", "Twitch", "fontawesome") -}}
  {% endblock icon_link_shortcuts -%}
</ul>
|
||||
@@ -1,5 +0,0 @@
|
||||
{% extends "!layout.html" %}
{# Override only the footer block of the inherited layout and fill it with the
   footer section template. #}

{%- block footer %}
  {%- include "sections/footer.html" %}
{%- endblock footer %}
|
||||
@@ -1,10 +0,0 @@
|
||||
{#- "Last updated" line followed by the optional extra footer content.
    The extra footer <div> is emitted as a sibling of the paragraph, not inside
    it: a <div> is not allowed inside <p>, so browsers close the paragraph
    early and turn the trailing </p> into a stray empty paragraph. -#}
{%- if last_updated %}
<p>
  {% trans prefix=translate('Last updated on'), last_updated=last_updated|e %}{{ prefix }} {{ last_updated }}.{% endtrans %}<br/>
</p>
{%- endif %}
{%- if theme_extra_footer %}
<div class="extra_footer">
  {{ theme_extra_footer }}
</div>
{%- endif %}
|
||||
@@ -1,20 +0,0 @@
|
||||
{#- Page footer: social links, copyright notice and legal links, followed by
    the watermark overlay, which sits outside the footer element. -#}
<div class="rocm-footer">
    {%- include "components/social-links.html" with context -%}
    {% include "components/copyright.html" %}
    <div class="rocm-footer-links">
        <ul>
            <li><a href="https://www.amd.com/en/corporate/copyright">Terms and Conditions (AMD)</a></li>
            {#- NOTE(review): this entry still points at "#" - presumably a placeholder; confirm the real target. #}
            <li><a href="#">Terms and Conditions (ROCm)</a></li>
            <li><a href="https://www.amd.com/en/corporate/privacy">Privacy</a></li>
            <li><a href="https://www.amd.com/en/corporate/cookies">Cookie Policy</a></li>
            <li><a href="https://www.amd.com/en/corporate/trademarks">Trademarks</a></li>
            <li><a href="https://www.amd.com/system/files/documents/statement-human-trafficking-forced-labor.pdf">Statement on Forced Labor</a></li>
            <li><a href="https://www.amd.com/en/corporate/competition">Fair and Open Competition</a></li>
            <li><a href="https://www.amd.com/system/files/documents/amd-uk-tax-strategy.pdf">UK Tax Strategy</a></li>
        </ul>
    </div>
</div>

<div id="rdc-watermark-container">
    <img id="rdc-watermark" src="{{ pathto('rdc-watermark.svg',1) }}" alt="DRAFT watermark"/>
</div>
|
||||
@@ -1,48 +0,0 @@
|
||||
{#- Header banner: AMD logo, a row of text links and the ROCm logo. Every link
    carries a `glow-wrap` overlay used for the hover effect.
    <img> is a void element, so it is written without a closing tag. -#}
<div class="rocm-header">
    <div class="rocm-header-buttons">
        <a href="https://www.amd.com" class="rocm-header-link">
            <img id="amd-logo" alt="Advanced Micro Devices, Inc." src="{{ pathto('amd-header-logo.svg',1) }}">
            <div class="glow-wrap">
                <i class="glow"></i>
            </div>
        </a>
        <a href="{{ theme_repository_url }}" class="rocm-header-link">
            <div class="rocm-link-box">
                <p>GitHub</p>
            </div>
            <div class="glow-wrap">
                <i class="glow"></i>
            </div>
        </a>
        <a href="https://github.com/RadeonOpenCompute/ROCm/discussions" class="rocm-header-link">
            <div class="rocm-link-box">
                <p>Community</p>
            </div>
            <div class="glow-wrap">
                <i class="glow"></i>
            </div>
        </a>
        <a href="https://github.com/RadeonOpenCompute/ROCm/issues/new" class="rocm-header-link">
            <div class="rocm-link-box">
                <p>Support</p>
            </div>
            <div class="glow-wrap">
                <i class="glow"></i>
            </div>
        </a>
        <a href="https://www.amd.com/en/technologies/infinity-hub" class="rocm-header-link">
            <div class="rocm-link-box">
                <p>Infinity Hub</p>
            </div>
            <div class="glow-wrap">
                <i class="glow"></i>
            </div>
        </a>
        <a href="https://rocm.amd.com" class="rocm-header-link rocm-header-last" id="rocm-link">
            <img id="rocm-logo" alt="ROCm logo" src="{{ pathto('rocm-on.png',1) }}">
            <div class="glow-wrap">
                <i class="glow"></i>
            </div>
        </a>
    </div>
</div>
|
||||
@@ -1,165 +0,0 @@
|
||||
defaults:
|
||||
numbered: False
|
||||
maxdepth: 6
|
||||
root: index
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: release
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: release/gpu_os_support
|
||||
- file: release/licensing
|
||||
- url: https://github.com/RadeonOpenCompute/ROCm/labels/Verified%20Issue
|
||||
title: Known Issues
|
||||
- file: release/compatibility
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: reference/framework_compatiblity/framework_compatiblity
|
||||
- file: reference/kernel_userspace_compatibility/kernel_userspace_comp
|
||||
|
||||
- entries:
|
||||
- file: deploy
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: quick_start
|
||||
- file: hip_sdk_install_win/hip_sdk_install_win
|
||||
- file: deploy/docker
|
||||
- file: deploy/install
|
||||
- file: deploy/multi
|
||||
- file: deploy/spack
|
||||
- file: deploy/build_source
|
||||
|
||||
|
||||
- caption: APIs and Reference
|
||||
entries:
|
||||
- file: reference/hip
|
||||
subtrees:
|
||||
- entries:
|
||||
- title: HIP Runtime API
|
||||
url: https://advanced-micro-devices-hip-saad.readthedocs-hosted.com/en/wip-sphinx/
|
||||
- title: HIPify - Port Your Code
|
||||
url: https://advanced-micro-devices-demo--737.com.readthedocs.build/projects/HIPIFY/en/737/
|
||||
- file: reference/gpu_libraries/math
|
||||
title: Math Libraries
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: reference/gpu_libraries/blas
|
||||
subtrees:
|
||||
- entries:
|
||||
- title: rocBLAS
|
||||
url: https://rocmdocs.amd.com/projects/rocBLAS/en/master/
|
||||
- title: hipBLAS
|
||||
url: https://rocmdocs.amd.com/projects/hipBLAS/en/master/
|
||||
- title: rocWMMA
|
||||
url: https://rocmdocs.amd.com/projects/rocWMMA/en/master/
|
||||
- file: reference/gpu_libraries/fft
|
||||
subtrees:
|
||||
- entries:
|
||||
- title: rocFFT
|
||||
url: https://rocmdocs.amd.com/projects/rocFFT/en/master/
|
||||
- title: hipFFT
|
||||
url: https://rocmdocs.amd.com/projects/hipFFT/en/master/
|
||||
- file: reference/gpu_libraries/rand
|
||||
subtrees:
|
||||
- entries:
|
||||
- title: rocRAND
|
||||
url: https://rocmdocs.amd.com/projects/rocRAND/en/master/
|
||||
- title: hipRAND
|
||||
url: https://rocmdocs.amd.com/projects/hipRAND/en/master/
|
||||
- file: reference/gpu_libraries/solver
|
||||
subtrees:
|
||||
- entries:
|
||||
- title: rocSOLVER
|
||||
url: https://rocmdocs.amd.com/projects/rocSOLVER/en/master/
|
||||
- title: hipSOLVER
|
||||
url: https://rocmdocs.amd.com/projects/hipSOLVER/en/master/
|
||||
- file: reference/gpu_libraries/sparse
|
||||
subtrees:
|
||||
- entries:
|
||||
- title: rocSPARSE
|
||||
url: https://rocmdocs.amd.com/projects/rocSPARSE/en/master/
|
||||
- title: hipSPARSE
|
||||
url: https://rocmdocs.amd.com/projects/hipSPARSE/en/master/
|
||||
- file: reference/gpu_libraries/c++_primitives
|
||||
title: C++ Primitives
|
||||
subtrees:
|
||||
- entries:
|
||||
- url: https://rocmdocs.amd.com/projects/rocPRIM/en/master/
|
||||
title: rocPRIM
|
||||
- entries:
|
||||
- url: https://rocmdocs.amd.com/projects/hipCUB/en/master/
|
||||
title: hipCUB
|
||||
- entries:
|
||||
- url: https://rocmdocs.amd.com/projects/rocThrust/en/master/
|
||||
title: rocThrust
|
||||
- file: reference/gpu_libraries/communication
|
||||
title: Communication Libraries
|
||||
subtrees:
|
||||
- entries:
|
||||
- url: https://rocmdocs.amd.com/projects/RCCL/en/master/
|
||||
title: RCCL
|
||||
- url: https://rocmsoftwareplatform.github.io/MIOpen/doc/html/releasenotes.html
|
||||
title: MIOpen - Machine Intelligence
|
||||
- url: https://rocmsoftwareplatform.github.io/AMDMIGraphX/doc/html/
|
||||
title: MIGraphX - Graph Optimization
|
||||
- file: reference/computer_vision
|
||||
subtrees:
|
||||
- entries:
|
||||
- url: https://rocmdocs.amd.com/projects/MIVisionX/en/master/
|
||||
title: MIVisionX
|
||||
- entries:
|
||||
- url: https://rocmdocs.amd.com/projects/rocAL/en/master/
|
||||
title: rocAL
|
||||
- file: reference/openmp/openmp
|
||||
title: OpenMP
|
||||
- file: reference/compilers
|
||||
title: Compilers and Tools
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: reference/rocmcc/rocmcc
|
||||
title: ROCmCC
|
||||
- url: http://profiler
|
||||
title: ROCGDB
|
||||
- url: http://profiler
|
||||
title: rocprof
|
||||
- url: http://profiler
|
||||
title: roctracer
|
||||
- url: http://profiler
|
||||
title: ROCdbgapi
|
||||
- file: reference/management_tools
|
||||
title: Management Tools
|
||||
subtrees:
|
||||
- entries:
|
||||
- url: http://smi
|
||||
title: rocmsmi
|
||||
- file: reference/gpu_arch
|
||||
- caption: Understand ROCm
|
||||
entries:
|
||||
- title: Compiler Disambiguation
|
||||
file: understand/compiler_disabiguation
|
||||
- file: isv_deployment_win
|
||||
- file: understand/deep_learning/deep_learning
|
||||
- file: understand/cmake_packages
|
||||
|
||||
- caption: How to Guides
|
||||
entries:
|
||||
- file: how_to/docker_gpu_isolation
|
||||
- file: how_to/magma_install/magma_install
|
||||
- file: how_to/pytorch_install/pytorch_install
|
||||
- file: how_to/tensorflow_install/tensorflow_install
|
||||
- file: how_to/system_debugging
|
||||
|
||||
- caption: Examples
|
||||
entries:
|
||||
- title: rocm-examples
|
||||
url: https://github.com/
|
||||
- file: examples/ai_ml_inferencing
|
||||
title: AI/ML/Inferencing
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: examples/inception_casestudy/inception_casestudy
|
||||
- file: examples/inception_casestudy_migraphx/inception_casestudy_migraphx
|
||||
|
||||
- caption: About
|
||||
entries:
|
||||
- file: about
|
||||
@@ -1,42 +0,0 @@
|
||||
import os
|
||||
from typing import Union
|
||||
from git import Repo, Remote, RemoteReference
|
||||
from pathlib import Path
|
||||
|
||||
def _branch_at_detached_head(repo: "Repo") -> Union[str, None]:
    """Return the name of a branch whose tip is the commit HEAD points at.

    Local heads are searched first, then the refs of every remote. Returns
    ``None`` when neither a local nor a remote branch points at that commit.
    """
    head_commit = repo.head.commit
    for head in repo.heads:
        if head.commit == head_commit:
            return head.name
    # Returning from inside the nested loops ends the whole search on the
    # first match; a bare ``break`` would only leave the inner loop.
    for remote in repo.remotes:
        for ref in remote.refs:
            if ref.commit == head_commit:
                # NOTE(review): only the last path component is kept, so a
                # ref named "origin/feature/x" yields "x" - confirm intended.
                return ref.name.split('/')[-1]
    return None


def format_toc(repo_path: Union[str, os.PathLike, None] = None):
    """Write ``_toc.yml`` next to this file from the ``_toc.yml.in`` template.

    The current git branch name is determined and substituted for ``{branch}``
    in every template line via ``str.format``. Lines starting with ``#`` at
    the very top of the template are dropped; once the first other line is
    seen, everything is copied.

    Args:
        repo_path: Path inside the git repository to inspect. Defaults to the
            parent of the directory containing this file. Parent directories
            are searched for the repository root.

    Raises:
        AssertionError: If the repository is bare.
        TypeError: If HEAD is detached and no local or remote branch points
            at the HEAD commit.
    """
    pwd = Path(__file__).resolve().parent
    if repo_path is None:
        repo_path = pwd.parent

    repo = Repo(repo_path, search_parent_directories=True)
    assert not repo.bare

    try:
        branch = repo.active_branch.name
    except TypeError as exc:  # HEAD is a detached commit
        branch = _branch_at_detached_head(repo)
        if branch is None:
            # On this path every local head has been compared against HEAD.
            checked_heads = [head.name for head in repo.heads]
            raise TypeError(
                "A branch name could not be determined.\n(Checked heads: %s)"
                % ' '.join(checked_heads)
            ) from exc

    in_leading_comments = True
    with open(pwd / '_toc.yml.in', 'r', encoding='utf-8') as template, \
            open(pwd / '_toc.yml', 'w', encoding='utf-8') as output:
        for line in template:
            if in_leading_comments and line.startswith('#'):
                continue
            in_leading_comments = False
            output.write(line.format(branch=branch))
|
||||
|
||||
|
||||
# Script entry point: regenerate the table of contents with default settings.
if __name__ == "__main__":
    format_toc()
|
||||
|
Before Width: | Height: | Size: 58 KiB |
|
Before Width: | Height: | Size: 46 KiB |
|
Before Width: | Height: | Size: 64 KiB |
|
Before Width: | Height: | Size: 28 KiB |
|
Before Width: | Height: | Size: 18 KiB |
|
Before Width: | Height: | Size: 21 KiB |