Sync develop branch

This commit is contained in:
Sam Wu
2024-08-02 11:13:45 -06:00
76 changed files with 4961 additions and 11988 deletions

View File

@@ -11,11 +11,11 @@
* RadeonOpenCompute
* ROCmSoftwarePlatform
## Updating the changelog
## Updating the changelog and release notes
> IMPORTANT: It is key to update the template Markdown files in `tools/autotag/templates/rocm_changes` (eg: `5.6.0.md`) and not the `CHANGELOG.md` itself to ensure that updates are not overwritten by the autotag script. The template should only have content from changelogs that are not included by the script to avoid duplicating data.
> IMPORTANT: It is key to update the template Markdown files in `tools/autotag/templates/<name of change type>` (eg: `5.6.0.md`) and not the `CHANGELOG.md` or `RELEASE.md` itself to ensure that updates are not overwritten by the autotag script. The template should only have content from changelogs that are not included by the script to avoid duplicating data.
* Add or update the release specific notes in `tools/autotag/templates/rocm_changes`
* Add or update the release specific notes in `tools/autotag/templates/<name of change type>`
* Ensure all the repositories have their release specific branch with the updated changelogs
* Run this for 5.6.0 (change for whatever version you require)
* `GITHUB_ACCESS_TOKEN=my_token_here`
@@ -26,10 +26,10 @@ To generate the changelog from 5.0.0 up to and including 6.1.2:
python3 tag_script.py -t $GITHUB_ACCESS_TOKEN --no-release --no-pulls --do-previous --compile_file ../../CHANGELOG.md --branch release/rocm-rel-6.1 6.1.2
```
To generate the changelog only for 6.1.2:
To generate the release notes only for 6.1.2:
```sh
python3 tag_script.py -t $GITHUB_ACCESS_TOKEN --no-release --no-pulls --compile_file ../../CHANGELOG.md --branch release/rocm-rel-6.1 6.1.2
python3 tag_script.py -t $GITHUB_ACCESS_TOKEN --no-release --no-pulls --compile_file ../../RELEASE.md --branch release/rocm-rel-6.1 6.1.2
```
### Notes

View File

@@ -0,0 +1,71 @@
<?xml version="1.0" encoding="UTF-8"?>
<manifest>
<!-- Component manifest: one <project> element per ROCm repository, all fetched from the "rocm-org" remote. -->
<!-- NOTE(review): presumably the components.xml that tag_script.py loads by default; confirm the file name. -->
<remote name="rocm-org" fetch="https://github.com/ROCm/" />
<!-- Defaults for every <project> below: pinned tag, remote, and sync options. -->
<default revision="refs/tags/rocm-6.1.1"
remote="rocm-org"
sync-c="true"
sync-j="4" />
<!--list of projects for ROCm-->
<!-- Each project carries a "category" (libs, tools, compilers, runtimes); libs and tools entries also carry a "group". -->
<project category="libs" group="ml" name="composable_kernel" />
<project category="libs" group="ml" name="AMDMIGraphX" />
<project category="libs" group="ml" name="MIOpen" />
<project category="libs" group="ml" name="MIVisionX" />
<!-- rocAL -->
<project category="libs" group="ml" name="rocDecode" />
<project category="libs" group="ml" name="rpp" />
<project category="libs" group="communication" name="rccl" />
<project category="libs" group="math" name="half" />
<project category="libs" group="math" name="hipBLAS" />
<project category="libs" group="math" name="hipBLASLt" />
<project category="libs" group="math" name="hipFFT" />
<project category="libs" group="math" name="hipfort" />
<project category="libs" group="math" name="hipRAND" />
<project category="libs" group="math" name="hipSOLVER" />
<project category="libs" group="math" name="hipSPARSE" />
<project category="libs" group="math" name="hipSPARSELt" />
<project category="libs" group="math" name="rocALUTION" />
<project category="libs" group="math" name="rocBLAS" />
<project category="libs" group="math" name="rocFFT" />
<project category="libs" group="math" name="rocRAND" />
<project category="libs" group="math" name="rocSOLVER" />
<project category="libs" group="math" name="rocSPARSE" />
<project category="libs" group="math" name="rocWMMA" />
<project category="libs" group="math" name="Tensile" />
<project category="libs" group="primitives" name="hipCUB" />
<project category="libs" group="primitives" name="hipTensor" />
<project category="libs" group="primitives" name="rocPRIM" />
<project category="libs" group="primitives" name="rocThrust" />
<project category="tools" group="dev" name="HIPIFY" />
<project category="tools" group="dev" name="ROCdbgapi" />
<project category="tools" group="dev" name="rocm-cmake" />
<project category="tools" group="dev" name="ROCgdb" />
<project category="tools" group="dev" name="rocr_debug_agent" />
<!-- omniperf, omnitrace -->
<project category="tools" group="perf" name="rocm_bandwidth_test" />
<project category="tools" group="perf" name="rocprofiler" />
<project category="tools" group="perf" name="roctracer" />
<project category="tools" group="system" name="amdsmi" />
<project category="tools" group="system" name="rocminfo" />
<project category="tools" group="system" name="rdc" />
<project category="tools" group="system" name="rocm_smi_lib" />
<project category="tools" group="system" name="ROCmValidationSuite" />
<!-- transferbench -->
<!-- Compiler and runtime projects have a category but no group attribute. -->
<project category="compilers" name="llvm-project" />
<project category="compilers" name="flang" path="openmp-extras/flang" />
<project category="runtimes" name="clr" />
<project category="runtimes" name="HIP" />
<project category="runtimes" name="ROCR-Runtime" />
<!-- The entries below are commented out, so they are not part of the manifest. -->
<!--<project name="ROCK-Kernel-Driver" />-->
<!--<project name="ROCT-Thunk-Interface" />-->
<!--<project name="rocm-core" />-->
<!--<project name="rocprofiler-register" />-->
<!--<project name="clang-ocl" />-->
<!--HIP Projects-->
<!--<project name="hip-tests" />-->
<!--<project name="HIP-Examples" />-->
<!--<project name="hipother" />-->
<!-- Projects for OpenMP-Extras -->
<!--<project name="aomp" path="openmp-extras/aomp" />-->
<!--<project name="aomp-extras" path="openmp-extras/aomp-extras" />-->
</manifest>

View File

@@ -188,7 +188,7 @@ def run_tagging():
# Use the manifest included in the ROCm GitHub repository by default.
if args.manifest_url is None:
manifest_path = (
"./../../default.xml"
"./components.xml"
)
else:
manifest_url = args.manifest_url
@@ -233,31 +233,26 @@ def run_tagging():
)
# Find all the math libraries and their remotes.
included_names = [
"AMDMIGraphX",
"HIPIFY", #
"MIOpen",
"MIVisionX",
"ROCmValidationSuite", #
"composable_kernel",
"hipfort",
"rocDecode",
"rocm-cmake",
"rpp",
]
included_groups = [
"mathlibs"
included_categories = [
"libs",
"tools",
"compilers",
"runtimes",
]
projects = [ ]
for project in manifest_tree.iterfind(".//project"):
include = str(project.get("name")) in included_names
if (project.get("name") in included_names) or (project.get("groups") in included_groups):
if project.get("category") in included_categories:
projects.append(project)
names_and_remotes = list((entry.get("name"), entry.get("remote")) for entry in projects)
component_information = list(
(entry.get("name"),
entry.get("remote"),
entry.get("group"),
entry.get("category"),
) for entry in projects)
# Get all the relevant ROCm releases, and only the last version if not doing previous.
minimum_version = "5.0.0" if args.previous else args.version
releases = release_bundle_factory.create_data_dict(args.version, names_and_remotes, minimum_version)
releases = release_bundle_factory.create_data_dict(args.version, component_information, minimum_version)
# Process the individual releases.
failed: List[Tuple[str, str]] = []

View File

@@ -23,19 +23,23 @@ This page contains the release notes for AMD ROCm™ Software.
-------------------
## ROCm {{version}}
{%- set rocm_changes = "./rocm_changes/" ~ version ~ ".md" %}
{% include rocm_changes ignore missing %}
{{- "\n\n" -}}
{%- set highlights = "./highlights/" ~ version ~ ".md" %}
{%- include highlights ignore missing -%}
{{- "\n\n" -}}
{%- set support = "./support/" ~ version ~ ".md" %}
{%- include support ignore missing -%}
### Library changes in ROCm {{version}}
| Library | Version |
|---------|---------|
{%- for lib_name, lib in release.libraries | dictsort %}
| Category | Group | Name | Version | Repository |
|----------|-------|------|---------|------------|
{%- for lib_name in release.libraries %}
{%- set lib = release.libraries[lib_name] %}
{%- if rocm_ver_by_lib_ver[lib_name][lib.lib_version] == version and (prev_lib_ver[lib_name][lib.lib_version] | default([]) | length > 0) and lib.lib_version %}
| {{ lib_name }} | {{prev_lib_ver[lib_name][lib.lib_version]}} ⇒ [{{ lib.lib_version }}]({{ lib.release_url }}) |
| {{ lib.category }} | {{ lib.group }} | [{{ lib_name }}]({{ lib.documentation_page }}) | {{prev_lib_ver[lib_name][lib.lib_version]}} ⇒ [{{ lib.lib_version }}]({{ lib.release_url }}) | [ROCm/{{ lib_name }}]({{ lib.repository_url }}) |
{%- elif lib.lib_version %}
| {{ lib_name }} | [{{ lib.lib_version }}]({{ lib.release_url }}) |
| {{ lib.category }} | {{ lib.group }} | [{{ lib_name }}]({{ lib.documentation_page }}) | [{{ lib.lib_version }}]({{ lib.release_url }}) | [ROCm/{{ lib_name }}]({{ lib.repository_url }}) |
{%- endif %}
{%- endfor %}
@@ -53,7 +57,17 @@ This page contains the release notes for AMD ROCm™ Software.
{{change|trim|e}}
{%- endfor %}{# change in lib.data.changes #}
{%- endif %}
{%- endif -%}
{%- endfor %}{# lib in release.libraries #}
{{- "\n\n" -}}
{%- set extra_components = "./extra_components/" ~ version ~ ".md" %}
{%- include extra_components ignore missing -%}
{{- "\n\n" -}}
{%- set known_issues = "./known_issues/" ~ version ~ ".md" %}
{%- include known_issues ignore missing -%}
{{- "\n\n" -}}
{%- set upcoming_changes = "./upcoming_changes/" ~ version ~ ".md" %}
{%- include upcoming_changes ignore missing -%}
{%- endfor %}{# release in releases #}
{# EOF #}

View File

@@ -1,10 +1,3 @@
ROCm 6.1.2 includes enhancements to SMI tools and improvements to some libraries.
### OS support
ROCm 6.1.2 has been tested against a pre-release version of Ubuntu 22.04.5 (kernel: 5.15 [GA], 6.8 [HWE]).
### AMD SMI
AMD SMI for ROCm 6.1.2
@@ -42,16 +35,6 @@ AMD SMI for ROCm 6.1.2
See the AMD SMI [detailed changelog](https://github.com/ROCm/amdsmi/blob/rocm-6.1.x/CHANGELOG.md) with code samples for more information.
```
### HIPCC
HIPCC for ROCm 6.1.2
#### Changes
* **Upcoming:** a future release will enable use of compiled binaries `hipcc.bin` and `hipconfig.bin` by default. No action is needed by users; you may continue calling high-level Perl scripts `hipcc` and `hipconfig`. `hipcc.bin` and `hipconfig.bin` will be invoked by the high-level Perl scripts. To revert to the previous behavior and invoke `hipcc.pl` and `hipconfig.pl`, set the `HIP_USE_PERL_SCRIPTS` environment variable to `1`.
* **Upcoming:** a subsequent release will remove high-level Perl scripts `hipcc` and `hipconfig`. This release will remove the `HIP_USE_PERL_SCRIPTS` environment variable. It will rename `hipcc.bin` and `hipconfig.bin` to `hipcc` and `hipconfig` respectively. No action is needed by the users. To revert to the previous behavior, invoke `hipcc.pl` and `hipconfig.pl` explicitly.
* **Upcoming:** a subsequent release will remove `hipcc.pl` and `hipconfig.pl`.
### ROCm SMI
ROCm SMI for ROCm 6.1.2

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
ROCm 6.1.2 includes enhancements to SMI tools and improvements to some libraries.

View File

@@ -0,0 +1,223 @@
The release notes provide a comprehensive summary of changes since the previous ROCm release.
- [Release highlights](#release-highlights)
- [Operating system and hardware support changes](#operating-system-and-hardware-support-changes)
- [ROCm components versioning](#rocm-components)
- [Detailed component changes](#detailed-component-changes)
- [ROCm known issues](#rocm-known-issues)
- [ROCm upcoming changes](#rocm-upcoming-changes)
The [Compatibility matrix](https://rocm.docs.amd.com/en/latest/release/docs/6.2.0/compatibility/compatibility-matrix)
provides an overview of operating system, hardware, ecosystem, and ROCm component support across ROCm releases.
Release notes for previous ROCm releases are available in earlier versions of the documentation.
See the [ROCm documentation release history](https://rocm.docs.amd.com/en/latest/release/versions).
## Release highlights
This section introduces notable new features and improvements in ROCm 6.2. See the
[Detailed component changes](#detailed-component-changes) for individual component changes.
### New components
ROCm 6.2.0 introduces the following new components to the ROCm software stack.
- **Omniperf** -- A kernel-level profiling tool for machine learning and high-performance computing (HPC) workloads
running on AMD Instinct accelerators. Omniperf offers comprehensive profiling and advanced analysis via command line
or a GUI dashboard. For more information, see
[Omniperf](https://rocm.docs.amd.com/projects/omniperf/en/latest).
- **Omnitrace** -- A multi-purpose analysis tool for profiling and tracing applications running on the CPU or the CPU and GPU.
It supports dynamic binary instrumentation, call-stack sampling, causal profiling, and other features for determining
which function and line number are executing. For more information, see
[Omnitrace](https://rocm.docs.amd.com/projects/omnitrace/en/latest).
- **rocPyDecode** -- A tool to access rocDecode APIs in Python. It connects Python and C/C++ libraries,
enabling function calling and data passing between the two languages. The `rocpydecode.so` library, a wrapper, uses
rocDecode APIs written primarily in C/C++ within Python. For more information, see
[rocPyDecode](https://rocm.docs.amd.com/projects/rocpydecode/en/latest).
- **ROCprofiler-SDK** -- ROCprofiler-SDK is a profiling and tracing library for HIP and ROCm applications on AMD ROCm software
used to identify application performance bottlenecks and optimize their performance. The new APIs add restrictions for more
efficient implementations and improved thread safety. A new window restriction specifies the services the tool can use.
ROCprofiler-SDK also provides a tool library to help you write your tool implementations. `rocprofv3` uses this tool library
to profile and trace applications for performance bottlenecks. Examples include API tracing, kernel tracing, and so on.
For more information, see [ROCprofiler-SDK](https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/latest).
```{note}
ROCprofiler-SDK for ROCm 6.2.0 is a beta release and subject to change.
```
### ROCm Offline Installer Creator introduced
The new ROCm Offline Installer Creator creates an installation package for a preconfigured setup of ROCm, the AMDGPU
driver, or a combination of the two on a target system without network access. This new tool customizes
multiple unique configurations for use when installing ROCm on a target. Other notable features include:
* A lightweight, easy-to-use user interface for configuring the creation of the installer
* Support for multiple Linux distributions
* Installer support for different ROCm releases and specific ROCm components
* Optional driver or driver-only installer creation
* Optional post-install preferences
* Lightweight installer packages, which are unique to the preconfigured ROCm setup
* Resolution and inclusion of dependency packages for offline installation
For more information, see
[ROCm Offline Installer Creator](https://rocm.docs.amd.com/projects/rocm-install-on-linux/en/latest/install/rocm-offline-installer.html).
### Math libraries default to Clang instead of HIPCC
The default compiler used to build the math libraries on Linux changes from `hipcc` to `amdclang++`.
Appropriate compiler flags are added to ensure these compilations build correctly. This change only applies when
building the libraries. Applications using the libraries can continue to be compiled using `hipcc` or `amdclang++` as
described in [ROCm compiler reference](https://rocm.docs.amd.com/projects/llvm-project/en/latest/reference/rocmcc.html).
The math libraries can also be built with `hipcc` using any of the previously available methods (for example, the `CXX`
environment variable, the `CMAKE_CXX_COMPILER` CMake variable, and so on). This change shouldn't affect performance or
functionality.
### Framework and library changes
This section highlights updates to supported deep learning frameworks and notable third-party library optimizations.
#### Additional PyTorch and TensorFlow support
ROCm 6.2.0 supports PyTorch versions 2.2 and 2.3 and TensorFlow version 2.16.
See [Installing PyTorch for ROCm](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/3rd-party/pytorch-install.html)
and [Installing TensorFlow for ROCm](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/3rd-party/tensorflow-install.html)
for installation instructions.
Refer to the
[Third-party support matrix](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/3rd-party-support-matrix.html#deep-learning)
for a comprehensive list of third-party frameworks and libraries supported by ROCm.
#### Optimized framework support for OpenXLA
PyTorch for ROCm and TensorFlow for ROCm now provide native support for OpenXLA. OpenXLA is an open-source ML compiler
ecosystem that enables developers to compile and optimize models from all leading ML frameworks. For more information, see
[Installing PyTorch for ROCm](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/3rd-party/pytorch-install.html)
and [Installing TensorFlow for ROCm](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/3rd-party/tensorflow-install.html).
#### PyTorch support for Autocast (automatic mixed precision)
PyTorch now supports Autocast for recurrent neural networks (RNNs) on ROCm. This can help to reduce computational
workloads and improve performance. Based on the information about the magnitude of values, Autocast can substitute the
original `float32` linear layers and convolutions with their `float16` or `bfloat16` variants. For more information, see
[Automatic mixed precision](https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/train-a-model#automatic-mixed-precision-amp).
#### Memory savings for bitsandbytes model quantization
The [ROCm-aware bitsandbytes library](https://github.com/ROCm/bitsandbytes) is a lightweight Python wrapper around HIP
custom functions, in particular 8-bit optimizer, matrix multiplication, and 8-bit and 4-bit quantization functions.
ROCm 6.2.0 introduces the following bitsandbytes changes:
- `Int8` matrix multiplication is enabled, and it includes the following functions:
- `extract-outliers` -- extracts rows and columns that have outliers in the inputs. They're later used for matrix multiplication without quantization.
- `transform` -- row-to-column and column-to-row transformations are enabled, along with transpose operations. These are used before and after matmul computation.
- `igemmlt` -- new function for GEMM computation A*B^T. It uses
[hipblasLtMatMul](https://rocm.docs.amd.com/projects/hipBLASLt/en/latest/api-reference.html#hipblasltmatmul) and performs 8-bit GEMM operations.
- `dequant_mm` -- dequantizes output matrix to original data type using scaling factors from vector-wise quantization.
- Blockwise quantization -- input tensors are quantized for a fixed block size.
- 4-bit quantization and dequantization functions -- normalized `Float4` quantization, quantile estimation, and quantile quantization functions are enabled.
- 8-bit and 32-bit optimizers are enabled.
```{note}
These functions are included in bitsandbytes. They are not part of ROCm. However, ROCm 6.2.0 has enabled the fixes and
features to run them.
```
For more information, see [Model quantization techniques](https://rocm.docs.amd.com/en/latest/how-to/llm-fine-tuning-optimization/model-quantization.html).
#### Improved vLLM support
ROCm 6.2.0 enhances vLLM support for inference on AMD Instinct accelerators, adding
capabilities for `FP16`/`BF16` precision for LLMs, and `FP8` support for Llama.
ROCm 6.2.0 adds support for the following vLLM features:
- MP:
Multi-GPU execution. Choose between MP and Ray using a flag. To set it to MP,
use `--distributed-executor-backend=mp`. The default depends on the commit in flux.
- FP8 KV cache:
Enhances computational efficiency and performance by significantly reducing memory usage and bandwidth requirements.
The QUARK quantizer currently only supports Llama.
- Triton Flash Attention:
ROCm supports both Triton and Composable Kernel Flash Attention 2 in vLLM. The default is Triton, but you can change this
setting using the `VLLM_USE_FLASH_ATTN_TRITON=False` environment variable.
- PyTorch TunableOp:
Improved optimization and tuning of GEMMs. It requires Docker with PyTorch 2.3 or later.
For more information about enabling these features, see
[vLLM inference](https://rocm.docs.amd.com/en/latest/how-to/llm-fine-tuning-optimization/llm-inference-frameworks.html#vllm-inference).
ROCm has a vLLM branch for experimental features. This includes performance improvements, accuracy, and correctness testing.
These features include:
- FP8 GEMMs: To improve the performance of FP8 quantization, work is underway on tuning the GEMM using the shapes used
in the model's execution. It only supports Llama because the QUARK quantizer currently only supports Llama.
- Custom decode paged attention: Improves performance by efficiently managing memory and enabling faster attention
computation in large-scale models. This benefits all workloads in `FP16` configurations.
To enable these experimental new features, see
[vLLM inference](https://rocm.docs.amd.com/en/latest/how-to/llm-fine-tuning-optimization/llm-inference-frameworks.html#vllm-inference).
Use the `rocm/vllm` branch when cloning the GitHub repo. The `vllm/ROCm_performance.md` document outlines
all the accessible features, and the `vllm/Dockerfile.rocm` file can be used.
### Enhanced performance tuning on AMD Instinct accelerators
ROCm is pretuned for high-performance computing workloads including large language models, generative AI, and scientific computing.
The ROCm documentation provides comprehensive guidance on configuring your system for AMD Instinct accelerators. It includes
detailed instructions on system settings and application tuning suggestions to help you fully leverage the capabilities of these
accelerators for optimal performance. For more information, see
[AMD MI300X tuning guides](https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/index.html) and
[AMD MI300A system optimization](https://rocm.docs.amd.com/en/latest/how-to/system-optimization/mi300a.html).
### Removed clang-ocl
As of version 6.2, ROCm no longer provides the `clang-ocl` package. The project will be archived in the future.
See the [clang-ocl README](https://github.com/ROCm/clang-ocl).
### ROCm documentation changes
The documentation for the ROCm components has been reorganized and reformatted in a standard look and feel. This
improves the usability and readability of the documentation. For more information about the ROCm components, see
[What is ROCm?](https://rocm.docs.amd.com/en/latest/what-is-rocm.html).
Since the release of ROCm 6.1, the documentation has added some key topics including:
- [AMD Instinct MI300X workload tuning guide](https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/workload.html)
- [AMD Instinct MI300X system tuning guide](https://rocm.docs.amd.com/en/latest/how-to/system-optimization/mi300x.html)
- [AMD Instinct MI300A system tuning guide](https://rocm.docs.amd.com/en/latest/how-to/system-optimization/mi300a.html)
- [Using ROCm for AI](https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/index.html)
- [Using ROCm for HPC](https://rocm.docs.amd.com/en/latest/how-to/rocm-for-hpc/index.html)
- [Fine-tuning LLMs and inference optimization](https://rocm.docs.amd.com/en/latest/how-to/llm-fine-tuning-optimization/index.html)
- [LLVM reference documentation](https://rocm.docs.amd.com/projects/llvm-project/en/latest/)
The following topics have been significantly improved, expanded, or both:
- [HIP programming manual](https://rocm.docs.amd.com/projects/HIP/en/latest/)
- [Compatibility matrix](https://rocm.docs.amd.com/en/latest/compatibility/compatibility-matrix.html)
```{note}
All ROCm projects are open source and available on GitHub. To contribute to ROCm documentation, see the
[ROCm documentation contribution guidelines](https://rocm.docs.amd.com/en/latest/contribute/contributing.html).
```

View File

@@ -0,0 +1,3 @@
### OS support
ROCm 6.1.2 has been tested against a pre-release version of Ubuntu 22.04.5 (kernel: 5.15 [GA], 6.8 [HWE]).

View File

@@ -0,0 +1,27 @@
## Operating system and hardware support changes
ROCm 6.2.0 adds support for the following operating system and kernel versions.
- Ubuntu 24.04 LTS (kernel: 6.8 [GA])
- RHEL 8.10 (kernel: 4.18.0-544)
- SLES 15 SP6 (kernel: 6.4)
ROCm 6.2.0 marks the end of support (EoS) for:
- Ubuntu 22.04.3
- RHEL 9.2
- RHEL 8.8
- SLES 15 SP4
- CentOS 7.9
ROCm 6.2.0 has been tested against pre-release Ubuntu 22.04.5 (kernel: 6.5 [HWE]).
See the [Compatibility matrix](https://rocm.docs.amd.com/en/docs-6.2.0/compatibility/compatibility-matrix.html) for an
overview of supported operating systems and hardware architectures.

View File

@@ -0,0 +1,9 @@
### HIPCC
HIPCC for ROCm 6.1.2
#### Changes
* **Upcoming:** a future release will enable use of compiled binaries `hipcc.bin` and `hipconfig.bin` by default. No action is needed by users; you may continue calling high-level Perl scripts `hipcc` and `hipconfig`. `hipcc.bin` and `hipconfig.bin` will be invoked by the high-level Perl scripts. To revert to the previous behavior and invoke `hipcc.pl` and `hipconfig.pl`, set the `HIP_USE_PERL_SCRIPTS` environment variable to `1`.
* **Upcoming:** a subsequent release will remove high-level Perl scripts `hipcc` and `hipconfig`. This release will remove the `HIP_USE_PERL_SCRIPTS` environment variable. It will rename `hipcc.bin` and `hipconfig.bin` to `hipcc` and `hipconfig` respectively. No action is needed by the users. To revert to the previous behavior, invoke `hipcc.pl` and `hipconfig.pl` explicitly.
* **Upcoming:** a subsequent release will remove `hipcc.pl` and `hipconfig.pl`.

View File

@@ -0,0 +1,79 @@
## ROCm known issues
ROCm known issues are noted on [{fab}`github` GitHub](https://github.com/ROCm/ROCm/labels/Verified%20Issue). For known
issues related to individual components, review the [Detailed component changes](#detailed-component-changes).
### Default processor affinity behavior for helper threads
Processor affinity is a critical setting to ensure that ROCm helper threads run on the correct cores. By default, ROCm
helper threads are spawned on all available cores, ignoring the parent thread's processor affinity. This can lead to
threads competing for available cores, which may result in suboptimal performance. This behavior occurs by default if
the environment variable `HSA_OVERRIDE_CPU_AFFINITY_DEBUG` is not set or is set to `1`. If
`HSA_OVERRIDE_CPU_AFFINITY_DEBUG` is set to `0`, the ROCr runtime uses the parent process's core affinity mask when
creating helper threads. The parent's affinity mask should then be set to account for the presence of additional threads
by ensuring the affinity mask contains enough cores. Depending on the affinity settings of the software environment,
batch system, launch commands like `numactl`/`taskset`, or explicit mask manipulation by the application itself, changing
the setting may be advantageous to performance.
To ensure the parent's core affinity mask is honored by the ROCm helper threads, set the
`HSA_OVERRIDE_CPU_AFFINITY_DEBUG` environment variable as follows:
```{code} shell
export HSA_OVERRIDE_CPU_AFFINITY_DEBUG=0
```
To ensure ROCm helper threads run on all available cores, set the `HSA_OVERRIDE_CPU_AFFINITY_DEBUG` environment variable
as follows:
``` shell
export HSA_OVERRIDE_CPU_AFFINITY_DEBUG=1
```
Or the default:
``` shell
unset HSA_OVERRIDE_CPU_AFFINITY_DEBUG
```
If unsure of the default processor affinity settings for your environment, run the following command from the shell:
``` shell
bash -c "echo taskset -p \$\$"
```
### KFDTest failure on Instinct MI300X with Oracle Linux 8.9
The `KFDEvictTest.QueueTest` is failing on the MI300X platform during KFD (Kernel Fusion Driver) tests, causing the full
suite to not execute properly. This issue is suspected to be hardware-related.
### Bandwidth limitation in gang and non-gang modes on Instinct MI300A
Expected target peak non-gang performance (~60GB/s) and target peak gang performance (~90GB/s) are not achieved. Both gang
and non-gang performance are observed to be limited at 45GB/s.
This issue will be addressed in a future ROCm release.
### rocm-llvm-alt
ROCm provides an optional package -- `rocm-llvm-alt` -- that provides a closed-source compiler for
users interested in additional closed-source CPU optimizations. This feature is not functional in
the ROCm 6.2.0 release. Users who attempt to invoke the closed-source compiler will experience an
LLVM consumer-producer mismatch and the compilation will fail. There is no workaround that allows
use of the closed-source compiler. It is recommended to compile using the default open-source
compiler, which generates high-quality AMD CPU and AMD GPU code.
## ROCm upcoming changes
This section notes upcoming changes to the ROCm software stack. For upcoming changes related to individual components, review
the [Detailed component changes](#detailed-component-changes).
### rocm-llvm-alt
The `rocm-llvm-alt` package will be removed in an upcoming release. Users relying on the
functionality provided by the closed-source compiler should transition to the open-source compiler.
Once the `rocm-llvm-alt` package is removed, any compilation requesting functionality provided by
the closed-source compiler will result in a Clang warning: "*[AMD] proprietary optimization compiler
has been removed*".

View File

@@ -16,7 +16,7 @@ class Changelog():
def __init__(self, releases: Dict[str, ReleaseBundle]):
self.releases = list(releases.items())
self.releases.sort(key=lambda x: Version(x[0]), reverse=True)
# self.releases.sort(key=lambda x: Version(x[0]), reverse=True)
# For each library find the earliest ROCm release where it updated.
rocm_ver_by_lib_ver: Dict[str, Dict[str, str]] = defaultdict(dict)
@@ -53,4 +53,4 @@ class Changelog():
prev_lib_ver=self.prev_lib_ver
)
output.write(content)
output.write(content)

View File

@@ -0,0 +1,20 @@
# Display labels keyed by the short ``category`` identifiers
# ("libs", "tools", "compilers", "runtimes").
category_mapping = dict(
    libs="Libraries",
    tools="Tools",
    compilers="Compilers",
    runtimes="Runtimes",
)
# An empty or absent category maps to an empty label instead of raising KeyError.
category_mapping.update({"": "", None: ""})

# Display labels keyed by the short ``group`` identifiers.
group_mapping = dict(
    ml="Machine Learning and Computer Vision",
    communication="Communication",
    math="Math",
    primitives="Primitives",
    dev="Development",
    perf="Performance",
    system="System",
)
# An empty or absent group maps to an empty label instead of raising KeyError.
group_mapping.update({"": "", None: ""})

View File

@@ -1,28 +1,32 @@
"""Class to store data about a particular release."""
from dataclasses import dataclass, field
import os
import re
import shutil
import sys
from typing import Optional, Union, Dict, List, Tuple
from github import Github, UnknownObjectException
from github.Repository import Repository
from github.Organization import Organization
from github.NamedUser import NamedUser
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple, Union
from git import Repo
from git.cmd import Git
from github import Github, UnknownObjectException
from github.NamedUser import NamedUser
from github.Organization import Organization
from github.Repository import Repository
from packaging.version import Version
from util.util import get_yn_input
from util.mappings import category_mapping, group_mapping
@dataclass
class ReleaseData:
    """Store Github data for a release.

    Every field has a default, so ``ReleaseData()`` yields an empty record.
    """

    # Release message text; empty string by default.
    message: str = ""
    # Release notes body; empty string by default.
    notes: str = ""
    # String-to-string mapping of changes; default_factory gives each
    # instance its own dict rather than a shared one.
    # NOTE(review): the meaning of the keys is not visible here -- confirm against the code that fills it.
    changes: Dict[str, str] = field(default_factory=dict)
@dataclass
class ReleaseLib:
"""Store data about a release for a particular library."""
@@ -34,6 +38,8 @@ class ReleaseLib:
commit: str = ""
rocm_version: str = ""
lib_version: str = ""
group: str = ""
category: str = ""
@property
def qualified_repo(self) -> str:
@@ -64,6 +70,16 @@ class ReleaseLib:
def release_url(self) -> str:
"""The Github URL of the release."""
return f"https://github.com/{self.qualified_repo}/releases/tag/{self.tag}"
@property
def documentation_page(self) -> str:
    """The Read the Docs documentation site for this library.

    Documentation projects are addressed by the bare repository name
    (for example ``.../projects/hipBLASLt/en/latest``), so any owner
    prefix on ``qualified_repo`` is dropped before building the URL.

    Returns:
        The URL of the ``latest`` documentation for the project.
    """
    # release_url interpolates qualified_repo directly after github.com,
    # so it is expected to look like "owner/name"; keep only the last
    # path segment. A value without a slash is used unchanged.
    project = self.qualified_repo.rsplit("/", 1)[-1]
    return f"https://rocm.docs.amd.com/projects/{project}/en/latest"
@property
def repository_url(self) -> str:
    """The GitHub repository URL.

    Built the same way as ``release_url``: ``qualified_repo`` is placed
    directly after the GitHub host, without adding a separate owner
    segment (which would produce ``.../ROCm/<owner>/<name>``).

    Returns:
        The URL of the repository on GitHub.
    """
    return f"https://github.com/{self.qualified_repo}"
@property
def message(self) -> str:
@@ -92,9 +108,7 @@ class ReleaseLib:
print(f"Release Message: '{self.data.message}'")
print(f"Release Notes:\n{self.data.notes}")
print(f"Release Commit: '{self.commit}'")
if get_yn_input(
"Would you like to create this tag and release?", release_yn
):
if get_yn_input("Would you like to create this tag and release?", release_yn):
try:
print("Performing tag and release.")
release = self.repo.create_git_tag_and_release(
@@ -142,9 +156,7 @@ class ReleaseLib:
fork.push(f"refs/heads/release:refs/heads/{self.branch}")
shutil.rmtree(repo_loc)
pr_title = (
f"Hotfixes from {self.branch} at release {self.full_version}"
)
pr_title = f"Hotfixes from {self.branch} at release {self.full_version}"
pr_body = (
"This is an autogenerated PR.\n This is intended to pull any"
f" hotfixes for ROCm release {self.full_version} (including"
@@ -159,10 +171,11 @@ class ReleaseLib:
print(f"Pull request created: {pr.html_url}")
return pr
class ReleaseDataFactory:
"""A factory for ReleaseData objects."""
lib_versions: Dict[str, str] = { }
lib_versions: Dict[str, str] = {}
"""A map of commit hashes to lib versions."""
def __init__(
@@ -176,7 +189,9 @@ class ReleaseDataFactory:
else:
self.org, self.pr_org = self.get_org_or_user(org_name)
def get_org_or_user(self, name: str) -> Tuple[Union[NamedUser, Organization], Union[NamedUser, Organization]]:
def get_org_or_user(
self, name: str
) -> Tuple[Union[NamedUser, Organization], Union[NamedUser, Organization]]:
"""Get a Github organization or user by name."""
gh_ns: Union[NamedUser, Organization]
pr_ns: Union[NamedUser, Organization]
@@ -188,12 +203,10 @@ class ReleaseDataFactory:
gh_ns = self.gh.get_user(name)
pr_ns = self.pr_gh.get_user(name)
except UnknownObjectException as err:
raise ValueError(
f"Could not find organization/user {name}."
) from err
raise ValueError(f"Could not find organization/user {name}.") from err
return gh_ns, pr_ns
def create_data(
def create_release_lib_data(
self,
name: str,
commit: str,
@@ -219,6 +232,7 @@ class ReleaseDataFactory:
)
return data
@dataclass
class ReleaseBundle:
    """Stores data about all the libraries bundled in this release.

    Both fields have defaults, so ``ReleaseBundle()`` yields an empty bundle.
    """

    # Version of the release this bundle describes.
    # NOTE(review): annotated as str, but create_release_bundle_data passes
    # its ``version`` argument (annotated ``Version``) straight through -
    # confirm which type consumers expect.
    version: str = ""
    # Map of str keys to ReleaseLib entries (the annotation's contract). The
    # factory must be ``dict``: a ``ReleaseLib`` factory would make the
    # default a single ReleaseLib instance instead of an empty mapping.
    libraries: Dict[str, ReleaseLib] = field(default_factory=dict)
class ReleaseBundleFactory:
gh: Github = None
@@ -234,16 +249,18 @@ class ReleaseBundleFactory:
default_remote: str = ""
"""The default fallback remote."""
remotes: Dict[str, str] = { }
remotes: Dict[str, str] = {}
"""A dictionary translating the manifest remote shorthand to the full name."""
tags: Dict[str, Dict[Version, str]] = { }
tags: Dict[str, Dict[Version, str]] = {}
"""A dictionary with all the ROCm version numbers and commit sha for each library."""
orgs_and_users: Dict[str, Tuple[Union[NamedUser, Organization], Union[NamedUser, Organization]]] = { }
orgs_and_users: Dict[
str, Tuple[Union[NamedUser, Organization], Union[NamedUser, Organization]]
] = {}
"""A dictionary containing the base and PR user or organization for each project."""
pr_repos: Dict[str, Tuple[Repo, Repo]] = { }
pr_repos: Dict[str, Tuple[Repo, Repo]] = {}
"""A dictionary containing the base and PR repo for each project."""
def __init__(
@@ -253,15 +270,15 @@ class ReleaseBundleFactory:
pr_gh: Github,
default_remote: str,
remotes: Dict[str, str],
branch: Optional[str]
branch: Optional[str],
):
# Store Github data
self.gh = gh
self.gh = gh
self.pr_gh = pr_gh
self.default_remote = default_remote
self.remotes = remotes
self.branch = branch
self.remotes = remotes
self.branch = branch
# Get the main repository:
self.rocm_repo = gh.get_repo(rocm_repo)
@@ -271,8 +288,10 @@ class ReleaseBundleFactory:
if remote in self.remotes:
return self.remotes[remote]
return self.default_remote
def get_org_or_user(self, remote: str) -> Tuple[Union[NamedUser, Organization], Union[NamedUser, Organization]]:
def get_org_or_user(
self, remote: str
) -> Tuple[Union[NamedUser, Organization], Union[NamedUser, Organization]]:
"""Gets the base and PR organization or user associated to a remote."""
if remote not in self.orgs_and_users:
try:
@@ -329,7 +348,7 @@ class ReleaseBundleFactory:
def fetch_tags(self, url: str) -> Dict[Version, str]:
"""Fetches a version-sha map for a given Git URL."""
result: Dict[Version, str] = { }
result: Dict[Version, str] = {}
for line in Git().ls_remote("--tags", url).split("\n"):
column = line.split("\t")
sha = column[0]
@@ -344,20 +363,23 @@ class ReleaseBundleFactory:
result[Version(rocm_ver)] = sha
return result
def create_data(
def create_release_bundle_data(
self,
version: Version,
names_and_remotes: List[Tuple[str, str]],
is_untagged: bool=False
component_info: List[Tuple[str, str]],
is_untagged: bool = False,
) -> ReleaseBundle:
"""Create a release bundle of libraries."""
tag_name = f"rocm-{version}"
libraries = { }
libraries = {}
missing_branches = []
prev_group = None
prev_category = None
print(f"\nLibraries for rocm-{version}:")
for name, remote in names_and_remotes:
for name, remote, group, category in component_info:
repo, pr_repo = self.get_repos(name, remote)
# Find the tag and otherwise
@@ -375,20 +397,30 @@ class ReleaseBundleFactory:
print(f" - Could not find branch : {self.branch}")
missing_branches.append(f"{self.branch} for {name}")
continue
if prev_group == group:
group = ""
else:
prev_group = group
if prev_category == category:
category = ""
else:
prev_category = category
libraries[name] = ReleaseLib(
name=name,
repo=repo,
pr_repo=pr_repo,
commit=commit,
rocm_version=str(version),
group=group_mapping[group],
category=category_mapping[category],
)
print(f"- {name:11} {commit}")
data = ReleaseBundle(
version=version,
libraries=libraries
)
data = ReleaseBundle(version=version, libraries=libraries)
for missing in missing_branches:
print(f"Could not find the following branch: {missing}")
@@ -398,8 +430,8 @@ class ReleaseBundleFactory:
def create_data_dict(
self,
up_to_version: str,
names_and_remotes: List[Tuple[str, str]],
min_version: str = "5.0.0"
component_information: List[Tuple[str, str]],
min_version: str = "5.0.0",
) -> Dict[str, ReleaseBundle]:
"""Create a map of versions and release bundles."""
@@ -417,6 +449,8 @@ class ReleaseBundleFactory:
for version in versions:
if version >= Version(min_version) and version <= max_version:
can_be_untagged = version == max_version
data[str(version)] = self.create_data(version, names_and_remotes, can_be_untagged)
data[str(version)] = self.create_release_bundle_data(
version, component_information, can_be_untagged
)
return data