mirror of
https://github.com/ROCm/ROCm.git
synced 2026-02-04 03:15:28 -05:00
Fix some linting issues (#2046)
This commit is contained in:
@@ -9,7 +9,7 @@ yourself with our documentation toolchain.
|
||||
|
||||
[ReadTheDocs](https://docs.readthedocs.io/en/stable/) is our frontend for the
|
||||
our documentation. By frontend, this is the tool that serves our HTML based
|
||||
documentation to our end users.
|
||||
documentation to our end users.
|
||||
|
||||
## Doxygen
|
||||
|
||||
@@ -28,8 +28,9 @@ projects that believe markdown is not suitable should contact the documentation
|
||||
team prior to selecting rst.
|
||||
|
||||
### MyST
|
||||
[Markedly Structured Text (MyST)](https://myst-tools.org/docs/spec) is an extended
|
||||
flavor of Markdown ([CommonMark](https://commonmark.org/)) influenced by ReStructured
|
||||
|
||||
[Markedly Structured Text (MyST)](https://myst-tools.org/docs/spec) is an extended
|
||||
flavor of Markdown ([CommonMark](https://commonmark.org/)) influenced by ReStructured
|
||||
Text (RST) and Sphinx.
|
||||
It is integrated via [`myst-parser`](https://myst-parser.readthedocs.io/en/latest/).
|
||||
A cheat sheet that showcases how to use the MyST syntax is available over at [the Jupyter
|
||||
|
||||
@@ -176,7 +176,7 @@ Follow these steps:
|
||||
for line in f.readlines():
|
||||
split_line = line.split('\t')
|
||||
val_dict[split_line[0]] = split_line[1]
|
||||
|
||||
|
||||
paths = glob.glob('./tiny-imagenet-200/val/images/*')
|
||||
for path in paths:
|
||||
file = path.split('/')[-1]
|
||||
@@ -184,13 +184,13 @@ Follow these steps:
|
||||
if not os.path.exists(target_folder + str(folder)):
|
||||
os.mkdir(target_folder + str(folder))
|
||||
os.mkdir(target_folder + str(folder) + '/images')
|
||||
|
||||
|
||||
for path in paths:
|
||||
file = path.split('/')[-1]
|
||||
folder = val_dict[file]
|
||||
dest = target_folder + str(folder) + '/images/' + str(file)
|
||||
move(path, dest)
|
||||
|
||||
|
||||
rmdir('./tiny-imagenet-200/val/images')
|
||||
```
|
||||
|
||||
@@ -201,7 +201,7 @@ Follow these steps:
|
||||
```py
|
||||
import torch
|
||||
import os
|
||||
import torchvision
|
||||
import torchvision
|
||||
from torchvision import transforms
|
||||
from torchvision.transforms.functional import InterpolationMode
|
||||
```
|
||||
@@ -231,7 +231,7 @@ Follow these steps:
|
||||
9. To smooth the image, use bilinear interpolation, a resampling method that uses the distance-weighted average of the four nearest pixel values to estimate a new pixel value.
|
||||
|
||||
```py
|
||||
interpolation = "bilinear"
|
||||
interpolation = "bilinear"
|
||||
```
|
||||
|
||||
The next parameters control the size to which the validation image is cropped and resized.
|
||||
@@ -244,7 +244,7 @@ Follow these steps:
|
||||
The pretrained Inception v3 model is chosen to be downloaded from torchvision.
|
||||
|
||||
```py
|
||||
model_name = "inception_v3"
|
||||
model_name = "inception_v3"
|
||||
pretrained = True
|
||||
```
|
||||
|
||||
@@ -289,9 +289,9 @@ Follow these steps:
|
||||
|
||||
```py
|
||||
interpolation = InterpolationMode(interpolation)
|
||||
|
||||
|
||||
TRAIN_TRANSFORM_IMG = transforms.Compose([
|
||||
# Normalizing and standardizing the image
|
||||
# Normalizing and standardizing the image
|
||||
transforms.RandomResizedCrop(train_crop_size, interpolation=interpolation),
|
||||
transforms.PILToTensor(),
|
||||
transforms.ConvertImageDtype(torch.float),
|
||||
@@ -310,16 +310,16 @@ Follow these steps:
|
||||
transforms.Normalize(mean=[0.485, 0.456, 0.406],
|
||||
std=[0.229, 0.224, 0.225] )
|
||||
])
|
||||
|
||||
dataset_test = torchvision.datasets.ImageFolder(
|
||||
val_dir,
|
||||
|
||||
dataset_test = torchvision.datasets.ImageFolder(
|
||||
val_dir,
|
||||
transform=TEST_TRANSFORM_IMG
|
||||
)
|
||||
|
||||
|
||||
print("Creating data loaders")
|
||||
train_sampler = torch.utils.data.RandomSampler(dataset)
|
||||
test_sampler = torch.utils.data.SequentialSampler(dataset_test)
|
||||
|
||||
|
||||
data_loader = torch.utils.data.DataLoader(
|
||||
dataset,
|
||||
batch_size=batch_size,
|
||||
@@ -327,7 +327,7 @@ Follow these steps:
|
||||
num_workers=num_workers,
|
||||
pin_memory=True
|
||||
)
|
||||
|
||||
|
||||
data_loader_test = torch.utils.data.DataLoader(
|
||||
dataset_test, batch_size=batch_size, sampler=test_sampler, num_workers=num_workers, pin_memory=True
|
||||
)
|
||||
@@ -445,10 +445,10 @@ Follow these steps:
|
||||
running_loss = 0
|
||||
for step, (image, target) in enumerate(data_loader_test):
|
||||
image, target = image.to(device), target.to(device)
|
||||
|
||||
|
||||
output = model(image)
|
||||
loss = criterion(output, target)
|
||||
|
||||
|
||||
running_loss += loss.item()
|
||||
running_loss = running_loss / len(data_loader_test)
|
||||
print('Epoch: ', epoch, '| test loss : %0.4f' % running_loss )
|
||||
@@ -548,7 +548,7 @@ Follow these steps:
|
||||
|
||||
```py
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.nn.functional as F
|
||||
```
|
||||
|
||||
8. Define the CNN (Convolutional Neural Network) and relevant activation functions.
|
||||
@@ -564,7 +564,7 @@ Follow these steps:
|
||||
self.conv3 = nn.Conv2d(3, 6, 5)
|
||||
self.fc2 = nn.Linear(120, 84)
|
||||
self.fc3 = nn.Linear(84, 10)
|
||||
|
||||
|
||||
def forward(self, x):
|
||||
x = self.pool(F.relu(self.conv1(x)))
|
||||
x = self.pool(F.relu(self.conv2(x)))
|
||||
@@ -594,21 +594,21 @@ Follow these steps:
|
||||
|
||||
```py
|
||||
for epoch in range(2): # loop over the dataset multiple times
|
||||
|
||||
|
||||
running_loss = 0.0
|
||||
for i, data in enumerate(train_loader, 0):
|
||||
# get the inputs; data is a list of [inputs, labels]
|
||||
inputs, labels = data
|
||||
|
||||
|
||||
# zero the parameter gradients
|
||||
optimizer.zero_grad()
|
||||
|
||||
|
||||
# forward + backward + optimize
|
||||
outputs = net(inputs)
|
||||
loss = criterion(outputs, labels)
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
|
||||
|
||||
# print statistics
|
||||
running_loss += loss.item()
|
||||
if i % 2000 == 1999: # print every 2000 mini-batches
|
||||
@@ -701,7 +701,7 @@ To understand the code step by step, follow these steps:
|
||||
4. The model is tested against the test set, test_images, and test_labels arrays.
|
||||
|
||||
```py
|
||||
fashion_mnist = tf.keras.datasets.fashion_mnist
|
||||
fashion_mnist = tf.keras.datasets.fashion_mnist
|
||||
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
|
||||
```
|
||||
|
||||
@@ -751,7 +751,7 @@ To understand the code step by step, follow these steps:
|
||||
|
||||
```py
|
||||
train_images = train_images / 255.0
|
||||
|
||||
|
||||
test_images = test_images / 255.0
|
||||
```
|
||||
|
||||
@@ -823,16 +823,16 @@ To understand the code step by step, follow these steps:
|
||||
|
||||
```py
|
||||
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
|
||||
|
||||
|
||||
print('\nTest accuracy:', test_acc)
|
||||
```
|
||||
|
||||
6. With the model trained, you can use it to make predictions about some images. The model's linear outputs are logits; attach a softmax layer to convert the logits to probabilities, which are easier to interpret.
|
||||
|
||||
```py
|
||||
probability_model = tf.keras.Sequential([model,
|
||||
probability_model = tf.keras.Sequential([model,
|
||||
tf.keras.layers.Softmax()])
|
||||
|
||||
|
||||
predictions = probability_model.predict(test_images)
|
||||
```
|
||||
|
||||
@@ -856,20 +856,20 @@ To understand the code step by step, follow these steps:
|
||||
plt.grid(False)
|
||||
plt.xticks([])
|
||||
plt.yticks([])
|
||||
|
||||
|
||||
plt.imshow(img, cmap=plt.cm.binary)
|
||||
|
||||
|
||||
predicted_label = np.argmax(predictions_array)
|
||||
if predicted_label == true_label:
|
||||
color = 'blue'
|
||||
else:
|
||||
color = 'red'
|
||||
|
||||
|
||||
plt.xlabel("{} {:2.0f}% ({})".format(class_names[predicted_label],
|
||||
100*np.max(predictions_array),
|
||||
class_names[true_label]),
|
||||
color=color)
|
||||
|
||||
|
||||
def plot_value_array(i, predictions_array, true_label):
|
||||
true_label = true_label[i]
|
||||
plt.grid(False)
|
||||
@@ -878,7 +878,7 @@ To understand the code step by step, follow these steps:
|
||||
thisplot = plt.bar(range(10), predictions_array, color="#777777")
|
||||
plt.ylim([0, 1])
|
||||
predicted_label = np.argmax(predictions_array)
|
||||
|
||||
|
||||
thisplot[predicted_label].set_color('red')
|
||||
thisplot[true_label].set_color('blue')
|
||||
```
|
||||
@@ -930,7 +930,7 @@ To understand the code step by step, follow these steps:
|
||||
```py
|
||||
# Add the image to a batch where it's the only member.
|
||||
img = (np.expand_dims(img,0))
|
||||
|
||||
|
||||
print(img.shape)
|
||||
```
|
||||
|
||||
@@ -938,9 +938,9 @@ To understand the code step by step, follow these steps:
|
||||
|
||||
```py
|
||||
predictions_single = probability_model.predict(img)
|
||||
|
||||
|
||||
print(predictions_single)
|
||||
|
||||
|
||||
plot_value_array(1, predictions_single[0], test_labels)
|
||||
_ = plt.xticks(range(10), class_names, rotation=45)
|
||||
plt.show()
|
||||
@@ -973,7 +973,7 @@ Follow these steps:
|
||||
import shutil
|
||||
import string
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
from tensorflow.keras import layers
|
||||
from tensorflow.keras import losses
|
||||
```
|
||||
@@ -982,7 +982,7 @@ Follow these steps:
|
||||
|
||||
```py
|
||||
url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"
|
||||
|
||||
|
||||
dataset = tf.keras.utils.get_file("aclImdb_v1", url,
|
||||
untar=True, cache_dir='.',
|
||||
cache_subdir='')
|
||||
@@ -1061,12 +1061,12 @@ Follow these steps:
|
||||
10. Next, create the validation and test datasets. Use the remaining 5,000 reviews from the training set for validation, split into two classes of 2,500 reviews each.
|
||||
|
||||
```py
|
||||
raw_val_ds = tf.keras.utils.text_dataset_from_directory('aclImdb/train',
|
||||
raw_val_ds = tf.keras.utils.text_dataset_from_directory('aclImdb/train',
|
||||
batch_size=batch_size,validation_split=0.2,subset='validation', seed=seed)
|
||||
|
||||
raw_test_ds =
|
||||
|
||||
raw_test_ds =
|
||||
tf.keras.utils.text_dataset_from_directory(
|
||||
'aclImdb/test',
|
||||
'aclImdb/test',
|
||||
batch_size=batch_size)
|
||||
```
|
||||
|
||||
@@ -1107,7 +1107,7 @@ To prepare the data for training, follow these steps:
|
||||
def vectorize_text(text, label):
|
||||
text = tf.expand_dims(text, -1)
|
||||
return vectorize_layer(text), label
|
||||
|
||||
|
||||
text_batch, label_batch = next(iter(raw_train_ds))
|
||||
first_review, first_label = text_batch[0], label_batch[0]
|
||||
print("Review", first_review)
|
||||
@@ -1143,7 +1143,7 @@ To prepare the data for training, follow these steps:
|
||||
|
||||
```py
|
||||
AUTOTUNE = tf.data.AUTOTUNE
|
||||
|
||||
|
||||
train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
|
||||
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
|
||||
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)
|
||||
@@ -1188,7 +1188,7 @@ To prepare the data for training, follow these steps:
|
||||
|
||||
```py
|
||||
loss, accuracy = model.evaluate(test_ds)
|
||||
|
||||
|
||||
print("Loss: ", loss)
|
||||
print("Accuracy: ", accuracy)
|
||||
```
|
||||
@@ -1209,9 +1209,9 @@ To prepare the data for training, follow these steps:
|
||||
val_acc = history_dict['val_binary_accuracy']
|
||||
loss = history_dict['loss']
|
||||
val_loss = history_dict['val_loss']
|
||||
|
||||
|
||||
epochs = range(1, len(acc) + 1)
|
||||
|
||||
|
||||
# "bo" is for "blue dot"
|
||||
plt.plot(epochs, loss, 'bo', label='Training loss')
|
||||
# b is for "solid blue line"
|
||||
@@ -1220,7 +1220,7 @@ To prepare the data for training, follow these steps:
|
||||
plt.xlabel('Epochs')
|
||||
plt.ylabel('Loss')
|
||||
plt.legend()
|
||||
|
||||
|
||||
plt.show()
|
||||
```
|
||||
|
||||
@@ -1250,11 +1250,11 @@ To prepare the data for training, follow these steps:
|
||||
model,
|
||||
layers.Activation('sigmoid')
|
||||
])
|
||||
|
||||
|
||||
export_model.compile(
|
||||
loss=losses.BinaryCrossentropy(from_logits=False), optimizer="adam", metrics=['accuracy']
|
||||
)
|
||||
|
||||
|
||||
# Test it with `raw_test_ds`, which yields raw strings
|
||||
loss, accuracy = export_model.evaluate(raw_test_ds)
|
||||
print(accuracy)
|
||||
@@ -1268,7 +1268,7 @@ To prepare the data for training, follow these steps:
|
||||
"The movie was okay.",
|
||||
"The movie was terrible..."
|
||||
]
|
||||
|
||||
|
||||
export_model.predict(examples)
|
||||
```
|
||||
|
||||
@@ -1296,7 +1296,7 @@ MIGraphX is a graph compiler focused on accelerating the Machine Learning infere
|
||||
|
||||
- Constant propagation
|
||||
|
||||
After doing all these transformations, MIGraphX emits code for the AMD GPU by calling to MIOpen or rocBLAS or creating HIP kernels for a particular operator. MIGraphX can also target CPUs using DNNL or ZenDNN libraries.
|
||||
After doing all these transformations, MIGraphX emits code for the AMD GPU by calling to MIOpen or rocBLAS or creating HIP kernels for a particular operator. MIGraphX can also target CPUs using DNNL or ZenDNN libraries.
|
||||
|
||||
MIGraphX provides easy-to-use APIs in C++ and Python to import machine learning models in ONNX or TensorFlow. Users can compile, save, load, and run these models using MIGraphX's C++ and Python APIs. Internally, MIGraphX parses ONNX or TensorFlow models into an internal graph representation where each operator in the model gets mapped to an operator within MIGraphX. Each of these operators defines various attributes such as:
|
||||
|
||||
@@ -1351,7 +1351,7 @@ To use Docker, follow these steps:
|
||||
2. The repo contains a Dockerfile from which you can build a Docker image as:
|
||||
|
||||
```bash
|
||||
docker build -t migraphx .
|
||||
docker build -t migraphx .
|
||||
```
|
||||
|
||||
3. Then to enter the development environment, use Docker run:
|
||||
@@ -1388,22 +1388,22 @@ Follow these steps:
|
||||
2. The following script shows the usage of Python API to import the ONNX model, compile it, and run inference on it. Set LD_LIBRARY_PATH to /opt/rocm/ if required.
|
||||
|
||||
```py
|
||||
# import migraphx and numpy
|
||||
# import migraphx and numpy
|
||||
import migraphx
|
||||
import numpy as np
|
||||
# import and parse inception model
|
||||
# import and parse inception model
|
||||
model = migraphx.parse_onnx("inceptioni1.onnx")
|
||||
# compile model for the GPU target
|
||||
model.compile(migraphx.get_target("gpu"))
|
||||
# optionally print compiled model
|
||||
model.print()
|
||||
# create random input image
|
||||
model.print()
|
||||
# create random input image
|
||||
input_image = np.random.rand(1, 3, 299, 299).astype('float32')
|
||||
# feed image to model, 'x.1' is the input param name
|
||||
# feed image to model, 'x.1' is the input param name
|
||||
results = model.run({'x.1': input_image})
|
||||
# get the results back
|
||||
result_np = np.array(results[0])
|
||||
# print the inferred class of the input image
|
||||
# print the inferred class of the input image
|
||||
print(np.argmax(result_np))
|
||||
```
|
||||
|
||||
@@ -1422,7 +1422,7 @@ Follow these steps:
|
||||
#include <ctime>
|
||||
#include <random>
|
||||
#include <migraphx/migraphx.hpp>
|
||||
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
migraphx::program prog;
|
||||
@@ -1438,12 +1438,12 @@ Follow these steps:
|
||||
prog.compile(targ, comp_opts);
|
||||
// print the compiled program
|
||||
prog.print();
|
||||
// randomly generate input image
|
||||
// randomly generate input image
|
||||
// of shape (1, 3, 299, 299)
|
||||
std::srand(unsigned(std::time(nullptr)));
|
||||
std::vector<float> input_image(1*299*299*3);
|
||||
std::generate(input_image.begin(), input_image.end(), std::rand);
|
||||
// users need to provide data for the input
|
||||
// users need to provide data for the input
|
||||
// parameters in order to run inference
|
||||
// you can query into migraph program for the parameters
|
||||
migraphx::program_parameters prog_params;
|
||||
@@ -1453,7 +1453,7 @@ Follow these steps:
|
||||
prog_params.add(input, migraphx::argument(param_shapes[input], input_image.data()));
|
||||
// run inference
|
||||
auto outputs = prog.eval(prog_params);
|
||||
// read back the output
|
||||
// read back the output
|
||||
float* results = reinterpret_cast<float*>(outputs[0].data());
|
||||
float* max = std::max_element(results, results + 1000);
|
||||
int answer = max - results;
|
||||
@@ -1466,16 +1466,16 @@ Follow these steps:
|
||||
```cmake
|
||||
cmake_minimum_required(VERSION 3.5)
|
||||
project (CAI)
|
||||
|
||||
|
||||
set (CMAKE_CXX_STANDARD 14)
|
||||
set (EXAMPLE inception_inference)
|
||||
|
||||
|
||||
list (APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm)
|
||||
find_package (migraphx)
|
||||
|
||||
|
||||
message("source file: " ${EXAMPLE}.cpp " ---> bin: " ${EXAMPLE})
|
||||
add_executable(${EXAMPLE} ${EXAMPLE}.cpp)
|
||||
|
||||
|
||||
target_link_libraries(${EXAMPLE} migraphx::c)
|
||||
```
|
||||
|
||||
@@ -1541,7 +1541,7 @@ Inference time: 0.029ms
|
||||
iterator : 9
|
||||
Inference complete
|
||||
Inference time: 0.029ms
|
||||
|
||||
|
||||
### TUNED ###
|
||||
iterator : 0
|
||||
Inference complete
|
||||
@@ -1581,7 +1581,7 @@ The best inference performance through MIGraphX is conditioned upon having tuned
|
||||
|
||||
Tuning is time consuming, and if the users have not performed tuning, they would see discrepancies between expected or claimed inference performance and actual inference performance. This has led to repetitive and time-consuming tuning tasks for each user.
|
||||
|
||||
MIGraphX introduces a feature, known as YModel, that stores the kernel config parameters found during tuning into a .mxr file. This ensures the same level of expected performance, even when a model is copied to a different user/system.
|
||||
MIGraphX introduces a feature, known as YModel, that stores the kernel config parameters found during tuning into a .mxr file. This ensures the same level of expected performance, even when a model is copied to a different user/system.
|
||||
|
||||
The YModel feature is available starting from ROCm 5.4.1 and UIF 1.1.
|
||||
|
||||
|
||||
@@ -10,8 +10,9 @@ align: center
|
||||
---
|
||||
ROCm Compatible Frameworks Flowchart
|
||||
```
|
||||
|
||||
## Frameworks Installation
|
||||
|
||||
- [How to Install PyTorch?](pytorch_install/pytorch_install)
|
||||
- [How to Install Magma?](magma_install/magma_install)
|
||||
- [How to Install TensorFlow?](tensorflow_install/tensorflow_install)
|
||||
- [How to Install PyTorch?](pytorch_install/pytorch_install)
|
||||
- [How to Install Magma?](magma_install/magma_install)
|
||||
- [How to Install TensorFlow?](tensorflow_install/tensorflow_install)
|
||||
|
||||
@@ -89,4 +89,4 @@ Advanced Micro Devices, Inc., \[Online\]. Available: [https://github.com/ROCmSof
|
||||
|
||||
Docker, \[Online\]. [https://docs.docker.com/get-started/overview/](https://docs.docker.com/get-started/overview/)
|
||||
|
||||
Torchvision, \[Online\]. Available: [https://pytorch.org/vision/master/index.html?highlight=torchvision#module-torchvision](https://pytorch.org/vision/master/index.html?highlight=torchvision#module-torchvision)
|
||||
Torchvision, \[Online\]. Available: [https://pytorch.org/vision/master/index.html?highlight=torchvision#module-torchvision](https://pytorch.org/vision/master/index.html?highlight=torchvision#module-torchvision)
|
||||
|
||||
@@ -26,10 +26,10 @@ For AMDGPU and ROCm installation using the installer script method on Linux
|
||||
distribution, follow these steps:
|
||||
|
||||
1. **Meet prerequisites** – Ensure the Prerequisites are met before downloading
|
||||
and installing the installer using the installer script method.
|
||||
and installing the installer using the installer script method.
|
||||
|
||||
2. **Download and install the installer script** – Ensure you download and
|
||||
install the installer script from the recommended URL.
|
||||
install the installer script from the recommended URL.
|
||||
|
||||
```{tip}
|
||||
The installer package is updated periodically to resolve known issues and add
|
||||
@@ -38,7 +38,7 @@ install the installer script from the recommended URL.
|
||||
```
|
||||
|
||||
3. **Use the installer script on Linux distributions** – Ensure you execute the
|
||||
script for installing use cases.
|
||||
script for installing use cases.
|
||||
|
||||
### Download and Install the Installer Script
|
||||
|
||||
@@ -147,7 +147,7 @@ To install use cases specific to your requirements, use the installer
|
||||
- To install multiple use cases:
|
||||
|
||||
```shell
|
||||
sudo amdgpu-install --usecase=hiplibsdk,rocm
|
||||
sudo amdgpu-install --usecase=hiplibsdk,rocm
|
||||
```
|
||||
|
||||
- To display a list of available use cases:
|
||||
@@ -164,7 +164,7 @@ To install use cases specific to your requirements, use the installer
|
||||
|
||||
```none
|
||||
If --usecase option is not present, the default selection is "graphics,opencl,hip"
|
||||
|
||||
|
||||
Available use cases:
|
||||
rocm(for users and developers requiring full ROCm stack)
|
||||
- OpenCL (ROCr/KFD based) runtime
|
||||
@@ -176,16 +176,16 @@ To install use cases specific to your requirements, use the installer
|
||||
lrt(for users of applications requiring ROCm runtime)
|
||||
- ROCm Compiler and device libraries
|
||||
- ROCr runtime and thunk
|
||||
opencl(for users of applications requiring OpenCL on Vega or
|
||||
opencl(for users of applications requiring OpenCL on Vega or
|
||||
later products)
|
||||
- ROCr based OpenCL
|
||||
- ROCm Language runtime
|
||||
|
||||
|
||||
openclsdk (for application developers requiring ROCr based OpenCL)
|
||||
- ROCr based OpenCL
|
||||
- ROCm Language runtime
|
||||
- development and SDK files for ROCr based OpenCL
|
||||
|
||||
|
||||
hip(for users of HIP runtime on AMD products)
|
||||
- HIP runtimes
|
||||
hiplibsdk (for application developers requiring HIP on AMD products)
|
||||
@@ -351,9 +351,9 @@ The functions of a package manager installation system are:
|
||||
- Grouping packages based on function
|
||||
- Extracting package archives
|
||||
- Ensuring a package is installed with all necessary packages and dependencies
|
||||
are managed
|
||||
are managed
|
||||
- From a remote repository, looking up, downloading, installing, or updating
|
||||
existing packages
|
||||
existing packages
|
||||
- Ensuring the authenticity and integrity of the package
|
||||
|
||||
### Installing ROCm on Linux Distributions
|
||||
@@ -362,27 +362,27 @@ For a fresh ROCm installation using the package manager method on a Linux
|
||||
distribution, follow the steps below:
|
||||
|
||||
1. **Meet prerequisites** – Ensure the Prerequisites are met before the ROCm
|
||||
installation.
|
||||
installation.
|
||||
|
||||
2. **Install kernel headers and development packages** – Ensure kernel headers
|
||||
and development packages are installed on the system.
|
||||
and development packages are installed on the system.
|
||||
|
||||
3. **Select the base URLs for AMDGPU and ROCm stack repository** – Ensure the
|
||||
base URLs for AMDGPU and ROCm stack repositories are selected.
|
||||
base URLs for AMDGPU and ROCm stack repositories are selected.
|
||||
|
||||
4. **Add the AMDGPU stack repository** – Ensure the AMDGPU stack repository is
|
||||
added.
|
||||
added.
|
||||
|
||||
5. **Install the kernel-mode driver and reboot the system** – Ensure the
|
||||
kernel-mode driver is installed and the system is rebooted.
|
||||
kernel-mode driver is installed and the system is rebooted.
|
||||
|
||||
6. **Add ROCm stack repository** – Ensure the ROCm stack repository is added.
|
||||
|
||||
7. **Install single-version or multiversion ROCm meta-packages** – Install the
|
||||
desired meta-packages.
|
||||
desired meta-packages.
|
||||
|
||||
8. **Verify installation for the applicable distributions** – Verify if the
|
||||
installation is successful.
|
||||
installation is successful.
|
||||
|
||||
```{important}
|
||||
You cannot install a kernel-mode driver in a Docker container. Refer to the
|
||||
@@ -428,8 +428,8 @@ To check the `kernel-headers` and `linux-modules-extra` package versions,
|
||||
follow these steps:
|
||||
|
||||
1. For the Ubuntu/Debian environment, execute the following command to verify
|
||||
the kernel headers and development packages are installed with the respective
|
||||
versions:
|
||||
the kernel headers and development packages are installed with the
|
||||
respective versions:
|
||||
|
||||
```shell
|
||||
sudo dpkg -l | grep linux-headers
|
||||
@@ -442,7 +442,7 @@ versions:
|
||||
```
|
||||
|
||||
2. Execute the following command to check whether the development packages are
|
||||
installed:
|
||||
installed:
|
||||
|
||||
```shell
|
||||
sudo dpkg -l | grep linux-modules-extra
|
||||
@@ -456,8 +456,8 @@ installed:
|
||||
```
|
||||
|
||||
3. If the supported version installation of Linux headers and development
|
||||
packages are not installed on the system, execute the following command to
|
||||
install the packages:
|
||||
packages are not installed on the system, execute the following command
|
||||
to install the packages:
|
||||
|
||||
```shell
|
||||
sudo apt install linux-headers-`uname -r` linux-modules-extra-`uname -r`
|
||||
@@ -492,26 +492,26 @@ install the packages:
|
||||
|
||||
To add the AMDGPU stack repository, follow these steps:
|
||||
|
||||
::::{tab-set}
|
||||
:::{tab-item} Ubuntu 20.04
|
||||
:sync: ubuntu-20.04
|
||||
::::{tab-set}
|
||||
:::{tab-item} Ubuntu 20.04
|
||||
:sync: ubuntu-20.04
|
||||
|
||||
```shell
|
||||
echo 'deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/5.4.3/ubuntu focal main' | sudo tee /etc/apt/sources.list.d/amdgpu.list
|
||||
sudo apt update
|
||||
```
|
||||
|
||||
:::
|
||||
:::{tab-item} Ubuntu 22.04
|
||||
:sync: ubuntu-22.04
|
||||
:::
|
||||
:::{tab-item} Ubuntu 22.04
|
||||
:sync: ubuntu-22.04
|
||||
|
||||
```shell
|
||||
echo 'deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/5.4.3/ubuntu jammy main' | sudo tee /etc/apt/sources.list.d/amdgpu.list
|
||||
sudo apt update
|
||||
```
|
||||
|
||||
:::
|
||||
::::
|
||||
:::
|
||||
::::
|
||||
|
||||
Install the kernel mode driver and reboot the system using the following
|
||||
commands:
|
||||
@@ -525,9 +525,9 @@ install the packages:
|
||||
|
||||
To add the ROCm repository, use the following steps:
|
||||
|
||||
::::{tab-set}
|
||||
:::{tab-item} Ubuntu 20.04
|
||||
:sync: ubuntu-20.04
|
||||
::::{tab-set}
|
||||
:::{tab-item} Ubuntu 20.04
|
||||
:sync: ubuntu-20.04
|
||||
|
||||
```shell
|
||||
for ver in 5.0.2 5.1.4 5.2.5 5.3.3 5.4.3; do
|
||||
@@ -537,9 +537,9 @@ install the packages:
|
||||
sudo apt update
|
||||
```
|
||||
|
||||
:::
|
||||
:::{tab-item} Ubuntu 22.04
|
||||
:sync: ubuntu-22.04
|
||||
:::
|
||||
:::{tab-item} Ubuntu 22.04
|
||||
:sync: ubuntu-22.04
|
||||
|
||||
```shell
|
||||
for ver in 5.0.2 5.1.4 5.2.5 5.3.3 5.4.3; do
|
||||
@@ -549,8 +549,8 @@ install the packages:
|
||||
sudo apt update
|
||||
```
|
||||
|
||||
:::
|
||||
::::
|
||||
:::
|
||||
::::
|
||||
|
||||
Install packages of your choice in a single-version ROCm install or
|
||||
in a multi-version ROCm install fashion. For more information on what
|
||||
@@ -596,7 +596,7 @@ To check the kernel headers and `linux-modules-extra` package versions,
|
||||
follow these steps:
|
||||
|
||||
1. To verify you have the supported version of the installed kernel headers,
|
||||
type the following on the command line:
|
||||
type the following on the command line:
|
||||
|
||||
```shell
|
||||
sudo yum list installed kernel-headers
|
||||
@@ -607,15 +607,15 @@ type the following on the command line:
|
||||
the same versions as the kernel.
|
||||
|
||||
2. The following command lists the development packages on your system. Verify
|
||||
if the listed development package's version number matches the kernel version
|
||||
number:
|
||||
if the listed development package's version number matches the kernel
|
||||
version number:
|
||||
|
||||
```shell
|
||||
sudo yum list installed kernel-devel
|
||||
```
|
||||
|
||||
3. If the supported version installation of kernel headers and development
|
||||
packages does not exist on the system, execute the command below to install:
|
||||
packages does not exist on the system, execute the command below to install:
|
||||
|
||||
```shell
|
||||
sudo yum install kernel-headers-`uname -r` kernel-devel-`uname -r`
|
||||
@@ -631,9 +631,9 @@ packages does not exist on the system, execute the command below to install:
|
||||
section.
|
||||
```
|
||||
|
||||
::::{tab-set}
|
||||
:::{tab-item} RHEL 8.6
|
||||
:sync: RHEL-8.6
|
||||
::::{tab-set}
|
||||
:::{tab-item} RHEL 8.6
|
||||
:sync: RHEL-8.6
|
||||
|
||||
```shell
|
||||
sudo tee --append /etc/yum.repos.d/amdgpu.repo <<EOF
|
||||
@@ -648,10 +648,10 @@ packages does not exist on the system, execute the command below to install:
|
||||
sudo yum clean all
|
||||
```
|
||||
|
||||
:::
|
||||
:::
|
||||
|
||||
:::{tab-item} RHEL 8.7
|
||||
:sync: RHEL-8.7
|
||||
:::{tab-item} RHEL 8.7
|
||||
:sync: RHEL-8.7
|
||||
|
||||
```shell
|
||||
sudo tee --append /etc/yum.repos.d/amdgpu.repo <<EOF
|
||||
@@ -666,10 +666,10 @@ packages does not exist on the system, execute the command below to install:
|
||||
sudo yum clean all
|
||||
```
|
||||
|
||||
:::
|
||||
:::
|
||||
|
||||
:::{tab-item} RHEL 9.1
|
||||
:sync: RHEL-9.1
|
||||
:::{tab-item} RHEL 9.1
|
||||
:sync: RHEL-9.1
|
||||
|
||||
```shell
|
||||
sudo tee --append /etc/yum.repos.d/amdgpu.repo <<EOF
|
||||
@@ -684,8 +684,8 @@ packages does not exist on the system, execute the command below to install:
|
||||
sudo yum clean all
|
||||
```
|
||||
|
||||
:::
|
||||
::::
|
||||
:::
|
||||
::::
|
||||
|
||||
Install the kernel mode driver and reboot the system using the following
|
||||
commands:
|
||||
@@ -758,8 +758,8 @@ To check the `kernel-headers` and `linux-modules-extra` package versions, follow
|
||||
these steps:
|
||||
|
||||
1. Ensure that the correct version of the latest `kernel-default-devel` and
|
||||
`kernel-default` packages are installed. The following command lists the
|
||||
installed kernel-default-devel and kernel-default package:
|
||||
`kernel-default` packages are installed. The following command lists the
|
||||
installed kernel-default-devel and kernel-default package:
|
||||
|
||||
```shell
|
||||
sudo zypper info kernel-default-devel or kernel-default
|
||||
@@ -772,7 +772,7 @@ installed kernel-default-devel and kernel-default package:
|
||||
```
|
||||
|
||||
2. If the required version of packages does not exist on the system, install
|
||||
with the command below:
|
||||
with the command below:
|
||||
|
||||
```shell
|
||||
sudo zypper install kernel-default-devel or kernel-default
|
||||
@@ -862,7 +862,7 @@ but are generally useful. Verification of the install is advised.
|
||||
### Post-install Actions
|
||||
|
||||
1. Instruct the system linker where to find the shared objects (`.so` files) for
|
||||
ROCm applications.
|
||||
ROCm applications.
|
||||
|
||||
```shell
|
||||
sudo tee --append /etc/ld.so.conf.d/rocm.conf <<EOF
|
||||
@@ -928,7 +928,7 @@ To ensure the packages are installed successfully, use the following commands:
|
||||
:::{tab-item} Ubuntu
|
||||
:sync: ubuntu
|
||||
|
||||
```shell
|
||||
```shell
|
||||
sudo apt list --installed
|
||||
```
|
||||
|
||||
|
||||
@@ -23,18 +23,18 @@ To upgrade the system with the desired ROCm release using the package manager
|
||||
method, follow the steps below:
|
||||
|
||||
1. **Update the AMDGPU stack repository** – Ensure you have updated the AMDGPU
|
||||
repository.
|
||||
repository.
|
||||
|
||||
2. **Upgrade the kernel-mode driver and reboot the system** – Ensure you have
|
||||
upgraded the kernel-mode driver and rebooted the system.
|
||||
upgraded the kernel-mode driver and rebooted the system.
|
||||
|
||||
3. **Update the ROCm repository** – Ensure you have updated the ROCm repository
|
||||
with the desired ROCm release.
|
||||
with the desired ROCm release.
|
||||
|
||||
4. **Upgrade the ROCm meta-packages** – Upgrade the ROCm meta-packages.
|
||||
|
||||
5. **Verify the upgrade for the applicable distributions** – Verify if the
|
||||
upgrade is successful.
|
||||
upgrade is successful.
|
||||
|
||||
To upgrade ROCm on different Linux distributions, refer to the sections below
|
||||
for specific commands.
|
||||
|
||||
@@ -16,7 +16,7 @@ to your distribution.
|
||||
:::{tab-item} Ubuntu
|
||||
:sync: ubuntu
|
||||
|
||||
```shell
|
||||
```shell
|
||||
sudo apt install "linux-headers-$(uname -r)" "linux-modules-extra-$(uname -r)"
|
||||
```
|
||||
|
||||
|
||||
@@ -72,10 +72,10 @@ installation.
|
||||
To make installation selections and install, follow these steps:
|
||||
|
||||
1. Scroll the window to AMD Display Driver and select the desired install type.
|
||||
Refer to the section [AMD Display Driver](#amd-display-driver) for more
|
||||
information on installation types.
|
||||
Refer to the section [AMD Display Driver](#amd-display-driver) for more
|
||||
information on installation types.
|
||||
2. Once selected, click **Install** located in the lower right corner, and skip
|
||||
to [Installing Components](#installing-components).
|
||||
to [Installing Components](#installing-components).
|
||||
|
||||
#### Deselect All
|
||||
|
||||
|
||||
@@ -4,14 +4,14 @@
|
||||
:gutter: 1
|
||||
|
||||
:::{grid-item-card} [MIOpen](https://rocmdocs.amd.com/projects/MIOpen/en/latest/)
|
||||
AMD's library for high performance machine learning primitives.
|
||||
AMD's library for high performance machine learning primitives.
|
||||
|
||||
- [Documentation](https://rocmdocs.amd.com/projects/MIOpen/en/latest/)
|
||||
|
||||
:::
|
||||
|
||||
:::{grid-item-card} [Composable Kernel](https://rocmdocs.amd.com/projects/composable_kernel/en/latest/)
|
||||
Composable Kernel: Performance Portable Programming Model for Machine Learning Tensor Operators
|
||||
Composable Kernel: Performance Portable Programming Model for Machine Learning Tensor Operators
|
||||
|
||||
- [Documentation](https://rocmdocs.amd.com/projects/composable_kernel/en/latest/)
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# All Reference Material
|
||||
|
||||
## ROCm Software Groups
|
||||
|
||||
:::::{grid} 1 1 2 2
|
||||
:gutter: 1
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ ROCmCC is a Clang/LLVM-based compiler. It is optimized for high-performance comp
|
||||
:::
|
||||
|
||||
:::{grid-item-card} [ROCgdb](https://rocmdocs.amd.com/projects/ROCgdb/en/latest/)
|
||||
This is ROCgdb, the ROCm source-level debugger for Linux, based on GDB, the GNU source-level debugger.
|
||||
This is ROCgdb, the ROCm source-level debugger for Linux, based on GDB, the GNU source-level debugger.
|
||||
|
||||
- [Documentation](https://rocmdocs.amd.com/projects/ROCgdb/en/latest/)
|
||||
|
||||
@@ -25,7 +25,7 @@ ROC profiler library. Profiling with perf-counters and derived metrics. Library
|
||||
:::
|
||||
|
||||
:::{grid-item-card} [ROCTracer](https://rocmdocs.amd.com/projects/roctracer/en/latest/)
|
||||
Callback/Activity Library for Performance tracing AMD GPUs
|
||||
Callback/Activity Library for Performance tracing AMD GPUs
|
||||
|
||||
- [Documentation](https://rocmdocs.amd.com/projects/roctracer/en/latest/)
|
||||
|
||||
|
||||
@@ -4,14 +4,14 @@
|
||||
:gutter: 1
|
||||
|
||||
:::{grid-item-card} [MIVisionX](https://rocmdocs.amd.com/projects/MIVisionX/en/latest/)
|
||||
MIVisionX toolkit is a set of comprehensive computer vision and machine intelligence libraries, utilities, and applications bundled into a single toolkit. AMD MIVisionX also delivers a highly optimized open-source implementation of the Khronos OpenVX™ and OpenVX™ Extensions.
|
||||
MIVisionX toolkit is a set of comprehensive computer vision and machine intelligence libraries, utilities, and applications bundled into a single toolkit. AMD MIVisionX also delivers a highly optimized open-source implementation of the Khronos OpenVX™ and OpenVX™ Extensions.
|
||||
|
||||
- [Documentation](https://rocmdocs.amd.com/projects/MIVisionX/en/latest/)
|
||||
|
||||
:::
|
||||
|
||||
:::{grid-item-card} [rocAL](https://rocmdocs.amd.com/projects/rocAL/en/latest/)
|
||||
The AMD ROCm Augmentation Library (rocAL) is designed to efficiently decode and process images and videos from a variety of storage formats and modify them through a processing graph programmable by the user. rocAL currently provides a C API.
|
||||
The AMD ROCm Augmentation Library (rocAL) is designed to efficiently decode and process images and videos from a variety of storage formats and modify them through a processing graph programmable by the user. rocAL currently provides a C API.
|
||||
|
||||
- [Documentation](https://rocmdocs.amd.com/projects/rocAL/en/latest/)
|
||||
|
||||
|
||||
@@ -50,34 +50,42 @@ Figure 1: Structure of a single GCD in the AMD Instinct MI250 accelerator.
|
||||
:header-rows: 1
|
||||
:name: mi250-perf
|
||||
|
||||
* - Computation and Data Type
|
||||
*
|
||||
- Computation and Data Type
|
||||
- FLOPS/CLOCK/CU
|
||||
- Peak TFLOPS
|
||||
* - Matrix FP64
|
||||
*
|
||||
- Matrix FP64
|
||||
- 256
|
||||
- 90.5
|
||||
* - Vector FP64
|
||||
*
|
||||
- Vector FP64
|
||||
- 128
|
||||
- 45.3
|
||||
* - Matrix FP32
|
||||
*
|
||||
- Matrix FP32
|
||||
- 256
|
||||
- 90.5
|
||||
* - Packed FP32
|
||||
*
|
||||
- Packed FP32
|
||||
- 256
|
||||
- 90.5
|
||||
* - Vector FP32
|
||||
*
|
||||
- Vector FP32
|
||||
- 128
|
||||
- 45.3
|
||||
* - Matrix FP16
|
||||
*
|
||||
- Matrix FP16
|
||||
- 1024
|
||||
- 362.1
|
||||
* - Matrix BF16
|
||||
*
|
||||
- Matrix BF16
|
||||
- 1024
|
||||
- 362.1
|
||||
* - Matrix INT8
|
||||
*
|
||||
- Matrix INT8
|
||||
- 1024
|
||||
- 362.1
|
||||
|
||||
```
|
||||
|
||||
{numref}`mi250-perf` summarizes the aggregated peak performance of the AMD
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
# C++ Primitive Libraries
|
||||
|
||||
ROCm template libraries for algorithms are as follows:
|
||||
|
||||
:::::{grid} 1 1 3 3
|
||||
|
||||
@@ -25,7 +25,7 @@ This tool acts as a command line interface for manipulating and monitoring the a
|
||||
:::
|
||||
|
||||
:::{grid-item-card} [ROCm Datacenter Tool](https://rocmdocs.amd.com/projects/rdc/en/latest/)
|
||||
The ROCm™ Data Center Tool simplifies the administration and addresses key infrastructure challenges in AMD GPUs in cluster and datacenter environments.
|
||||
The ROCm™ Data Center Tool simplifies the administration and addresses key infrastructure challenges in AMD GPUs in cluster and datacenter environments.
|
||||
|
||||
- [Documentation](https://rocmdocs.amd.com/projects/rdc/en/latest/)
|
||||
- [Examples](https://github.com/RadeonOpenCompute/rdc/tree/master/example)
|
||||
|
||||
@@ -77,7 +77,7 @@ For more details on rocprof, refer to the ROCm Profiling Tools document on [http
|
||||
**Prerequisite:** When using the --sys-trace option, compile the OpenMP program with:
|
||||
|
||||
```bash
|
||||
-Wl,-rpath,/opt/rocm-{version}/lib -lamdhip64
|
||||
-Wl,-rpath,/opt/rocm-{version}/lib -lamdhip64
|
||||
```
|
||||
|
||||
The following tracing options are widely used to generate useful information:
|
||||
@@ -159,25 +159,25 @@ A simple program demonstrating the use of this feature is:
|
||||
$ cat parallel_for.cpp
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
#define N 64
|
||||
#pragma omp requires unified_shared_memory
|
||||
int main() {
|
||||
int n = N;
|
||||
int *a = new int[n];
|
||||
int *b = new int[n];
|
||||
|
||||
|
||||
for(int i = 0; i < n; i++)
|
||||
b[i] = i;
|
||||
|
||||
|
||||
#pragma omp target parallel for map(to:b[:n])
|
||||
for(int i = 0; i < n; i++)
|
||||
a[i] = b[i];
|
||||
|
||||
|
||||
for(int i = 0; i < n; i++)
|
||||
if(a[i] != i)
|
||||
printf("error at %d: expected %d, got %d\n", i, i+1, a[i]);
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
$ clang++ -O2 -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a:xnack+ parallel_for.cpp
|
||||
@@ -231,7 +231,7 @@ See the example below, where the user builds the program using -msafe-fp-atomics
|
||||
double a = 0.0;.
|
||||
#pragma omp atomic hint(AMD_fast_fp_atomics)
|
||||
a = a + 1.0;
|
||||
|
||||
|
||||
double b = 0.0;
|
||||
#pragma omp atomic
|
||||
b = b + 1.0;
|
||||
@@ -260,11 +260,12 @@ Address Sanitizer is a memory error detector tool utilized by applications to de
|
||||
- Initialization order bugs
|
||||
|
||||
**Features Supported on AMDGPU Platform (amdgcn-amd-amdhsa):**
|
||||
|
||||
- Heap buffer overflow
|
||||
|
||||
- Global buffer overflow
|
||||
|
||||
**Software (Kernel/OS) Requirements:** Unified Shared Memory support with Xnack capability. See the section on [Unified Shared Memory](#unified-shared-memory) for prerequisites and details on Xnack.
|
||||
**Software (Kernel/OS) Requirements:** Unified Shared Memory support with Xnack capability. See the section on [Unified Shared Memory](#unified-shared-memory) for prerequisites and details on Xnack.
|
||||
|
||||
**Example:**
|
||||
|
||||
@@ -276,7 +277,7 @@ void main() {
|
||||
....... // Some program statements
|
||||
#pragma omp target map(to : A[0:N], B[0:N]) map(from: C[0:N])
|
||||
{
|
||||
#pragma omp parallel for
|
||||
#pragma omp parallel for
|
||||
for(int i =0 ; i < N; i++){
|
||||
C[i+10] = A[i] + B[i];
|
||||
} // end of for loop
|
||||
@@ -290,7 +291,7 @@ See the complete sample code for heap buffer overflow [here](https://github.com/
|
||||
- Global buffer overflow
|
||||
|
||||
```bash
|
||||
#pragma omp declare target
|
||||
#pragma omp declare target
|
||||
int A[N],B[N],C[N];
|
||||
#pragma omp end declare target
|
||||
void main(){
|
||||
@@ -300,7 +301,7 @@ void main(){
|
||||
{
|
||||
#pragma omp target update to(A,B)
|
||||
#pragma omp target parallel for
|
||||
for(int i=0; i<N; i++){
|
||||
for(int i=0; i<N; i++){
|
||||
C[i]=A[i*100]+B[i+22];
|
||||
} // end of for loop
|
||||
#pragma omp target update from(C)
|
||||
|
||||
@@ -18,6 +18,7 @@ The differences are listed in [the table below](rocm-llvm-vs-alt).
|
||||
:::
|
||||
|
||||
For more details, see:
|
||||
|
||||
- AMD GPU usage: [llvm.org/docs/AMDGPUUsage.html](https://llvm.org/docs/AMDGPUUsage.html)
|
||||
- Releases and source: <https://github.com/RadeonOpenCompute/llvm-project>
|
||||
|
||||
@@ -153,7 +154,6 @@ to perform this optimization. Users can choose different levels of
|
||||
aggressiveness with which this optimization can be applied to the application,
|
||||
with 1 being the least aggressive and 7 being the most aggressive level.
|
||||
|
||||
|
||||
:::{table} -fstruct-layout Values and Their Effects
|
||||
| -fstruct-layout value | Structure peeling | Pointer size after selective compression of self-referential pointers in structures, wherever safe | Type of structure fields eligible for compression | Whether compression performed under safety check |
|
||||
| ----------- | ----------- | ----------- | ----------- | ----------- |
|
||||
|
||||
@@ -4,14 +4,14 @@
|
||||
:gutter: 1
|
||||
|
||||
:::{grid-item-card} [RVS](https://rocmdocs.amd.com/projects/RVS/en/latest/)
|
||||
The ROCm Validation Suite is a system administrator’s and cluster manager's tool for detecting and troubleshooting common problems affecting AMD GPU(s) running in a high-performance computing environment, enabled using the ROCm software stack on a compatible platform.
|
||||
The ROCm Validation Suite is a system administrator’s and cluster manager's tool for detecting and troubleshooting common problems affecting AMD GPU(s) running in a high-performance computing environment, enabled using the ROCm software stack on a compatible platform.
|
||||
|
||||
- [Documentation](https://rocmdocs.amd.com/projects/RVS/en/latest/)
|
||||
|
||||
:::
|
||||
|
||||
:::{grid-item-card} [TransferBench](https://rocmdocs.amd.com/projects/TransferBench/en/latest/)
|
||||
TransferBench is a simple utility capable of benchmarking simultaneous transfers between user-specified devices (CPUs/GPUs).
|
||||
TransferBench is a simple utility capable of benchmarking simultaneous transfers between user-specified devices (CPUs/GPUs).
|
||||
|
||||
- [Documentation](https://rocmdocs.amd.com/projects/TransferBench/en/latest/)
|
||||
- [Changelog](https://github.com/ROCmSoftwarePlatform/TransferBench/blob/develop/CHANGELOG.md)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Frameworks Support Matrix
|
||||
|
||||
The software support matrices for ROCm container releases are listed below.
|
||||
The software support matrices for ROCm container releases are listed below.
|
||||
|
||||
## ROCm 5.6
|
||||
|
||||
|
||||
@@ -61,7 +61,6 @@ Use Driver Shipped with ROCm
|
||||
| AMD Radeon™ VII | Vega |Full | gfx906 | Supported | Unsupported |
|
||||
| AMD Radeon™ R9 Fury | Fiji |Full | gfx803 | Community | Unsupported |
|
||||
|
||||
|
||||
:::
|
||||
|
||||
::::
|
||||
@@ -73,7 +72,7 @@ Use Driver Shipped with ROCm
|
||||
:::{tab-item} AMD Instinct™
|
||||
:sync: instinct
|
||||
|
||||
Instinct™ accelerators support the full stack available in ROCm. Instinct™
|
||||
Instinct™ accelerators support the full stack available in ROCm. Instinct™
|
||||
accelerators are Linux only.
|
||||
|
||||
:::
|
||||
@@ -104,6 +103,7 @@ below:
|
||||
- HIP SDK includes the HIP Runtime and a selection of GPU libraries for compute.
|
||||
Please see [article](link) for details of HIP SDK.
|
||||
- HIP enables the use of the HIP Runtime only.
|
||||
|
||||
:::
|
||||
|
||||
::::
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
|
||||
ROCm™ is released by Advanced Micro Devices, Inc. and is licensed per component separately.
|
||||
The following table is a list of ROCm components with links to their respective license
|
||||
terms. These components may include third party components subject to
|
||||
terms. These components may include third party components subject to
|
||||
additional licenses. Please review individual repositories for more information.
|
||||
The table shows ROCm components, the name of the license, and a link to the license terms.
|
||||
The table shows ROCm components, the name of the license, and a link to the license terms.
|
||||
The table is ordered to follow ROCm's manifest file.
|
||||
|
||||
| Component | License |
|
||||
@@ -63,9 +63,9 @@ The table is ordered to follow ROCm's manifest file.
|
||||
| [aomp-extras](https://github.com/ROCm-Developer-Tools/aomp-extras/) | [MIT](https://github.com/ROCm-Developer-Tools/aomp-extras/blob/aomp-dev/LICENSE) |
|
||||
| [flang](https://github.com/ROCm-Developer-Tools/flang/) | [Apache 2.0](https://github.com/ROCm-Developer-Tools/flang/blob/master/LICENSE.txt) |
|
||||
|
||||
Open sourced ROCm components are released via public GitHub
|
||||
Open sourced ROCm components are released via public GitHub
|
||||
repositories, packages on repo.radeon.com and other distribution channels.
|
||||
Proprietary products are only available on repo.radeon.com. Currently, only
|
||||
Proprietary products are only available on repo.radeon.com. Currently, only
|
||||
one component of ROCm, rocm-llvm-alt, is governed by a proprietary license.
|
||||
Proprietary components are organized in a proprietary subdirectory in the package
|
||||
repositories to distinguish from open sourced packages.
|
||||
|
||||
@@ -13,13 +13,13 @@ distributions. Following is the ROCm proposed file structure.
|
||||
| -- lib
|
||||
| -- lib<soname>.so->lib<soname>.so.major->lib<soname>.so.major.minor.patch
|
||||
(public libraries to link with applications)
|
||||
| -- <component>
|
||||
| -- <component>
|
||||
| -- architecture dependent libraries and binaries used internally by components
|
||||
| -- cmake
|
||||
| -- <component>
|
||||
| --<component>.config.cmake
|
||||
| -- libexec
|
||||
| -- <component>
|
||||
| -- <component>
|
||||
| -- non ISA/architecture independent executables used internally by components
|
||||
| -- include
|
||||
| -- <component>
|
||||
@@ -94,11 +94,12 @@ from the new location (/opt/rocm-xxx/include) as shown in the example below.
|
||||
The deprecation plan for backward compatibility wrapper header files is as
|
||||
follows
|
||||
|
||||
- #pragma message announcing deprecation – ROCm v5.2 release.
|
||||
- #pragma message changed to #warning – Future release, tentatively ROCm v5.5.
|
||||
- #warning changed to #error – Future release, tentatively ROCm v5.6.
|
||||
- `#pragma` message announcing deprecation – ROCm v5.2 release.
|
||||
- `#pragma` message changed to `#warning` – Future release, tentatively ROCm
|
||||
v5.5.
|
||||
- `#warning` changed to `#error` – Future release, tentatively ROCm v5.6.
|
||||
- Backward compatibility wrappers removed – Future release, tentatively ROCm
|
||||
v6.0.
|
||||
v6.0.
|
||||
|
||||
### Executable files
|
||||
|
||||
@@ -145,19 +146,19 @@ will be deprecated in a future release. Application have to make sure to include
|
||||
correct header file and use correct search paths.
|
||||
|
||||
1. `#include<header_file.h>` needs to be changed to
|
||||
`#include <component/header_file.h>`
|
||||
`#include <component/header_file.h>`
|
||||
|
||||
For example: `#include <hip.h>` needs to change
|
||||
to `#include <hip/hip.h>`
|
||||
For example: `#include <hip.h>` needs to change
|
||||
to `#include <hip/hip.h>`
|
||||
|
||||
2. Any variable in cmake or makefiles pointing to component folder needs to
|
||||
be changed.
|
||||
be changed.
|
||||
|
||||
For example: `VAR1=/opt/rocm/hip` needs to be changed to `VAR1=/opt/rocm`
|
||||
`VAR2=/opt/rocm/hsa` needs to be changed to `VAR2=/opt/rocm`
|
||||
For example: `VAR1=/opt/rocm/hip` needs to be changed to `VAR1=/opt/rocm`
|
||||
`VAR2=/opt/rocm/hsa` needs to be changed to `VAR2=/opt/rocm`
|
||||
|
||||
3. Any reference to `/opt/rocm/<component>/bin` or `/opt/rocm/<component>/lib`
|
||||
needs to be changed to `/opt/rocm/bin` and `/opt/rocm/lib/` respectively.
|
||||
needs to be changed to `/opt/rocm/bin` and `/opt/rocm/lib/` respectively.
|
||||
|
||||
## References
|
||||
|
||||
|
||||
@@ -26,11 +26,11 @@ It is customary for Linux installers to integrate into the system's package
|
||||
manager. There are two notable groups of package sources:
|
||||
|
||||
- AMD-hosted repositories maintained by AMD available to register on supported
|
||||
Linux distribution versions. For a complete list of AMD-supported platforms,
|
||||
refer to the article: [GPU and OS Support](../release/gpu_os_support).
|
||||
Linux distribution versions. For a complete list of AMD-supported platforms,
|
||||
refer to the article: [GPU and OS Support](../release/gpu_os_support).
|
||||
- Distribution-hosted repositories maintained by the developer of said Linux
|
||||
distribution. These require little to no setup from the user, but aren't tested
|
||||
by AMD. For support on these installations, contact the relevant maintainers.
|
||||
distribution. These require little to no setup from the user, but aren't tested
|
||||
by AMD. For support on these installations, contact the relevant maintainers.
|
||||
|
||||
AMD also provides installer scripts for those that wish to drive installations
|
||||
in a more manual fashion.
|
||||
@@ -71,12 +71,12 @@ The `amdgpu-install` script streamlines the installation process by:
|
||||
- Abstracting the distribution-specific package installation logic
|
||||
- Performing the repository setup
|
||||
- Allowing you to specify the use case and automating the installation of all
|
||||
the required packages
|
||||
the required packages
|
||||
- Installing multiple ROCm releases simultaneously on a system
|
||||
- Automating updating local repository information through enhanced
|
||||
functionality of the amdgpu-install script
|
||||
functionality of the amdgpu-install script
|
||||
- Performing post-install checks to verify whether the installation was
|
||||
completed successfully
|
||||
completed successfully
|
||||
- Upgrading the installed ROCm release
|
||||
- Uninstalling the installed single-version or multiversion ROCm releases
|
||||
|
||||
@@ -125,8 +125,8 @@ The single-version ROCm installation refers to the following:
|
||||
The multiversion installation refers to the following:
|
||||
|
||||
- Installation of multiple instances of the ROCm stack on a system. Extending
|
||||
the package name and its dependencies with the release version adds the ability
|
||||
to support multiple versions of packages simultaneously.
|
||||
the package name and its dependencies with the release version adds the
|
||||
ability to support multiple versions of packages simultaneously.
|
||||
- Use of versioned ROCm meta-packages.
|
||||
|
||||
```{note}
|
||||
|
||||
@@ -60,9 +60,9 @@ of required packages and libraries.
|
||||
**Example:**
|
||||
|
||||
- rocm-hip-runtime is used to deploy on supported machines to execute HIP
|
||||
applications.
|
||||
applications.
|
||||
- rocm-hip-sdk contains runtime components to deploy and execute HIP
|
||||
applications.
|
||||
applications.
|
||||
|
||||
```{figure-md} meta-packages
|
||||
|
||||
|
||||
@@ -18,14 +18,14 @@ kernel version.
|
||||
Verify the Linux distribution using the following steps:
|
||||
|
||||
1. To obtain the Linux distribution information, type the following command on
|
||||
your system from the Command Line Interface (CLI):
|
||||
your system from the Command Line Interface (CLI):
|
||||
|
||||
```shell
|
||||
uname -m && cat /etc/*release
|
||||
```
|
||||
|
||||
2. Confirm that the obtained Linux distribution information matches with those
|
||||
listed in [System Requirements](/release/gpu_os_support#os-support).
|
||||
listed in [System Requirements](/release/gpu_os_support#os-support).
|
||||
|
||||
**Example:** Running the command above on an Ubuntu system results in the
|
||||
following output:
|
||||
@@ -51,7 +51,7 @@ Verify the kernel version using the following steps:
|
||||
```
|
||||
|
||||
2. Confirm that the obtained kernel version information matches with System
|
||||
Requirements.
|
||||
Requirements.
|
||||
|
||||
**Example:** The output of the command above lists the kernel version in the
|
||||
following format:
|
||||
@@ -97,7 +97,7 @@ To verify that your system has a ROCm-capable GPU, use these steps:
|
||||
```
|
||||
|
||||
2. Verify from the output that the listed product names match with the Product
|
||||
Id given in the table above.
|
||||
Id given in the table above.
|
||||
|
||||
## Confirm the System Has All the Required Tools and Packages Installed
|
||||
|
||||
@@ -125,7 +125,7 @@ GPU resources.
|
||||
```
|
||||
|
||||
3. Use of the video group is recommended for all ROCm-supported operating
|
||||
systems.
|
||||
systems.
|
||||
|
||||
```{note}
|
||||
render group is required only for Ubuntu v20.04.
|
||||
|
||||
Reference in New Issue
Block a user