mirror of
https://github.com/ROCm/ROCm.git
synced 2026-01-08 06:13:59 -05:00
* Create stanford-megatron-lm-compatibility.rst * toc and wordlist * Update deep-learning-rocm.rst * Update stanford-megatron-lm-compatibility.rst * Update stanford-megatron-lm-compatibility.rst * Update stanford-megatron-lm-compatibility.rst * Update stanford-megatron-lm-compatibility.rst * Update stanford-megatron-lm-compatibility.rst * Update stanford-megatron-lm-compatibility.rst * fixes and adding to main compat matrix * formatting fix * Update stanford-megatron-lm-compatibility.rst * Update stanford-megatron-lm-compatibility.rst * Update stanford-megatron-lm-compatibility.rst * Update docs/compatibility/ml-compatibility/stanford-megatron-lm-compatibility.rst Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> * Update docs/compatibility/ml-compatibility/stanford-megatron-lm-compatibility.rst Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> * Update docs/compatibility/ml-compatibility/stanford-megatron-lm-compatibility.rst Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com> * Update stanford-megatron-lm-compatibility.rst * Update stanford-megatron-lm-compatibility.rst * Update stanford-megatron-lm-compatibility.rst * Update stanford-megatron-lm-compatibility.rst --------- Co-authored-by: Leo Paoletti <164940351+lpaoletti@users.noreply.github.com>
955 lines
7.0 KiB
Plaintext
955 lines
7.0 KiB
Plaintext
AAC
|
|
ABI
|
|
ACE
|
|
ACEs
|
|
ACS
|
|
AccVGPR
|
|
AccVGPRs
|
|
ALU
|
|
AllReduce
|
|
AMD
|
|
AMDGPU
|
|
AMDGPUs
|
|
AMDMIGraphX
|
|
AMI
|
|
AOCC
|
|
AOMP
|
|
AOT
|
|
AOTriton
|
|
APBDIS
|
|
APIC
|
|
APIs
|
|
APU
|
|
APUs
|
|
ASIC
|
|
ASICs
|
|
ASan
|
|
ASAN
|
|
ASm
|
|
ATI
|
|
atomicRMW
|
|
AddressSanitizer
|
|
AlexNet
|
|
Andrej
|
|
Arb
|
|
Autocast
|
|
BARs
|
|
BatchNorm
|
|
BLAS
|
|
BMC
|
|
BabelStream
|
|
Blit
|
|
Blockwise
|
|
Bluefield
|
|
Bootloader
|
|
CAS
|
|
CCD
|
|
CDNA
|
|
CHTML
|
|
CIFAR
|
|
CLI
|
|
CLion
|
|
CMake
|
|
CMakeLists
|
|
CMakePackage
|
|
CP
|
|
CPC
|
|
CPF
|
|
CPP
|
|
CPU
|
|
CPUs
|
|
Cron
|
|
CSC
|
|
CSE
|
|
CSV
|
|
CSn
|
|
CTest
|
|
CTests
|
|
CU
|
|
CUDA
|
|
CUs
|
|
CXX
|
|
Cavium
|
|
CentOS
|
|
ChatGPT
|
|
CoRR
|
|
Codespaces
|
|
Commitizen
|
|
CommonMark
|
|
Concretized
|
|
Conda
|
|
ConnectX
|
|
CuPy
|
|
da
|
|
Dashboarding
|
|
Dataloading
|
|
DBRX
|
|
DDR
|
|
DF
|
|
DGEMM
|
|
DGL
|
|
DGLGraph
|
|
dGPU
|
|
dGPUs
|
|
DIMM
|
|
DKMS
|
|
DL
|
|
DMA
|
|
DNN
|
|
DNNL
|
|
DPM
|
|
DRI
|
|
DW
|
|
DWORD
|
|
Dask
|
|
DataFrame
|
|
DataLoader
|
|
DataParallel
|
|
Debian
|
|
decompositions
|
|
DeepSeek
|
|
DeepSpeed
|
|
Dependabot
|
|
Deprecations
|
|
DevCap
|
|
DirectX
|
|
Dockerfile
|
|
Doxygen
|
|
ELMo
|
|
ENDPGM
|
|
EPYC
|
|
ESXi
|
|
EoS
|
|
FBGEMM
|
|
FFT
|
|
FFTs
|
|
FFmpeg
|
|
FHS
|
|
FIXME
|
|
FMA
|
|
FP
|
|
FX
|
|
Filesystem
|
|
FindDb
|
|
Flang
|
|
FlashAttention
|
|
FluxBenchmark
|
|
Fortran
|
|
Fuyu
|
|
GALB
|
|
GAT
|
|
GCC
|
|
GCD
|
|
GCDs
|
|
GCN
|
|
GDB
|
|
GDDR
|
|
GDR
|
|
GDS
|
|
GEMM
|
|
GEMMs
|
|
GFLOPS
|
|
GFortran
|
|
GFXIP
|
|
Gemma
|
|
GiB
|
|
GIM
|
|
GL
|
|
Glibc
|
|
GLXT
|
|
Gloo
|
|
GMI
|
|
GPG
|
|
GPR
|
|
GPT
|
|
GPU
|
|
GPU's
|
|
GPUs
|
|
Graphbolt
|
|
GraphSage
|
|
GRBM
|
|
GenAI
|
|
GenZ
|
|
GitHub
|
|
Gitpod
|
|
HBM
|
|
HCA
|
|
HGX
|
|
HIPCC
|
|
HIPExtension
|
|
HIPIFY
|
|
HIPification
|
|
hipification
|
|
HIPify
|
|
HPC
|
|
HPCG
|
|
HPE
|
|
HPL
|
|
HSA
|
|
HW
|
|
HWE
|
|
HWS
|
|
Haswell
|
|
Higgs
|
|
Hyperparameters
|
|
Huggingface
|
|
ICD
|
|
ICT
|
|
ICV
|
|
IDE
|
|
IDEs
|
|
IFWI
|
|
IMDb
|
|
IncDec
|
|
IOMMU
|
|
IOP
|
|
IOPM
|
|
IOV
|
|
IRQ
|
|
ISA
|
|
ISV
|
|
ISVs
|
|
ITL
|
|
ImageNet
|
|
InfiniBand
|
|
Inlines
|
|
IntelliSense
|
|
Interop
|
|
Intersphinx
|
|
Intra
|
|
Ioffe
|
|
JAX's
|
|
Jinja
|
|
JSON
|
|
Jupyter
|
|
KFD
|
|
KFDTest
|
|
KMD
|
|
KV
|
|
KVM
|
|
Karpathy's
|
|
KiB
|
|
Kineto
|
|
Keras
|
|
Khronos
|
|
LAPACK
|
|
LCLK
|
|
LDS
|
|
LLM
|
|
LLMs
|
|
LLVM
|
|
LM
|
|
LSAN
|
|
LSan
|
|
LTS
|
|
LSTMs
|
|
LanguageCrossEntropy
|
|
LoRA
|
|
MEM
|
|
MERCHANTABILITY
|
|
MFMA
|
|
MiB
|
|
MIGraphX
|
|
MIOpen
|
|
MIOpenGEMM
|
|
MIOpen's
|
|
MIVisionX
|
|
MLM
|
|
MMA
|
|
MMIO
|
|
MMIOH
|
|
MMU
|
|
MNIST
|
|
MPI
|
|
MPT
|
|
MSVC
|
|
MVAPICH
|
|
MVFFR
|
|
Makefile
|
|
Makefiles
|
|
Matplotlib
|
|
Matrox
|
|
MaxText
|
|
Megatrends
|
|
Megatron
|
|
Mellanox
|
|
Mellanox's
|
|
Meta's
|
|
Miniconda
|
|
MirroredStrategy
|
|
Mixtral
|
|
MosaicML
|
|
Mpops
|
|
Multicore
|
|
Multithreaded
|
|
MyEnvironment
|
|
MyST
|
|
NBIO
|
|
NBIOs
|
|
NCCL
|
|
NCF
|
|
NFS
|
|
NIC
|
|
NICs
|
|
NLI
|
|
NLP
|
|
NN
|
|
NPKit
|
|
NPS
|
|
NSP
|
|
NUMA
|
|
NVCC
|
|
NVIDIA
|
|
NVPTX
|
|
NaN
|
|
Nano
|
|
Navi
|
|
Noncoherently
|
|
NoReturn
|
|
NousResearch's
|
|
NumPy
|
|
OAM
|
|
OAMs
|
|
OCP
|
|
OEM
|
|
OFED
|
|
OMM
|
|
OMP
|
|
OMPI
|
|
OMPT
|
|
OMPX
|
|
ONNX
|
|
OSS
|
|
OSU
|
|
OpenCL
|
|
OpenCV
|
|
OpenFabrics
|
|
OpenGL
|
|
OpenMP
|
|
OpenMPI
|
|
OpenSSL
|
|
OpenVX
|
|
OpenXLA
|
|
Optim
|
|
Oversubscription
|
|
PagedAttention
|
|
Pallas
|
|
PCC
|
|
PCI
|
|
PCIe
|
|
PEFT
|
|
PEQT
|
|
PIL
|
|
PILImage
|
|
POR
|
|
PRNG
|
|
PRs
|
|
PaLM
|
|
Pageable
|
|
PeerDirect
|
|
PerfDb
|
|
Perfetto
|
|
PipelineParallel
|
|
PnP
|
|
PowerEdge
|
|
PowerShell
|
|
Pretrained
|
|
Pretraining
|
|
Profiler's
|
|
PyPi
|
|
Pytest
|
|
PyTorch
|
|
Qcycles
|
|
Qwen
|
|
RAII
|
|
RAS
|
|
RCCL
|
|
RDC
|
|
RDC's
|
|
RDMA
|
|
RDNA
|
|
README
|
|
Recomputation
|
|
RHEL
|
|
RMW
|
|
RNN
|
|
RNNs
|
|
ROC
|
|
ROCProfiler
|
|
ROCT
|
|
ROCTx
|
|
ROCTracer
|
|
ROCclr
|
|
ROCdbgapi
|
|
ROCgdb
|
|
ROCk
|
|
ROCm
|
|
ROCmCC
|
|
ROCmSoftwarePlatform
|
|
ROCmValidationSuite
|
|
ROCprofiler
|
|
ROCr
|
|
RPP
|
|
RST
|
|
RW
|
|
Radeon
|
|
RelWithDebInfo
|
|
Req
|
|
Rickle
|
|
RoCE
|
|
Runfile
|
|
Ryzen
|
|
SALU
|
|
SBIOS
|
|
SCA
|
|
ScaledGEMM
|
|
SDK
|
|
SDMA
|
|
SDPA
|
|
SDRAM
|
|
SENDMSG
|
|
SGPR
|
|
SGPRs
|
|
SHA
|
|
SHARK's
|
|
SIGQUIT
|
|
SIMD
|
|
SIMDs
|
|
SKU
|
|
SKUs
|
|
SLES
|
|
SLURM
|
|
SMEM
|
|
SMI
|
|
SMT
|
|
SPI
|
|
SQs
|
|
SRAM
|
|
SRAMECC
|
|
SVD
|
|
SWE
|
|
SerDes
|
|
ShareGPT
|
|
Shlens
|
|
Skylake
|
|
Softmax
|
|
Spack
|
|
SplitK
|
|
Supermicro
|
|
Szegedy
|
|
TCA
|
|
TCC
|
|
TCI
|
|
TCIU
|
|
TCP
|
|
TCR
|
|
TensorRT
|
|
TensorFloat
|
|
TF
|
|
TFLOPS
|
|
TP
|
|
TPS
|
|
TPU
|
|
TPUs
|
|
TSME
|
|
Tagram
|
|
TensileLite
|
|
TensorBoard
|
|
TensorFlow
|
|
TensorParallel
|
|
ToC
|
|
TorchAudio
|
|
torchaudio
|
|
TorchElastic
|
|
TorchMIGraphX
|
|
torchrec
|
|
TorchScript
|
|
TorchServe
|
|
torchserve
|
|
torchtext
|
|
TorchVision
|
|
TransferBench
|
|
TrapStatus
|
|
UAC
|
|
UC
|
|
UCC
|
|
UCX
|
|
UE
|
|
UIF
|
|
UMC
|
|
USM
|
|
UTCL
|
|
UTIL
|
|
UltraChat
|
|
Uncached
|
|
Unittests
|
|
Unhandled
|
|
VALU
|
|
VBIOS
|
|
VCN
|
|
VGPR
|
|
VGPRs
|
|
VM
|
|
VMEM
|
|
VMWare
|
|
VRAM
|
|
VSIX
|
|
VSkipped
|
|
Vanhoucke
|
|
Vulkan
|
|
WGP
|
|
WGPs
|
|
WX
|
|
WikiText
|
|
Wojna
|
|
Workgroups
|
|
Writebacks
|
|
XCD
|
|
XCDs
|
|
XGBoost
|
|
XGBoost's
|
|
XGMI
|
|
XT
|
|
XTX
|
|
Xeon
|
|
Xilinx
|
|
Xnack
|
|
Xteam
|
|
YAML
|
|
YML
|
|
YModel
|
|
ZeRO
|
|
ZenDNN
|
|
accuracies
|
|
activations
|
|
addr
|
|
ade
|
|
ai
|
|
alloc
|
|
allocatable
|
|
allocator
|
|
allocators
|
|
amdgpu
|
|
api
|
|
aten
|
|
atmi
|
|
atomics
|
|
autogenerated
|
|
autotune
|
|
avx
|
|
awk
|
|
backend
|
|
backends
|
|
bb
|
|
benchmarked
|
|
benchmarking
|
|
bfloat
|
|
bilinear
|
|
bitcode
|
|
bitsandbytes
|
|
bitwise
|
|
Bitwise
|
|
blit
|
|
bootloader
|
|
boson
|
|
bosons
|
|
br
|
|
BrainFloat
|
|
buildable
|
|
bursty
|
|
bzip
|
|
cacheable
|
|
carveout
|
|
cd
|
|
centos
|
|
centric
|
|
changelog
|
|
checkpointing
|
|
chiplet
|
|
cmake
|
|
cmd
|
|
coalescable
|
|
codename
|
|
collater
|
|
comgr
|
|
completers
|
|
composable
|
|
concretization
|
|
config
|
|
conformant
|
|
constructible
|
|
convolutional
|
|
convolves
|
|
copyable
|
|
cpp
|
|
csn
|
|
cuBLAS
|
|
cuda
|
|
cuDNN
|
|
cudnn
|
|
cuFFT
|
|
cuLIB
|
|
cuRAND
|
|
cuSOLVER
|
|
cuSPARSE
|
|
customizations
|
|
cTDP
|
|
dataset
|
|
datasets
|
|
dataspace
|
|
datatemplate
|
|
datatype
|
|
datatypes
|
|
dbgapi
|
|
de
|
|
deallocation
|
|
debuggability
|
|
debian
|
|
deepseek
|
|
denoise
|
|
denoised
|
|
denoises
|
|
denormalize
|
|
dequantization
|
|
dequantizes
|
|
deserializers
|
|
detections
|
|
dev
|
|
devicelibs
|
|
devsel
|
|
dimensionality
|
|
disambiguates
|
|
distro
|
|
distros
|
|
dkms
|
|
dtype
|
|
eb
|
|
el
|
|
embeddings
|
|
enablement
|
|
encodings
|
|
endfor
|
|
endif
|
|
endpgm
|
|
enqueue
|
|
env
|
|
epilog
|
|
etcetera
|
|
ethernet
|
|
exascale
|
|
executables
|
|
ffmpeg
|
|
filesystem
|
|
fortran
|
|
fp
|
|
framebuffer
|
|
gRPC
|
|
galb
|
|
gcc
|
|
gdb
|
|
gemm
|
|
gfortran
|
|
gfx
|
|
githooks
|
|
github
|
|
globals
|
|
gnupg
|
|
grayscale
|
|
gzip
|
|
heterogenous
|
|
hipBLAS
|
|
hipBLASLt
|
|
hipBLASLt's
|
|
hipblaslt
|
|
hipCUB
|
|
hipFFT
|
|
hipFORT
|
|
hipLIB
|
|
hipRAND
|
|
hipSOLVER
|
|
hipSPARSE
|
|
hipSPARSELt
|
|
hipTensor
|
|
hipamd
|
|
hipblas
|
|
hipcc
|
|
hipcub
|
|
hipfft
|
|
hipfort
|
|
hipify
|
|
hipsolver
|
|
hipsparse
|
|
hlist
|
|
hostname
|
|
hotspotting
|
|
hpc
|
|
hpp
|
|
hsa
|
|
hsakmt
|
|
hyperparameter
|
|
hyperparameters
|
|
iDRAC
|
|
ib_core
|
|
inband
|
|
incrementing
|
|
inductor
|
|
inferencing
|
|
inflight
|
|
init
|
|
initializer
|
|
inlining
|
|
installable
|
|
interop
|
|
interprocedural
|
|
intra
|
|
intrinsics
|
|
invariants
|
|
invocating
|
|
ipo
|
|
jax
|
|
kdb
|
|
kfd
|
|
kv
|
|
lang
|
|
latencies
|
|
len
|
|
libfabric
|
|
libjpeg
|
|
libs
|
|
linalg
|
|
linearized
|
|
linter
|
|
linux
|
|
llvm
|
|
localscratch
|
|
logits
|
|
lossy
|
|
macOS
|
|
matchers
|
|
microarchitecture
|
|
migraphx
|
|
migratable
|
|
miopen
|
|
miopengemm
|
|
mivisionx
|
|
mixtral
|
|
mjx
|
|
mkdir
|
|
mlirmiopen
|
|
mtypes
|
|
mutex
|
|
mvffr
|
|
namespace
|
|
namespaces
|
|
nanoGPT
|
|
NCS
|
|
NOP
|
|
NVLink
|
|
num
|
|
numref
|
|
ocl
|
|
opencl
|
|
opencv
|
|
openmp
|
|
openssl
|
|
optimizers
|
|
os
|
|
oversubscription
|
|
pageable
|
|
pallas
|
|
parallelization
|
|
parallelizing
|
|
param
|
|
parameterization
|
|
passthrough
|
|
perfcounter
|
|
performant
|
|
perl
|
|
pragma
|
|
pre
|
|
prebuild
|
|
prebuilt
|
|
precompiled
|
|
preconditioner
|
|
preconfigured
|
|
preemptible
|
|
prefetch
|
|
prefetchable
|
|
prefill
|
|
prefills
|
|
preloaded
|
|
preprocess
|
|
preprocessed
|
|
preprocessing
|
|
preprocessor
|
|
prequantized
|
|
prerequisites
|
|
pretraining
|
|
profiler
|
|
profilers
|
|
protobuf
|
|
pseudorandom
|
|
py
|
|
pytorch
|
|
recommender
|
|
recommenders
|
|
quantile
|
|
quantizer
|
|
quasirandom
|
|
queueing
|
|
radeon
|
|
rccl
|
|
rdc
|
|
rdma
|
|
reStructuredText
|
|
redirections
|
|
refactorization
|
|
reformats
|
|
repo
|
|
repos
|
|
representativeness
|
|
req
|
|
resampling
|
|
rescaling
|
|
reusability
|
|
roadmap
|
|
roc
|
|
rocAL
|
|
rocALUTION
|
|
rocBLAS
|
|
rocDecode
|
|
rocFFT
|
|
rocHPCG
|
|
rocJPEG
|
|
rocLIB
|
|
rocMLIR
|
|
rocPRIM
|
|
rocPyDecode
|
|
rocRAND
|
|
rocSOLVER
|
|
rocSPARSE
|
|
rocThrust
|
|
rocWMMA
|
|
rocalution
|
|
rocblas
|
|
rocclr
|
|
rocfft
|
|
rocm
|
|
rocminfo
|
|
rocprim
|
|
rocprof
|
|
rocprofv
|
|
rocprofiler
|
|
rocr
|
|
rocrand
|
|
rocsolver
|
|
rocsparse
|
|
rocthrust
|
|
roctracer
|
|
rst
|
|
runtime
|
|
runtimes
|
|
ResNet
|
|
sL
|
|
scalability
|
|
scalable
|
|
scipy
|
|
seealso
|
|
sendmsg
|
|
seqs
|
|
serializers
|
|
shader
|
|
sharding
|
|
sigmoid
|
|
sm
|
|
smi
|
|
softmax
|
|
spack
|
|
spmm
|
|
src
|
|
stochastically
|
|
strided
|
|
subcommand
|
|
subdirectory
|
|
subexpression
|
|
subfolder
|
|
subfolders
|
|
submatrix
|
|
submodule
|
|
submodules
|
|
subnet
|
|
supercomputing
|
|
symlink
|
|
symlinks
|
|
sys
|
|
tabindex
|
|
td
|
|
tensorfloat
|
|
th
|
|
tokenization
|
|
tokenize
|
|
tokenized
|
|
tokenizer
|
|
tokenizes
|
|
toolchain
|
|
toolchains
|
|
toolset
|
|
toolsets
|
|
torchvision
|
|
tqdm
|
|
tracebacks
|
|
txt
|
|
TopK
|
|
uarch
|
|
uncached
|
|
uncacheable
|
|
uncorrectable
|
|
underoptimized
|
|
unhandled
|
|
uninstallation
|
|
unmapped
|
|
unsqueeze
|
|
unstacking
|
|
unswitching
|
|
untrusted
|
|
untuned
|
|
upvote
|
|
USM
|
|
UTCL
|
|
UTIL
|
|
utils
|
|
vL
|
|
variational
|
|
vdi
|
|
vectorizable
|
|
vectorization
|
|
vectorize
|
|
vectorized
|
|
vectorizer
|
|
vectorizes
|
|
virtualize
|
|
virtualized
|
|
vjxb
|
|
vllm
|
|
voxel
|
|
walkthrough
|
|
walkthroughs
|
|
watchpoints
|
|
wavefront
|
|
wavefronts
|
|
whitespace
|
|
whitespaces
|
|
workgroup
|
|
workgroups
|
|
writeback
|
|
writebacks
|
|
wrreq
|
|
wzo
|
|
xargs
|
|
xGMI
|
|
xPacked
|
|
xz
|
|
yaml
|
|
ysvmadyb
|
|
zypper
|