Add Megatron-LM benchmark doc 5/2 (#4778)

* reorg files

* add tabs

* update template

* update template

* update wordlist and toc

* add previous version to doc

* add selector paragraph

* update wordlist.txt

(cherry picked from commit 9ed65a81c4)
This commit is contained in:
Peter Park
2025-05-22 14:28:18 -04:00
parent 05773ca41e
commit 9b69755b99
7 changed files with 547 additions and 368 deletions

View File

@@ -272,6 +272,7 @@ NBIO
NBIOs
NCCL
NCF
NFS
NIC
NICs
NLI
@@ -500,6 +501,7 @@ ZenDNN
accuracies
activations
addr
ade
ai
alloc
allocatable
@@ -515,6 +517,7 @@ avx
awk
backend
backends
bb
benchmarked
benchmarking
bfloat
@@ -538,6 +541,7 @@ cd
centos
centric
changelog
checkpointing
chiplet
cmake
cmd
@@ -578,6 +582,7 @@ de
deallocation
debuggability
debian
deepseek
denoise
denoised
denoises
@@ -601,6 +606,7 @@ embeddings
enablement
encodings
endfor
endif
endpgm
enqueue
env
@@ -702,6 +708,7 @@ migratable
miopen
miopengemm
mivisionx
mixtral
mjx
mkdir
mlirmiopen
@@ -843,6 +850,7 @@ subfolder
subfolders
submodule
submodules
subnet
supercomputing
symlink
symlinks

View File

@@ -0,0 +1,29 @@
# Model catalog backing the Megatron-LM training benchmark documentation page.
# Structure: model_groups is a list; each entry has a display name (group),
# a lowercase selector tag (tag), and the models it contains. Each model pairs
# a human-readable name (model) with a mad_tag — presumably the identifier used
# by the MAD benchmarking tooling to select the run; confirm against consumers.
# NOTE(review): the pasted source had lost all leading whitespace; the nesting
# below is reconstructed from the key names (unambiguous) with 2-space indents.
megatron-lm_benchmark:
  model_groups:
    - group: Llama
      tag: llama
      models:
        - model: Llama 3.3 70B
          mad_tag: pyt_megatron_lm_train_llama-3.3-70b
        - model: Llama 3.1 8B
          mad_tag: pyt_megatron_lm_train_llama-3.1-8b
        - model: Llama 3.1 70B
          mad_tag: pyt_megatron_lm_train_llama-3.1-70b
        - model: Llama 2 7B
          mad_tag: pyt_megatron_lm_train_llama-2-7b
        - model: Llama 2 70B
          mad_tag: pyt_megatron_lm_train_llama-2-70b
    - group: DeepSeek
      tag: deepseek
      models:
        - model: DeepSeek-V3
          mad_tag: pyt_megatron_lm_train_deepseek-v3-proxy
        - model: DeepSeek-V2-Lite
          mad_tag: pyt_megatron_lm_train_deepseek-v2-lite-16b
    - group: Mistral
      tag: mistral
      models:
        - model: Mixtral 8x7B
          mad_tag: pyt_megatron_lm_train_mixtral-8x7b
        - model: Mixtral 8x22B
          mad_tag: pyt_megatron_lm_train_mixtral-8x22b-proxy

View File

@@ -44,11 +44,11 @@ subtrees:
title: Training
subtrees:
- entries:
- file: how-to/rocm-for-ai/training/benchmark-docker/megatron-lm
- file: how-to/rocm-for-ai/training/benchmark-docker/megatron-lm.rst
title: Train a model with Megatron-LM
- file: how-to/rocm-for-ai/training/benchmark-docker/pytorch-training
- file: how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.rst
title: Train a model with PyTorch
- file: how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext
- file: how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.rst
title: Train a model with JAX MaxText
- file: how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry
title: Train a model with LLM Foundry
@@ -78,9 +78,9 @@ subtrees:
title: Run models from Hugging Face
- file: how-to/rocm-for-ai/inference/llm-inference-frameworks.rst
title: LLM inference frameworks
- file: how-to/rocm-for-ai/inference/vllm-benchmark.rst
- file: how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst
title: vLLM inference performance testing
- file: how-to/rocm-for-ai/inference/pytorch-inference-benchmark.rst
- file: how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference.rst
title: PyTorch inference performance testing
- file: how-to/rocm-for-ai/inference/deploy-your-model.rst
title: Deploy your model