Add Megatron-LM benchmark doc 5/2 (#4778)

* reorg files

* add tabs

* update template

* update template

* update wordlist and toc

* add previous version to doc

* add selector paragraph

* update wordlist.txt

(cherry picked from commit 9ed65a81c4)
This commit is contained in:
Peter Park
2025-05-22 14:28:18 -04:00
parent 05773ca41e
commit 9b69755b99
7 changed files with 547 additions and 368 deletions

View File

@@ -272,6 +272,7 @@ NBIO
NBIOs
NCCL
NCF
NFS
NIC
NICs
NLI
@@ -500,6 +501,7 @@ ZenDNN
accuracies
activations
addr
ade
ai
alloc
allocatable
@@ -515,6 +517,7 @@ avx
awk
backend
backends
bb
benchmarked
benchmarking
bfloat
@@ -538,6 +541,7 @@ cd
centos
centric
changelog
checkpointing
chiplet
cmake
cmd
@@ -578,6 +582,7 @@ de
deallocation
debuggability
debian
deepseek
denoise
denoised
denoises
@@ -601,6 +606,7 @@ embeddings
enablement
encodings
endfor
endif
endpgm
enqueue
env
@@ -702,6 +708,7 @@ migratable
miopen
miopengemm
mivisionx
mixtral
mjx
mkdir
mlirmiopen
@@ -843,6 +850,7 @@ subfolder
subfolders
submodule
submodules
subnet
supercomputing
symlink
symlinks

View File

@@ -0,0 +1,29 @@
# Model catalog backing the Megatron-LM training benchmark documentation page.
# Structure: model_groups is a list; each entry has a display name (group),
# a lowercase selector tag (tag), and the models it contains. Each model pairs
# a human-readable name (model) with a mad_tag — presumably the identifier used
# by the MAD benchmarking tooling to select the run; confirm against consumers.
# NOTE(review): the pasted source had lost all leading whitespace; the nesting
# below is reconstructed from the key names (unambiguous) with 2-space indents.
megatron-lm_benchmark:
  model_groups:
    - group: Llama
      tag: llama
      models:
        - model: Llama 3.3 70B
          mad_tag: pyt_megatron_lm_train_llama-3.3-70b
        - model: Llama 3.1 8B
          mad_tag: pyt_megatron_lm_train_llama-3.1-8b
        - model: Llama 3.1 70B
          mad_tag: pyt_megatron_lm_train_llama-3.1-70b
        - model: Llama 2 7B
          mad_tag: pyt_megatron_lm_train_llama-2-7b
        - model: Llama 2 70B
          mad_tag: pyt_megatron_lm_train_llama-2-70b
    - group: DeepSeek
      tag: deepseek
      models:
        - model: DeepSeek-V3
          mad_tag: pyt_megatron_lm_train_deepseek-v3-proxy
        - model: DeepSeek-V2-Lite
          mad_tag: pyt_megatron_lm_train_deepseek-v2-lite-16b
    - group: Mistral
      tag: mistral
      models:
        - model: Mixtral 8x7B
          mad_tag: pyt_megatron_lm_train_mixtral-8x7b
        - model: Mixtral 8x22B
          mad_tag: pyt_megatron_lm_train_mixtral-8x22b-proxy

View File

@@ -44,11 +44,11 @@ subtrees:
title: Training
subtrees:
- entries:
- file: how-to/rocm-for-ai/training/benchmark-docker/megatron-lm
- file: how-to/rocm-for-ai/training/benchmark-docker/megatron-lm.rst
title: Train a model with Megatron-LM
- file: how-to/rocm-for-ai/training/benchmark-docker/pytorch-training
- file: how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.rst
title: Train a model with PyTorch
- file: how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext
- file: how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.rst
title: Train a model with JAX MaxText
- file: how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry
title: Train a model with LLM Foundry
@@ -78,9 +78,9 @@ subtrees:
title: Run models from Hugging Face
- file: how-to/rocm-for-ai/inference/llm-inference-frameworks.rst
title: LLM inference frameworks
- file: how-to/rocm-for-ai/inference/vllm-benchmark.rst
- file: how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst
title: vLLM inference performance testing
- file: how-to/rocm-for-ai/inference/pytorch-inference-benchmark.rst
- file: how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference.rst
title: PyTorch inference performance testing
- file: how-to/rocm-for-ai/inference/deploy-your-model.rst
title: Deploy your model