Update Megatron-LM training benchmark doc for v25.6 release (#5064)
@@ -1,29 +1,60 @@
 megatron-lm_benchmark:
-  model_groups:
-    - group: Meta Llama
-      tag: llama
-      models:
+  dockers:
+    - pull_tag: rocm/megatron-lm:v25.6_py312
+      docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.6_py312/images/sha256-482ff906532285bceabdf2bda629bd32cb6174d2d07f4243a736378001b28df0
+      components:
+        ROCm: 6.4.1
+        PyTorch: 2.8.0a0+git7d205b2
+        Python: 3.12
+        Transformer Engine: 2.1.0.dev0+8c4a512
+        hipBLASLt: 393e413
+        Triton: 3.3.0
+        RCCL: 2.23.4.7a84c5d
+      doc_name: Ubuntu 24.04 + Python 3.12
+    - pull_tag: rocm/megatron-lm:v25.6_py310
+      docker_hub_url: https://hub.docker.com/layers/rocm/megatron-lm/v25.6_py310/images/sha256-9627bd9378684fe26cb1a10c7dd817868f553b33402e49b058355b0f095568d6
+      components:
+        ROCm: 6.4.1
+        PyTorch: 2.8.0a0+git7d205b2
+        Python: "3.10"
+        Transformer Engine: 2.1.0.dev0+8c4a512
+        hipBLASLt: 393e413
+        Triton: 3.3.0
+        RCCL: 2.23.4.7a84c5d
+      doc_name: Ubuntu 22.04 + Python 3.10
+  model_groups:
+    - group: Meta Llama
+      tag: llama
+      models:
         - model: Llama 3.3 70B
           mad_tag: pyt_megatron_lm_train_llama-3.3-70b
         - model: Llama 3.1 8B
           mad_tag: pyt_megatron_lm_train_llama-3.1-8b
         - model: Llama 3.1 70B
           mad_tag: pyt_megatron_lm_train_llama-3.1-70b
+        - model: Llama 3.1 70B (proxy)
+          mad_tag: pyt_megatron_lm_train_llama-3.1-70b-proxy
         - model: Llama 2 7B
           mad_tag: pyt_megatron_lm_train_llama-2-7b
         - model: Llama 2 70B
           mad_tag: pyt_megatron_lm_train_llama-2-70b
-    - group: DeepSeek
-      tag: deepseek
-      models:
-        - model: DeepSeek-V3
+    - group: DeepSeek
+      tag: deepseek
+      models:
+        - model: DeepSeek-V3 (proxy)
           mad_tag: pyt_megatron_lm_train_deepseek-v3-proxy
         - model: DeepSeek-V2-Lite
           mad_tag: pyt_megatron_lm_train_deepseek-v2-lite-16b
-    - group: Mistral AI
-      tag: mistral
-      models:
+    - group: Mistral AI
+      tag: mistral
+      models:
         - model: Mixtral 8x7B
           mad_tag: pyt_megatron_lm_train_mixtral-8x7b
-        - model: Mixtral 8x22B
+        - model: Mixtral 8x22B (proxy)
           mad_tag: pyt_megatron_lm_train_mixtral-8x22b-proxy
+    - group: Qwen
+      tag: qwen
+      models:
+        - model: Qwen 2.5 7B
+          mad_tag: pyt_megatron_lm_train_qwen2.5-7b
+        - model: Qwen 2.5 72B
+          mad_tag: pyt_megatron_lm_train_qwen2.5-72b
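For reference, a minimal Python sketch of one way the updated data file above could be consumed. It assumes the YAML has been saved locally as megatron-lm-benchmark-models.yaml (a hypothetical file name) and that PyYAML is installed; it simply prints the docker pull command for each listed image and the MAD tag associated with each model. This is an illustration of the file's structure, not part of the ROCm tooling itself.

    # A minimal sketch, assuming the YAML above is saved as
    # "megatron-lm-benchmark-models.yaml" (hypothetical name) and PyYAML is installed.
    import yaml

    with open("megatron-lm-benchmark-models.yaml") as f:
        data = yaml.safe_load(f)["megatron-lm_benchmark"]

    # Each docker entry carries the image pull tag and its component versions.
    for docker in data.get("dockers", []):
        print(f"docker pull {docker['pull_tag']}  # {docker['doc_name']}")

    # Each model group lists models, most with a MAD tag identifying the
    # corresponding training workload.
    for group in data.get("model_groups", []):
        for model in group.get("models", []):
            tag = model.get("mad_tag", "<no mad_tag>")
            print(f"[{group['group']}] {model['model']}: {tag}")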
@@ -0,0 +1,29 @@
+megatron-lm_benchmark:
+  model_groups:
+    - group: Meta Llama
+      tag: llama
+      models:
+        - model: Llama 3.3 70B
+          mad_tag: pyt_megatron_lm_train_llama-3.3-70b
+        - model: Llama 3.1 8B
+          mad_tag: pyt_megatron_lm_train_llama-3.1-8b
+        - model: Llama 3.1 70B
+          mad_tag: pyt_megatron_lm_train_llama-3.1-70b
+        - model: Llama 2 7B
+          mad_tag: pyt_megatron_lm_train_llama-2-7b
+        - model: Llama 2 70B
+          mad_tag: pyt_megatron_lm_train_llama-2-70b
+    - group: DeepSeek
+      tag: deepseek
+      models:
+        - model: DeepSeek-V3
+          mad_tag: pyt_megatron_lm_train_deepseek-v3-proxy
+        - model: DeepSeek-V2-Lite
+          mad_tag: pyt_megatron_lm_train_deepseek-v2-lite-16b
+    - group: Mistral AI
+      tag: mistral
+      models:
+        - model: Mixtral 8x7B
+          mad_tag: pyt_megatron_lm_train_mixtral-8x7b
+        - model: Mixtral 8x22B
+          mad_tag: pyt_megatron_lm_train_mixtral-8x22b-proxy