Add docs for Maxtext 26.1 Docker release (#5936)

* archive previous version

* update doc

* add multi-node support for Llama 3.1 405B

fix
This commit is contained in:
peterjunpark
2026-02-06 13:29:05 -05:00
committed by GitHub
parent 5b3a9c3c33
commit 1d5baf2c73
6 changed files with 554 additions and 84 deletions

View File

@@ -1,11 +1,11 @@
# NOTE(review): the +/- markers of this hunk are missing. The header counts 11 lines on
# each side while 16 lines are shown, so five lines are removed and five are added: the
# maxtext-v25.11 values are the replaced side and the maxtext-v26.1 values are the new side.
dockers:
- pull_tag: rocm/jax-training:maxtext-v25.11
docker_hub_url: https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.11/images/sha256-18e4d8f0b8ce7a7422c58046940dd5f32249960449fca09a562b65fb8eb1562a
- pull_tag: rocm/jax-training:maxtext-v26.1
docker_hub_url: https://hub.docker.com/layers/rocm/jax-training/maxtext-v26.1/images/sha256-901083bde353fe6362ada3036e452c792b2c96124e5900f4e9b5946c02ff9d6a
# Presumably the versions of the software components shipped in the image - confirm.
components:
ROCm: 7.1.0
JAX: 0.7.1
ROCm: 7.1.1
JAX: 0.8.2
# NOTE(review): unquoted, this loads as the float 3.12; quoting it would keep a future
# value such as 3.10 from collapsing to 3.1.
Python: 3.12
Transformer Engine: 2.4.0.dev0+281042de
Transformer Engine: 2.8.0.dev0+aec00a7f
hipBLASLt: 1.2.x
model_groups:
- group: Meta Llama
@@ -15,21 +15,29 @@ model_groups:
# NOTE(review): the +/- markers of this hunk are missing. The header counts 21 old and
# 29 new lines while 33 are shown, so four lines are removed and twelve are added: the
# four `multinode_training_script` lines are the removed side and the four three-line
# `multinode_config` mappings (one path per gfx950 / gfx942 key) are the added side.
# The first entry starts above this hunk; its `- model:` line is not shown.
mad_tag: jax_maxtext_train_llama-2-7b
model_repo: Llama-2-7B
precision: bf16
multinode_training_script: llama2_7b_multinode.sh
multinode_config:
gfx950: env_scripts/gfx950_llama2_7b.yml
gfx942: env_scripts/llama2_7b.yml
doc_options: ["single-node", "multi-node"]
- model: Llama 2 70B
mad_tag: jax_maxtext_train_llama-2-70b
model_repo: Llama-2-70B
precision: bf16
multinode_training_script: llama2_70b_multinode.sh
multinode_config:
gfx950: env_scripts/gfx950_llama2_70b.yml
gfx942: env_scripts/llama2_70b.yml
doc_options: ["single-node", "multi-node"]
# The two Llama 3 entries list only "multi-node" in doc_options and carry no
# model_repo / precision keys.
- model: Llama 3 8B (multi-node)
mad_tag: jax_maxtext_train_llama-3-8b
multinode_training_script: llama3_8b_multinode.sh
multinode_config:
gfx950: env_scripts/gfx950_llama3_8b.yml
gfx942: env_scripts/llama3_8b.yml
doc_options: ["multi-node"]
- model: Llama 3 70B (multi-node)
mad_tag: jax_maxtext_train_llama-3-70b
multinode_training_script: llama3_70b_multinode.sh
multinode_config:
gfx950: env_scripts/gfx950_llama3_70b.yml
gfx942: env_scripts/llama3_70b.yml
doc_options: ["multi-node"]
# This entry continues past the end of the hunk.
- model: Llama 3.1 8B
mad_tag: jax_maxtext_train_llama-3.1-8b
@@ -41,11 +49,21 @@ model_groups:
# NOTE(review): the +/- markers of this hunk are missing. The header counts 11 old and
# 21 new lines while 22 are shown, so one line is removed and eleven are added: the
# whole Llama 3.1 405B entry is new, and under Llama 3.3 70B the
# `doc_options: ["single-node"]` line is replaced by the `multinode_config` mapping
# plus the two-option `doc_options` line.
# The first three lines close an entry whose `- model:` line sits above this hunk.
model_repo: Llama-3.1-70B
precision: bf16
doc_options: ["single-node"]
# Only a gfx950 config is listed for this model; there is no gfx942 key.
# NOTE(review): the script name says "llama3" while the model is Llama 3.1 - confirm
# the file name.
- model: Llama 3.1 405B
mad_tag: jax_maxtext_train_llama-3.1-405b
model_repo: Llama-3.1-405B
precision: bf16
multinode_config:
gfx950: env_scripts/gfx950_llama3_405b.yml
doc_options: ["single-node", "multi-node"]
- model: Llama 3.3 70B
mad_tag: jax_maxtext_train_llama-3.3-70b
model_repo: Llama-3.3-70B
precision: bf16
doc_options: ["single-node"]
multinode_config:
gfx950: env_scripts/gfx950_llama3.3_70b.yml
gfx942: env_scripts/llama3.3_70b.yml
doc_options: ["single-node", "multi-node"]
- group: DeepSeek
tag: deepseek
models:
@@ -53,7 +71,10 @@ model_groups:
# NOTE(review): the +/- markers of this hunk are missing. The header counts 7 old and
# 10 new lines while 11 are shown, so one line is removed and four are added:
# `doc_options: ["single-node"]` is replaced by the `multinode_config` mapping plus the
# two-option `doc_options` line.
# The entry starts above this hunk; its `- model:` line is not shown.
mad_tag: jax_maxtext_train_deepseek-v2-lite-16b
model_repo: DeepSeek-V2-lite
precision: bf16
doc_options: ["single-node"]
multinode_config:
gfx950: env_scripts/gfx950_deepseek2_16b.yml
gfx942: env_scripts/deepseek2_16b.yml
doc_options: ["single-node", "multi-node"]
- group: Mistral AI
tag: mistral
models:
@@ -61,4 +82,7 @@ model_groups:
# NOTE(review): the +/- markers of this hunk are missing. The header counts 4 old and
# 7 new lines while 8 are shown, so one line is removed and four are added:
# `doc_options: ["single-node"]` is replaced by the `multinode_config` mapping plus the
# two-option `doc_options` line.
# The entry starts above this hunk; its `- model:` line is not shown.
mad_tag: jax_maxtext_train_mixtral-8x7b
model_repo: Mixtral-8x7B
precision: bf16
doc_options: ["single-node"]
multinode_config:
gfx950: env_scripts/gfx950_mixtral_8x7b.yml
# NOTE(review): every other gfx950/gfx942 pair in this file differs only by the
# "gfx950_" prefix, which would make this path env_scripts/mixtral_8x7b.yml. Possible
# copy-paste slip - confirm which script file actually exists before changing it.
gfx942: env_scripts/llama3_8x7b.yml
doc_options: ["single-node", "multi-node"]

View File

@@ -0,0 +1,64 @@
# Benchmark-model data for the rocm/jax-training:maxtext-v25.11 image.
#
# Top-level keys:
#   dockers      - image reference plus the component versions listed for it.
#   model_groups - models grouped by vendor; each model carries its tags and options.
dockers:
  - pull_tag: rocm/jax-training:maxtext-v25.11
    docker_hub_url: https://hub.docker.com/layers/rocm/jax-training/maxtext-v25.11/images/sha256-18e4d8f0b8ce7a7422c58046940dd5f32249960449fca09a562b65fb8eb1562a
    components:
      ROCm: 7.1.0
      JAX: 0.7.1
      # Quoted so the version stays a string instead of loading as the float 3.12.
      Python: "3.12"
      Transformer Engine: 2.4.0.dev0+281042de
      hipBLASLt: 1.2.x
model_groups:
  - group: Meta Llama
    tag: llama
    models:
      # doc_options presumably selects which documentation variants (single-node,
      # multi-node) apply to a model - confirm against the consuming template.
      - model: Llama 2 7B
        mad_tag: jax_maxtext_train_llama-2-7b
        model_repo: Llama-2-7B
        precision: bf16
        multinode_training_script: llama2_7b_multinode.sh
        doc_options: ["single-node", "multi-node"]
      - model: Llama 2 70B
        mad_tag: jax_maxtext_train_llama-2-70b
        model_repo: Llama-2-70B
        precision: bf16
        multinode_training_script: llama2_70b_multinode.sh
        doc_options: ["single-node", "multi-node"]
      # The two Llama 3 entries are multi-node only and carry no model_repo or
      # precision keys.
      - model: Llama 3 8B (multi-node)
        mad_tag: jax_maxtext_train_llama-3-8b
        multinode_training_script: llama3_8b_multinode.sh
        doc_options: ["multi-node"]
      - model: Llama 3 70B (multi-node)
        mad_tag: jax_maxtext_train_llama-3-70b
        multinode_training_script: llama3_70b_multinode.sh
        doc_options: ["multi-node"]
      - model: Llama 3.1 8B
        mad_tag: jax_maxtext_train_llama-3.1-8b
        model_repo: Llama-3.1-8B
        precision: bf16
        doc_options: ["single-node"]
      - model: Llama 3.1 70B
        mad_tag: jax_maxtext_train_llama-3.1-70b
        model_repo: Llama-3.1-70B
        precision: bf16
        doc_options: ["single-node"]
      - model: Llama 3.3 70B
        mad_tag: jax_maxtext_train_llama-3.3-70b
        model_repo: Llama-3.3-70B
        precision: bf16
        doc_options: ["single-node"]
  - group: DeepSeek
    tag: deepseek
    models:
      - model: DeepSeek-V2-Lite (16B)
        mad_tag: jax_maxtext_train_deepseek-v2-lite-16b
        model_repo: DeepSeek-V2-lite
        precision: bf16
        doc_options: ["single-node"]
  - group: Mistral AI
    tag: mistral
    models:
      - model: Mixtral 8x7B
        mad_tag: jax_maxtext_train_mixtral-8x7b
        model_repo: Mixtral-8x7B
        precision: bf16
        doc_options: ["single-node"]