Update vLLM Docker doc for 07/02

This commit is contained in:
Peter Park
2025-07-09 11:38:27 -04:00
committed by GitHub
parent 1c7cff8a47
commit d471b04cd5
5 changed files with 529 additions and 7 deletions

View File

@@ -0,0 +1,162 @@
vllm_benchmark:
unified_docker:
latest:
pull_tag: rocm/vllm:rocm6.4.1_vllm_0.9.0.1_20250605
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.9.0.1_20250605/images/sha256-f48beeb3d72663a93c77211eb45273d564451447c097e060befa713d565fa36c
rocm_version: 6.4.1
vllm_version: 0.9.0.1 (0.9.0.2.dev108+g71faa1880.rocm641)
pytorch_version: 2.7.0+gitf717b2a
hipblaslt_version: 0.15
model_groups:
- group: Meta Llama
tag: llama
models:
- model: Llama 3.1 8B
mad_tag: pyt_vllm_llama-3.1-8b
model_repo: meta-llama/Llama-3.1-8B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-8B
precision: float16
- model: Llama 3.1 70B
mad_tag: pyt_vllm_llama-3.1-70b
model_repo: meta-llama/Llama-3.1-70B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
precision: float16
- model: Llama 3.1 405B
mad_tag: pyt_vllm_llama-3.1-405b
model_repo: meta-llama/Llama-3.1-405B-Instruct
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
precision: float16
- model: Llama 2 7B
mad_tag: pyt_vllm_llama-2-7b
model_repo: meta-llama/Llama-2-7b-chat-hf
url: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
precision: float16
- model: Llama 2 70B
mad_tag: pyt_vllm_llama-2-70b
model_repo: meta-llama/Llama-2-70b-chat-hf
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
precision: float16
- model: Llama 3.1 8B FP8
mad_tag: pyt_vllm_llama-3.1-8b_fp8
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
precision: float8
- model: Llama 3.1 70B FP8
mad_tag: pyt_vllm_llama-3.1-70b_fp8
model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
precision: float8
- model: Llama 3.1 405B FP8
mad_tag: pyt_vllm_llama-3.1-405b_fp8
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
precision: float8
- group: Mistral AI
tag: mistral
models:
- model: Mixtral MoE 8x7B
mad_tag: pyt_vllm_mixtral-8x7b
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
precision: float16
- model: Mixtral MoE 8x22B
mad_tag: pyt_vllm_mixtral-8x22b
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
precision: float16
- model: Mistral 7B
mad_tag: pyt_vllm_mistral-7b
model_repo: mistralai/Mistral-7B-Instruct-v0.3
url: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3
precision: float16
- model: Mixtral MoE 8x7B FP8
mad_tag: pyt_vllm_mixtral-8x7b_fp8
model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
precision: float8
- model: Mixtral MoE 8x22B FP8
mad_tag: pyt_vllm_mixtral-8x22b_fp8
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
precision: float8
- model: Mistral 7B FP8
mad_tag: pyt_vllm_mistral-7b_fp8
model_repo: amd/Mistral-7B-v0.1-FP8-KV
url: https://huggingface.co/amd/Mistral-7B-v0.1-FP8-KV
precision: float8
- group: Qwen
tag: qwen
models:
- model: Qwen2 7B
mad_tag: pyt_vllm_qwen2-7b
model_repo: Qwen/Qwen2-7B-Instruct
url: https://huggingface.co/Qwen/Qwen2-7B-Instruct
precision: float16
- model: Qwen2 72B
mad_tag: pyt_vllm_qwen2-72b
model_repo: Qwen/Qwen2-72B-Instruct
url: https://huggingface.co/Qwen/Qwen2-72B-Instruct
precision: float16
- model: QwQ-32B
mad_tag: pyt_vllm_qwq-32b
model_repo: Qwen/QwQ-32B
url: https://huggingface.co/Qwen/QwQ-32B
precision: float16
tunableop: true
- group: Databricks DBRX
tag: dbrx
models:
- model: DBRX Instruct
mad_tag: pyt_vllm_dbrx-instruct
model_repo: databricks/dbrx-instruct
url: https://huggingface.co/databricks/dbrx-instruct
precision: float16
- model: DBRX Instruct FP8
mad_tag: pyt_vllm_dbrx_fp8
model_repo: amd/dbrx-instruct-FP8-KV
url: https://huggingface.co/amd/dbrx-instruct-FP8-KV
precision: float8
- group: Google Gemma
tag: gemma
models:
- model: Gemma 2 27B
mad_tag: pyt_vllm_gemma-2-27b
model_repo: google/gemma-2-27b
url: https://huggingface.co/google/gemma-2-27b
precision: float16
- group: Cohere
tag: cohere
models:
- model: C4AI Command R+ 08-2024
mad_tag: pyt_vllm_c4ai-command-r-plus-08-2024
model_repo: CohereForAI/c4ai-command-r-plus-08-2024
url: https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024
precision: float16
- model: C4AI Command R+ 08-2024 FP8
mad_tag: pyt_vllm_command-r-plus_fp8
model_repo: amd/c4ai-command-r-plus-FP8-KV
url: https://huggingface.co/amd/c4ai-command-r-plus-FP8-KV
precision: float8
- group: DeepSeek
tag: deepseek
models:
- model: DeepSeek MoE 16B
mad_tag: pyt_vllm_deepseek-moe-16b-chat
model_repo: deepseek-ai/deepseek-moe-16b-chat
url: https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat
precision: float16
- group: Microsoft Phi
tag: phi
models:
- model: Phi-4
mad_tag: pyt_vllm_phi-4
model_repo: microsoft/phi-4
url: https://huggingface.co/microsoft/phi-4
- group: TII Falcon
tag: falcon
models:
- model: Falcon 180B
mad_tag: pyt_vllm_falcon-180b
model_repo: tiiuae/falcon-180B
url: https://huggingface.co/tiiuae/falcon-180B
precision: float16

View File

@@ -1,10 +1,11 @@
vllm_benchmark:
unified_docker:
latest:
pull_tag: rocm/vllm:rocm6.4.1_vllm_0.9.0.1_20250605
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.9.0.1_20250605/images/sha256-f48beeb3d72663a93c77211eb45273d564451447c097e060befa713d565fa36c
# TODO: update me
pull_tag: rocm/vllm:rocm6.4.1_vllm_0.9.1_20250702
docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.9.1_20250702/images/sha256-45068a2079cb8df554ed777141bf0c67d6627c470a897256e60c9f262677faab
rocm_version: 6.4.1
vllm_version: 0.9.0.1 (0.9.0.2.dev108+g71faa1880.rocm641)
vllm_version: 0.9.1 (0.9.2.dev206+gb335519f2.rocm641)
pytorch_version: 2.7.0+gitf717b2a
hipblaslt_version: 0.15
model_groups: