mirror of
https://github.com/ROCm/ROCm.git
synced 2026-01-09 22:58:17 -05:00
164 lines
6.1 KiB
YAML
164 lines
6.1 KiB
YAML
# vLLM benchmarking configuration: the unified Docker image to pull and the
# catalog of model groups benchmarked with it (groups follow under model_groups).
vllm_benchmark:
  unified_docker:
    latest:
      # TODO: update me
      pull_tag: rocm/vllm:rocm6.4.1_vllm_0.9.1_20250715
      docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.9.1_20250715/images/sha256-4a429705fa95a58f6d20aceab43b1b76fa769d57f32d5d28bd3f4e030e2a78ea
      rocm_version: 6.4.1
      vllm_version: 0.9.1 (0.9.2.dev364+gb432b7a28.rocm641)
      pytorch_version: 2.7.0+gitf717b2a
      # Quoted: a bare 0.15 would be parsed as a YAML float, not the
      # version string "0.15".
      hipblaslt_version: "0.15"
  model_groups:
- group: Meta Llama
|
|
tag: llama
|
|
models:
|
|
- model: Llama 3.1 8B
|
|
mad_tag: pyt_vllm_llama-3.1-8b
|
|
model_repo: meta-llama/Llama-3.1-8B-Instruct
|
|
url: https://huggingface.co/meta-llama/Llama-3.1-8B
|
|
precision: float16
|
|
- model: Llama 3.1 70B
|
|
mad_tag: pyt_vllm_llama-3.1-70b
|
|
model_repo: meta-llama/Llama-3.1-70B-Instruct
|
|
url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
|
|
precision: float16
|
|
- model: Llama 3.1 405B
|
|
mad_tag: pyt_vllm_llama-3.1-405b
|
|
model_repo: meta-llama/Llama-3.1-405B-Instruct
|
|
url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
|
|
precision: float16
|
|
- model: Llama 2 7B
|
|
mad_tag: pyt_vllm_llama-2-7b
|
|
model_repo: meta-llama/Llama-2-7b-chat-hf
|
|
url: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
|
|
precision: float16
|
|
- model: Llama 2 70B
|
|
mad_tag: pyt_vllm_llama-2-70b
|
|
model_repo: meta-llama/Llama-2-70b-chat-hf
|
|
url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
|
|
precision: float16
|
|
- model: Llama 3.1 8B FP8
|
|
mad_tag: pyt_vllm_llama-3.1-8b_fp8
|
|
model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
|
|
url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
|
|
precision: float8
|
|
- model: Llama 3.1 70B FP8
|
|
mad_tag: pyt_vllm_llama-3.1-70b_fp8
|
|
model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
|
|
url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
|
|
precision: float8
|
|
- model: Llama 3.1 405B FP8
|
|
mad_tag: pyt_vllm_llama-3.1-405b_fp8
|
|
model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
|
|
url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
|
|
precision: float8
|
|
- group: Mistral AI
|
|
tag: mistral
|
|
models:
|
|
- model: Mixtral MoE 8x7B
|
|
mad_tag: pyt_vllm_mixtral-8x7b
|
|
model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
|
|
url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
|
|
precision: float16
|
|
- model: Mixtral MoE 8x22B
|
|
mad_tag: pyt_vllm_mixtral-8x22b
|
|
model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
|
|
url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
|
|
precision: float16
|
|
- model: Mistral 7B
|
|
mad_tag: pyt_vllm_mistral-7b
|
|
model_repo: mistralai/Mistral-7B-Instruct-v0.3
|
|
url: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3
|
|
precision: float16
|
|
- model: Mixtral MoE 8x7B FP8
|
|
mad_tag: pyt_vllm_mixtral-8x7b_fp8
|
|
model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
|
|
url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
|
|
precision: float8
|
|
- model: Mixtral MoE 8x22B FP8
|
|
mad_tag: pyt_vllm_mixtral-8x22b_fp8
|
|
model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
|
|
url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
|
|
precision: float8
|
|
- model: Mistral 7B FP8
|
|
mad_tag: pyt_vllm_mistral-7b_fp8
|
|
model_repo: amd/Mistral-7B-v0.1-FP8-KV
|
|
url: https://huggingface.co/amd/Mistral-7B-v0.1-FP8-KV
|
|
precision: float8
|
|
- group: Qwen
|
|
tag: qwen
|
|
models:
|
|
- model: Qwen2 7B
|
|
mad_tag: pyt_vllm_qwen2-7b
|
|
model_repo: Qwen/Qwen2-7B-Instruct
|
|
url: https://huggingface.co/Qwen/Qwen2-7B-Instruct
|
|
precision: float16
|
|
- model: Qwen2 72B
|
|
mad_tag: pyt_vllm_qwen2-72b
|
|
model_repo: Qwen/Qwen2-72B-Instruct
|
|
url: https://huggingface.co/Qwen/Qwen2-72B-Instruct
|
|
precision: float16
|
|
- model: QwQ-32B
|
|
mad_tag: pyt_vllm_qwq-32b
|
|
model_repo: Qwen/QwQ-32B
|
|
url: https://huggingface.co/Qwen/QwQ-32B
|
|
precision: float16
|
|
tunableop: true
|
|
- group: Databricks DBRX
|
|
tag: dbrx
|
|
models:
|
|
- model: DBRX Instruct
|
|
mad_tag: pyt_vllm_dbrx-instruct
|
|
model_repo: databricks/dbrx-instruct
|
|
url: https://huggingface.co/databricks/dbrx-instruct
|
|
precision: float16
|
|
- model: DBRX Instruct FP8
|
|
mad_tag: pyt_vllm_dbrx_fp8
|
|
model_repo: amd/dbrx-instruct-FP8-KV
|
|
url: https://huggingface.co/amd/dbrx-instruct-FP8-KV
|
|
precision: float8
|
|
- group: Google Gemma
|
|
tag: gemma
|
|
models:
|
|
- model: Gemma 2 27B
|
|
mad_tag: pyt_vllm_gemma-2-27b
|
|
model_repo: google/gemma-2-27b
|
|
url: https://huggingface.co/google/gemma-2-27b
|
|
precision: float16
|
|
- group: Cohere
|
|
tag: cohere
|
|
models:
|
|
- model: C4AI Command R+ 08-2024
|
|
mad_tag: pyt_vllm_c4ai-command-r-plus-08-2024
|
|
model_repo: CohereForAI/c4ai-command-r-plus-08-2024
|
|
url: https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024
|
|
precision: float16
|
|
- model: C4AI Command R+ 08-2024 FP8
|
|
mad_tag: pyt_vllm_command-r-plus_fp8
|
|
model_repo: amd/c4ai-command-r-plus-FP8-KV
|
|
url: https://huggingface.co/amd/c4ai-command-r-plus-FP8-KV
|
|
precision: float8
|
|
- group: DeepSeek
|
|
tag: deepseek
|
|
models:
|
|
- model: DeepSeek MoE 16B
|
|
mad_tag: pyt_vllm_deepseek-moe-16b-chat
|
|
model_repo: deepseek-ai/deepseek-moe-16b-chat
|
|
url: https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat
|
|
precision: float16
|
|
- group: Microsoft Phi
|
|
tag: phi
|
|
models:
|
|
- model: Phi-4
|
|
mad_tag: pyt_vllm_phi-4
|
|
model_repo: microsoft/phi-4
|
|
url: https://huggingface.co/microsoft/phi-4
|
|
- group: TII Falcon
|
|
tag: falcon
|
|
models:
|
|
- model: Falcon 180B
|
|
mad_tag: pyt_vllm_falcon-180b
|
|
model_repo: tiiuae/falcon-180B
|
|
url: https://huggingface.co/tiiuae/falcon-180B
|
|
precision: float16
|