# ROCm/docs/data/how-to/rocm-for-ai/inference/vllm-benchmark-models.yaml

vllm_benchmark:
  # Metadata for the unified vLLM Docker image: its pull tag, its Docker Hub
  # page, and the component versions recorded for it.
  unified_docker:
    latest:
      # TODO: update me
      pull_tag: rocm/vllm:rocm6.4.1_vllm_0.9.1_20250715
      docker_hub_url: https://hub.docker.com/layers/rocm/vllm/rocm6.4.1_vllm_0.9.1_20250715/images/sha256-4a429705fa95a58f6d20aceab43b1b76fa769d57f32d5d28bd3f4e030e2a78ea
      # Version values are quoted so they always load as strings. An unquoted
      # two-part version such as 0.15 is read as a float, which would silently
      # turn a future value like 0.20 into 0.2.
      rocm_version: '6.4.1'
      vllm_version: '0.9.1 (0.9.2.dev364+gb432b7a28.rocm641)'
      pytorch_version: '2.7.0+gitf717b2a'
      hipblaslt_version: '0.15'
  # Catalogue of benchmarked models. Each group carries a display name, a short
  # tag, and a list of model entries (display name, mad_tag, Hugging Face repo,
  # model page URL, and precision).
  model_groups:
    # Meta Llama: float16 checkpoints from the meta-llama organization, followed
    # by FP8 variants published under the amd organization.
    - group: Meta Llama
      tag: llama
      models:
        - model: Llama 3.1 8B
          mad_tag: pyt_vllm_llama-3.1-8b
          model_repo: meta-llama/Llama-3.1-8B-Instruct
          # URL points at the same repository as model_repo (the Instruct
          # checkpoint), matching how the other entries pair these two fields.
          url: https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct
          precision: float16
        - model: Llama 3.1 70B
          mad_tag: pyt_vllm_llama-3.1-70b
          model_repo: meta-llama/Llama-3.1-70B-Instruct
          url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
          precision: float16
        - model: Llama 3.1 405B
          mad_tag: pyt_vllm_llama-3.1-405b
          model_repo: meta-llama/Llama-3.1-405B-Instruct
          url: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
          precision: float16
        - model: Llama 2 7B
          mad_tag: pyt_vllm_llama-2-7b
          model_repo: meta-llama/Llama-2-7b-chat-hf
          url: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
          precision: float16
        - model: Llama 2 70B
          mad_tag: pyt_vllm_llama-2-70b
          model_repo: meta-llama/Llama-2-70b-chat-hf
          url: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
          precision: float16
        - model: Llama 3.1 8B FP8
          mad_tag: pyt_vllm_llama-3.1-8b_fp8
          model_repo: amd/Llama-3.1-8B-Instruct-FP8-KV
          url: https://huggingface.co/amd/Llama-3.1-8B-Instruct-FP8-KV
          precision: float8
        - model: Llama 3.1 70B FP8
          mad_tag: pyt_vllm_llama-3.1-70b_fp8
          model_repo: amd/Llama-3.1-70B-Instruct-FP8-KV
          url: https://huggingface.co/amd/Llama-3.1-70B-Instruct-FP8-KV
          precision: float8
        - model: Llama 3.1 405B FP8
          mad_tag: pyt_vllm_llama-3.1-405b_fp8
          model_repo: amd/Llama-3.1-405B-Instruct-FP8-KV
          url: https://huggingface.co/amd/Llama-3.1-405B-Instruct-FP8-KV
          precision: float8
    # Mistral AI: three float16 checkpoints from the mistralai organization,
    # followed by FP8 variants published under the amd organization.
    - group: Mistral AI
      tag: mistral
      models:
        - model: Mixtral MoE 8x7B
          mad_tag: pyt_vllm_mixtral-8x7b
          model_repo: mistralai/Mixtral-8x7B-Instruct-v0.1
          url: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
          precision: float16
        - model: Mixtral MoE 8x22B
          mad_tag: pyt_vllm_mixtral-8x22b
          model_repo: mistralai/Mixtral-8x22B-Instruct-v0.1
          url: https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1
          precision: float16
        - model: Mistral 7B
          mad_tag: pyt_vllm_mistral-7b
          model_repo: mistralai/Mistral-7B-Instruct-v0.3
          url: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3
          precision: float16
        - model: Mixtral MoE 8x7B FP8
          mad_tag: pyt_vllm_mixtral-8x7b_fp8
          model_repo: amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
          url: https://huggingface.co/amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV
          precision: float8
        - model: Mixtral MoE 8x22B FP8
          mad_tag: pyt_vllm_mixtral-8x22b_fp8
          model_repo: amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
          url: https://huggingface.co/amd/Mixtral-8x22B-Instruct-v0.1-FP8-KV
          precision: float8
        - model: Mistral 7B FP8
          mad_tag: pyt_vllm_mistral-7b_fp8
          model_repo: amd/Mistral-7B-v0.1-FP8-KV
          url: https://huggingface.co/amd/Mistral-7B-v0.1-FP8-KV
          precision: float8
    # Qwen: float16 checkpoints from the Qwen organization.
    - group: Qwen
      tag: qwen
      models:
        - model: Qwen2 7B
          mad_tag: pyt_vllm_qwen2-7b
          model_repo: Qwen/Qwen2-7B-Instruct
          url: https://huggingface.co/Qwen/Qwen2-7B-Instruct
          precision: float16
        - model: Qwen2 72B
          mad_tag: pyt_vllm_qwen2-72b
          model_repo: Qwen/Qwen2-72B-Instruct
          url: https://huggingface.co/Qwen/Qwen2-72B-Instruct
          precision: float16
        - model: QwQ-32B
          mad_tag: pyt_vllm_qwq-32b
          model_repo: Qwen/QwQ-32B
          url: https://huggingface.co/Qwen/QwQ-32B
          precision: float16
          # NOTE(review): this entry also sets a tunableop flag; confirm how
          # consumers treat entries that omit the key.
          tunableop: true
    # Databricks DBRX: the float16 checkpoint from the databricks organization
    # and an FP8 variant published under the amd organization.
    - group: Databricks DBRX
      tag: dbrx
      models:
        - model: DBRX Instruct
          mad_tag: pyt_vllm_dbrx-instruct
          model_repo: databricks/dbrx-instruct
          url: https://huggingface.co/databricks/dbrx-instruct
          precision: float16
        - model: DBRX Instruct FP8
          mad_tag: pyt_vllm_dbrx_fp8
          model_repo: amd/dbrx-instruct-FP8-KV
          url: https://huggingface.co/amd/dbrx-instruct-FP8-KV
          precision: float8
    # Google Gemma: a single float16 entry.
    - group: Google Gemma
      tag: gemma
      models:
        - model: Gemma 2 27B
          mad_tag: pyt_vllm_gemma-2-27b
          model_repo: google/gemma-2-27b
          url: https://huggingface.co/google/gemma-2-27b
          precision: float16
    # Cohere: the float16 checkpoint from the CohereForAI organization and an
    # FP8 variant published under the amd organization.
    - group: Cohere
      tag: cohere
      models:
        - model: C4AI Command R+ 08-2024
          mad_tag: pyt_vllm_c4ai-command-r-plus-08-2024
          model_repo: CohereForAI/c4ai-command-r-plus-08-2024
          url: https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024
          precision: float16
        - model: C4AI Command R+ 08-2024 FP8
          mad_tag: pyt_vllm_command-r-plus_fp8
          model_repo: amd/c4ai-command-r-plus-FP8-KV
          url: https://huggingface.co/amd/c4ai-command-r-plus-FP8-KV
          precision: float8
    # DeepSeek: a single float16 entry.
    - group: DeepSeek
      tag: deepseek
      models:
        - model: DeepSeek MoE 16B
          mad_tag: pyt_vllm_deepseek-moe-16b-chat
          model_repo: deepseek-ai/deepseek-moe-16b-chat
          url: https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat
          precision: float16
    # Microsoft Phi: a single entry.
    - group: Microsoft Phi
      tag: phi
      models:
        - model: Phi-4
          mad_tag: pyt_vllm_phi-4
          model_repo: microsoft/phi-4
          url: https://huggingface.co/microsoft/phi-4
          # This entry had no precision key, unlike the other model entries in
          # this section. float16 mirrors the other non-FP8 entries —
          # TODO confirm against the benchmark configuration.
          precision: float16
    # TII Falcon: a single float16 entry.
    - group: TII Falcon
      tag: falcon
      models:
        - model: Falcon 180B
          mad_tag: pyt_vllm_falcon-180b
          model_repo: tiiuae/falcon-180B
          url: https://huggingface.co/tiiuae/falcon-180B
          precision: float16