mirror of
https://github.com/ROCm/ROCm.git
synced 2026-02-11 23:15:06 -05:00
xDiT diffusion inference docker 26.1
This commit is contained in:
@@ -1,14 +1,13 @@
|
||||
docker:
|
||||
pull_tag: rocm/pytorch-xdit:v25.13
|
||||
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.13/images/sha256-81954713070d67bde08595e03f62110c8a3dd66a9ae17a77d611e01f83f0f4ef
|
||||
pull_tag: rocm/pytorch-xdit:v26.1
|
||||
docker_hub_url: https://hub.docker.com/r/rocm/pytorch-xdit
|
||||
ROCm: 7.11.0
|
||||
whats_new:
|
||||
- "Flux.1 Kontext support"
|
||||
- "Flux.2 Dev support"
|
||||
- "Flux FP8 GEMM support"
|
||||
- "Hybrid FP8 attention support for Wan models"
|
||||
- "HunyuanVideo 1.5 support"
|
||||
- "Z-Image Turbo support"
|
||||
- "Wan model sharding"
|
||||
components:
|
||||
TheRock:
|
||||
TheRock:
|
||||
version: 1728a81
|
||||
url: https://github.com/ROCm/TheRock
|
||||
rccl:
|
||||
@@ -39,10 +38,10 @@ docker:
|
||||
version: de14bec0
|
||||
url: https://github.com/ROCm/aiter
|
||||
diffusers:
|
||||
version: a1f36ee3e
|
||||
version: 6708f5
|
||||
url: https://github.com/huggingface/diffusers
|
||||
xfuser:
|
||||
version: adf2681
|
||||
version: 0a3d7a
|
||||
url: https://github.com/xdit-project/xDiT
|
||||
yunchang:
|
||||
version: 2c9b712
|
||||
@@ -58,6 +57,49 @@ docker:
|
||||
github: https://github.com/Tencent-Hunyuan/HunyuanVideo
|
||||
mad_tag: pyt_xdit_hunyuanvideo
|
||||
js_tag: hunyuan_tag
|
||||
benchmark_command:
|
||||
- cd /app/Hunyuanvideo
|
||||
- mkdir results
|
||||
- 'torchrun --nproc_per_node=8 run.py \'
|
||||
- '--model {model_repo} \'
|
||||
- '--prompt "In the large cage, two puppies were wagging their tails at each other." \'
|
||||
- '--batch_size 1 \'
|
||||
- '--height 720 --width 1280 \'
|
||||
- '--seed 1168860793 \'
|
||||
- '--num_frames 129 \'
|
||||
- '--num_inference_steps 50 \'
|
||||
- '--warmup_steps 1 \'
|
||||
- '--n_repeats 1 \'
|
||||
- '--sleep_dur 10 \'
|
||||
- '--ulysses_degree 8 \'
|
||||
- '--enable_tiling --enable_slicing \'
|
||||
- '--guidance_scale 6.0 \'
|
||||
- '--use_torch_compile \'
|
||||
- '--attention_backend aiter \'
|
||||
- '--benchmark_output_directory results'
|
||||
- model: Hunyuan Video 1.5
|
||||
model_repo: hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-720p_t2v
|
||||
url: https://huggingface.co/hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-720p_t2v
|
||||
github: https://github.com/Tencent-Hunyuan/HunyuanVideo-1.5
|
||||
mad_tag: pyt_xdit_hunyuanvideo_1_5
|
||||
js_tag: hunyuan_1_5_tag
|
||||
benchmark_command:
|
||||
- cd /app/Hunyuanvideo_1_5
|
||||
- mkdir results
|
||||
- 'torchrun --nproc_per_node=8 /app/Hunyuanvideo_1_5/run.py \'
|
||||
- '--model {model_repo} \'
|
||||
- '--prompt "In the large cage, two puppies were wagging their tails at each other." \'
|
||||
- '--task t2v \'
|
||||
- '--height 720 --width 1280 \'
|
||||
- '--seed 1168860793 \'
|
||||
- '--num_frames 129 \'
|
||||
- '--num_inference_steps 50 \'
|
||||
- '--num_repetitions 1 \'
|
||||
- '--ulysses_degree 8 \'
|
||||
- '--enable_tiling --enable_slicing \'
|
||||
- '--use_torch_compile \'
|
||||
- '--attention_backend aiter \'
|
||||
- '--benchmark_output_directory results'
|
||||
- group: Wan-AI
|
||||
js_tag: wan
|
||||
models:
|
||||
@@ -67,12 +109,48 @@ docker:
|
||||
github: https://github.com/Wan-Video/Wan2.1
|
||||
mad_tag: pyt_xdit_wan_2_1
|
||||
js_tag: wan_21_tag
|
||||
benchmark_command:
|
||||
- cd /app/Wan
|
||||
- mkdir results
|
||||
- 'torchrun --nproc_per_node=8 /app/Wan/run.py \'
|
||||
- '--model {model_repo} \'
|
||||
- '--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline''s intricate details and the refreshing atmosphere of the seaside." \'
|
||||
- '--task i2v \'
|
||||
- '--height 720 \'
|
||||
- '--width 1280 \'
|
||||
- '--img_file_path /app/Wan/i2v_input.JPG \'
|
||||
- '--num_frames 81 \'
|
||||
- '--ulysses_degree 8 \'
|
||||
- '--seed 42 \'
|
||||
- '--num_repetitions 1 \'
|
||||
- '--num_inference_steps 40 \'
|
||||
- '--use_torch_compile \'
|
||||
- '--attention_backend aiter \'
|
||||
- '--benchmark_output_directory results'
|
||||
- model: Wan2.2
|
||||
model_repo: Wan-AI/Wan2.2-I2V-A14B-Diffusers
|
||||
url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers
|
||||
github: https://github.com/Wan-Video/Wan2.2
|
||||
mad_tag: pyt_xdit_wan_2_2
|
||||
js_tag: wan_22_tag
|
||||
benchmark_command:
|
||||
- cd /app/Wan
|
||||
- mkdir results
|
||||
- 'torchrun --nproc_per_node=8 /app/Wan/run.py \'
|
||||
- '--model {model_repo} \'
|
||||
- '--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline''s intricate details and the refreshing atmosphere of the seaside." \'
|
||||
- '--task i2v \'
|
||||
- '--height 720 \'
|
||||
- '--width 1280 \'
|
||||
- '--img_file_path /app/Wan/i2v_input.JPG \'
|
||||
- '--num_frames 81 \'
|
||||
- '--ulysses_degree 8 \'
|
||||
- '--seed 42 \'
|
||||
- '--num_repetitions 1 \'
|
||||
- '--num_inference_steps 40 \'
|
||||
- '--use_torch_compile \'
|
||||
- '--attention_backend aiter \'
|
||||
- '--benchmark_output_directory results'
|
||||
- group: FLUX
|
||||
js_tag: flux
|
||||
models:
|
||||
@@ -82,18 +160,79 @@ docker:
|
||||
github: https://github.com/black-forest-labs/flux
|
||||
mad_tag: pyt_xdit_flux
|
||||
js_tag: flux_1_tag
|
||||
benchmark_command:
|
||||
- cd /app/Flux
|
||||
- mkdir results
|
||||
- 'torchrun --nproc_per_node=8 /app/Flux/run.py \'
|
||||
- '--model {model_repo} \'
|
||||
- '--seed 42 \'
|
||||
- '--prompt "A small cat" \'
|
||||
- '--height 1024 \'
|
||||
- '--width 1024 \'
|
||||
- '--num_inference_steps 25 \'
|
||||
- '--max_sequence_length 256 \'
|
||||
- '--warmup_steps 5 \'
|
||||
- '--no_use_resolution_binning \'
|
||||
- '--ulysses_degree 8 \'
|
||||
- '--use_torch_compile \'
|
||||
- '--guidance_scale 0.0 \'
|
||||
- '--num_repetitions 50 \'
|
||||
- '--attention_backend aiter \'
|
||||
- '--benchmark_output_directory results'
|
||||
- model: FLUX.1 Kontext
|
||||
model_repo: black-forest-labs/FLUX.1-Kontext-dev
|
||||
url: https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev
|
||||
github: https://github.com/black-forest-labs/flux
|
||||
mad_tag: pyt_xdit_flux_kontext
|
||||
js_tag: flux_1_kontext_tag
|
||||
benchmark_command:
|
||||
- cd /app/Flux
|
||||
- mkdir results
|
||||
- 'torchrun --nproc_per_node=8 /app/Flux/run_usp.py \'
|
||||
- '--model {model_repo} \'
|
||||
- '--seed 42 \'
|
||||
- '--prompt "Add a cool hat to the cat" \'
|
||||
- '--height 1024 \'
|
||||
- '--width 1024 \'
|
||||
- '--num_inference_steps 30 \'
|
||||
- '--max_sequence_length 512 \'
|
||||
- '--warmup_steps 5 \'
|
||||
- '--no_use_resolution_binning \'
|
||||
- '--ulysses_degree 8 \'
|
||||
- '--use_torch_compile \'
|
||||
- '--img_file_path /app/Flux/cat.png \'
|
||||
- '--model_type flux_kontext \'
|
||||
- '--guidance_scale 2.5 \'
|
||||
- '--num_repetitions 25 \'
|
||||
- '--attention_backend aiter \'
|
||||
- '--benchmark_output_directory results'
|
||||
- model: FLUX.2
|
||||
model_repo: black-forest-labs/FLUX.2-dev
|
||||
url: https://huggingface.co/black-forest-labs/FLUX.2-dev
|
||||
github: https://github.com/black-forest-labs/flux2
|
||||
mad_tag: pyt_xdit_flux_2
|
||||
js_tag: flux_2_tag
|
||||
benchmark_command:
|
||||
- cd /app/Flux
|
||||
- mkdir results
|
||||
- 'torchrun --nproc_per_node=8 /app/Flux/run_usp.py \'
|
||||
- '--model {model_repo} \'
|
||||
- '--seed 42 \'
|
||||
- '--prompt "Add a cool hat to the cat" \'
|
||||
- '--height 1024 \'
|
||||
- '--width 1024 \'
|
||||
- '--num_inference_steps 50 \'
|
||||
- '--max_sequence_length 512 \'
|
||||
- '--warmup_steps 5 \'
|
||||
- '--no_use_resolution_binning \'
|
||||
- '--ulysses_degree 8 \'
|
||||
- '--use_torch_compile \'
|
||||
- '--img_file_paths /app/Flux/cat.png \'
|
||||
- '--model_type flux2 \'
|
||||
- '--guidance_scale 4.0 \'
|
||||
- '--num_repetitions 25 \'
|
||||
- '--attention_backend aiter \'
|
||||
- '--benchmark_output_directory results'
|
||||
- group: StableDiffusion
|
||||
js_tag: stablediffusion
|
||||
models:
|
||||
@@ -103,3 +242,42 @@ docker:
|
||||
github: https://github.com/Stability-AI/sd3.5
|
||||
mad_tag: pyt_xdit_sd_3_5
|
||||
js_tag: stable_diffusion_3_5_large_tag
|
||||
benchmark_command:
|
||||
- cd /app/StableDiffusion3.5
|
||||
- mkdir results
|
||||
- 'torchrun --nproc_per_node=8 /app/StableDiffusion3.5/run.py \'
|
||||
- '--model {model_repo} \'
|
||||
- '--prompt "A capybara holding a sign that reads Hello World" \'
|
||||
- '--num_repetitions 50 \'
|
||||
- '--num_inference_steps 28 \'
|
||||
- '--pipefusion_parallel_degree 4 \'
|
||||
- '--use_cfg_parallel \'
|
||||
- '--use_torch_compile \'
|
||||
- '--dtype torch.float16 \'
|
||||
- '--attention_backend aiter \'
|
||||
- '--benchmark_output_directory results'
|
||||
- group: Z-Image
|
||||
js_tag: z_image
|
||||
models:
|
||||
- model: Z-Image Turbo
|
||||
model_repo: Tongyi-MAI/Z-Image-Turbo
|
||||
url: https://huggingface.co/Tongyi-MAI/Z-Image-Turbo
|
||||
github: https://github.com/Tongyi-MAI/Z-Image
|
||||
mad_tag: pyt_xdit_z_image_turbo
|
||||
js_tag: z_image_turbo_tag
|
||||
benchmark_command:
|
||||
- cd /app/Z-Image
|
||||
- mkdir results
|
||||
- 'torchrun --nproc_per_node=2 /app/Z-Image/run.py \'
|
||||
- '--model {model_repo} \'
|
||||
- '--seed 42 \'
|
||||
- '--prompt "A crowded beach" \'
|
||||
- '--height 1088 \'
|
||||
- '--width 1920 \'
|
||||
- '--num_inference_steps 9 \'
|
||||
- '--ulysses_degree 2 \'
|
||||
- '--use_torch_compile \'
|
||||
- '--guidance_scale 0.0 \'
|
||||
- '--num_repetitions 50 \'
|
||||
- '--attention_backend aiter \'
|
||||
- '--benchmark_output_directory results'
|
||||
|
||||
@@ -13,15 +13,10 @@ xDiT diffusion inference
|
||||
|
||||
{% set docker = data.docker %}
|
||||
|
||||
The `rocm/pytorch-xdit <{{ docker.docker_hub_url }}>`_ Docker image offers
|
||||
a prebuilt, optimized environment based on `xDiT
|
||||
<https://github.com/xdit-project/xDiT>`_ for benchmarking diffusion model
|
||||
video and image generation on AMD Instinct MI355X, MI350X (gfx950), MI325X,
|
||||
and MI300X (gfx942) GPUs.
|
||||
|
||||
The image runs a preview version of ROCm using the new `TheRock
|
||||
<https://github.com/ROCm/TheRock>`__ build system and includes the following
|
||||
components:
|
||||
The `rocm/pytorch-xdit <{{ docker.docker_hub_url }}>`_ Docker image offers a prebuilt, optimized environment based on `xDiT <https://github.com/xdit-project/xDiT>`_ for
|
||||
benchmarking diffusion model video and image generation on gfx942 and gfx950 series (AMD Instinct™ MI300X, MI325X, MI350X, and MI355X) GPUs.
|
||||
The image runs a preview build of ROCm **{{ docker.ROCm }}** produced with the `TheRock <https://github.com/ROCm/TheRock>`_ build system
|
||||
and includes the following components:
|
||||
|
||||
.. dropdown:: Software components - {{ docker.pull_tag.split('-')|last }}
|
||||
|
||||
@@ -105,22 +100,6 @@ vary by model -- select one to get started.
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
Performance measurements
|
||||
========================
|
||||
|
||||
To evaluate performance, the `Performance results with AMD ROCm software
|
||||
<https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8543b7e6d-item-9eda09e707-tab>`__
|
||||
page provides reference throughput and serving measurements for inferencing popular AI models.
|
||||
|
||||
.. important::
|
||||
|
||||
The performance data presented in `Performance results with AMD ROCm
|
||||
software
|
||||
<https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8543b7e6d-item-9eda09e707-tab>`__
|
||||
only reflects the latest version of this inference benchmarking environment.
|
||||
The listed measurements should not be interpreted as the peak performance
|
||||
achievable by AMD Instinct GPUs or ROCm software.
|
||||
|
||||
System validation
|
||||
=================
|
||||
|
||||
@@ -300,7 +279,7 @@ Run inference
|
||||
--tags {{model.mad_tag}} \
|
||||
--keep-model-dir \
|
||||
--live-output
|
||||
|
||||
|
||||
MAD launches a Docker container with the name
|
||||
``container_ci-{{model.mad_tag}}``. The throughput and serving reports of the
|
||||
model are collected in the following paths: ``{{ model.mad_tag }}_throughput.csv``
|
||||
@@ -311,152 +290,15 @@ Run inference
|
||||
To run the benchmarks for {{ model.model }}, use the following command:
|
||||
|
||||
.. code-block:: shell
|
||||
{% if model.model == "Hunyuan Video" %}
|
||||
cd /app/Hunyuanvideo
|
||||
mkdir results
|
||||
|
||||
torchrun --nproc_per_node=8 run.py \
|
||||
--model {{ model.model_repo }} \
|
||||
--prompt "In the large cage, two puppies were wagging their tails at each other." \
|
||||
--height 720 --width 1280 --num_frames 129 \
|
||||
--num_inference_steps 50 --warmup_steps 1 --n_repeats 1 \
|
||||
--ulysses_degree 8 \
|
||||
--enable_tiling --enable_slicing \
|
||||
--use_torch_compile \
|
||||
--bench_output results
|
||||
{{ model.benchmark_command
|
||||
| map('replace', '{model_repo}', model.model_repo)
|
||||
| map('trim')
|
||||
| join('\n ') }}
|
||||
|
||||
{% endif %}
|
||||
{% if model.model == "Wan2.1" %}
|
||||
cd /app/Wan
|
||||
mkdir results
|
||||
|
||||
torchrun --nproc_per_node=8 /app/Wan/run.py \
|
||||
--task i2v \
|
||||
--height 720 \
|
||||
--width 1280 \
|
||||
--model {{ model.model_repo }} \
|
||||
--img_file_path /app/Wan/i2v_input.JPG \
|
||||
--ulysses_degree 8 \
|
||||
--seed 42 \
|
||||
--num_frames 81 \
|
||||
--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
|
||||
--num_repetitions 1 \
|
||||
--num_inference_steps 40 \
|
||||
--use_torch_compile
|
||||
|
||||
{% endif %}
|
||||
{% if model.model == "Wan2.2" %}
|
||||
cd /app/Wan
|
||||
mkdir results
|
||||
|
||||
torchrun --nproc_per_node=8 /app/Wan/run.py \
|
||||
--task i2v \
|
||||
--height 720 \
|
||||
--width 1280 \
|
||||
--model {{ model.model_repo }} \
|
||||
--img_file_path /app/Wan/i2v_input.JPG \
|
||||
--ulysses_degree 8 \
|
||||
--seed 42 \
|
||||
--num_frames 81 \
|
||||
--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
|
||||
--num_repetitions 1 \
|
||||
--num_inference_steps 40 \
|
||||
--use_torch_compile
|
||||
|
||||
{% endif %}
|
||||
|
||||
{% if model.model == "FLUX.1" %}
|
||||
cd /app/Flux
|
||||
mkdir results
|
||||
|
||||
torchrun --nproc_per_node=8 /app/Flux/run.py \
|
||||
--model {{ model.model_repo }} \
|
||||
--seed 42 \
|
||||
--prompt "A small cat" \
|
||||
--height 1024 \
|
||||
--width 1024 \
|
||||
--num_inference_steps 25 \
|
||||
--max_sequence_length 256 \
|
||||
--warmup_steps 5 \
|
||||
--no_use_resolution_binning \
|
||||
--ulysses_degree 8 \
|
||||
--use_torch_compile \
|
||||
--num_repetitions 50
|
||||
|
||||
{% endif %}
|
||||
|
||||
{% if model.model == "FLUX.1 Kontext" %}
|
||||
cd /app/Flux
|
||||
mkdir results
|
||||
|
||||
torchrun --nproc_per_node=8 /app/Flux/run_usp.py \
|
||||
--model {{ model.model_repo }} \
|
||||
--seed 42 \
|
||||
--prompt "Add a cool hat to the cat" \
|
||||
--height 1024 \
|
||||
--width 1024 \
|
||||
--num_inference_steps 30 \
|
||||
--max_sequence_length 512 \
|
||||
--warmup_steps 5 \
|
||||
--no_use_resolution_binning \
|
||||
--ulysses_degree 8 \
|
||||
--use_torch_compile \
|
||||
--img_file_path /app/Flux/cat.png \
|
||||
--model_type flux_kontext \
|
||||
--guidance_scale 2.5 \
|
||||
--num_repetitions 25
|
||||
|
||||
{% endif %}
|
||||
|
||||
{% if model.model == "FLUX.2" %}
|
||||
cd /app/Flux
|
||||
mkdir results
|
||||
|
||||
torchrun --nproc_per_node=8 /app/Flux/run_usp.py \
|
||||
--model {{ model.model_repo }} \
|
||||
--seed 42 \
|
||||
--prompt "Add a cool hat to the cat" \
|
||||
--height 1024 \
|
||||
--width 1024 \
|
||||
--num_inference_steps 50 \
|
||||
--max_sequence_length 512 \
|
||||
--warmup_steps 5 \
|
||||
--no_use_resolution_binning \
|
||||
--ulysses_degree 8 \
|
||||
--use_torch_compile \
|
||||
--img_file_paths /app/Flux/cat.png \
|
||||
--model_type flux2 \
|
||||
--guidance_scale 4.0 \
|
||||
--num_repetitions 25
|
||||
|
||||
{% endif %}
|
||||
|
||||
{% if model.model == "stable-diffusion-3.5-large" %}
|
||||
cd /app/StableDiffusion3.5
|
||||
mkdir results
|
||||
|
||||
torchrun --nproc_per_node=8 /app/StableDiffusion3.5/run.py \
|
||||
--model {{ model.model_repo }} \
|
||||
--num_inference_steps 28 \
|
||||
--prompt "A capybara holding a sign that reads Hello World" \
|
||||
--use_torch_compile \
|
||||
--pipefusion_parallel_degree 4 \
|
||||
--use_cfg_parallel \
|
||||
--num_repetitions 50 \
|
||||
--dtype torch.float16 \
|
||||
--output_path results
|
||||
|
||||
{% endif %}
|
||||
|
||||
The generated video will be stored under the results directory. For the actual benchmark step runtimes, see {% if model.model == "Hunyuan Video" %}stdout.{% elif model.model in ["Wan2.1", "Wan2.2"] %}results/outputs/rank0_*.json{% elif model.model in ["FLUX.1", "FLUX.1 Kontext", "FLUX.2"] %}results/timing.json{% elif model.model == "stable-diffusion-3.5-large"%}benchmark_results.csv{% endif %}
|
||||
The generated output (a video or an image, depending on the model) is stored under the ``results`` directory.
|
||||
|
||||
{% if model.model == "FLUX.1" %}You may also use ``run_usp.py``, which implements USP without modifying the default diffusers pipeline. {% endif %}
|
||||
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
Previous versions
|
||||
=================
|
||||
|
||||
See :doc:`benchmark-docker/previous-versions/xdit-history` to find documentation for previous releases
|
||||
of xDiT diffusion inference performance testing.
|
||||
|
||||
Reference in New Issue
Block a user