mirror of
https://github.com/ROCm/ROCm.git
synced 2026-02-12 23:45:05 -05:00
Compare commits
3 Commits
amd/hsivas
...
develop
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2a669b4b69 | ||
|
|
fe8dff691d | ||
|
|
19891f8ef1 |
@@ -0,0 +1,105 @@
|
|||||||
|
docker:
|
||||||
|
pull_tag: rocm/pytorch-xdit:v25.13
|
||||||
|
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.13/images/sha256-81954713070d67bde08595e03f62110c8a3dd66a9ae17a77d611e01f83f0f4ef
|
||||||
|
ROCm: 7.11.0
|
||||||
|
whats_new:
|
||||||
|
- "Flux.1 Kontext support"
|
||||||
|
- "Flux.2 Dev support"
|
||||||
|
- "Flux FP8 GEMM support"
|
||||||
|
- "Hybrid FP8 attention support for Wan models"
|
||||||
|
components:
|
||||||
|
TheRock:
|
||||||
|
version: 1728a81
|
||||||
|
url: https://github.com/ROCm/TheRock
|
||||||
|
rccl:
|
||||||
|
version: d23d18f
|
||||||
|
url: https://github.com/ROCm/rccl
|
||||||
|
composable_kernel:
|
||||||
|
version: ab0101c
|
||||||
|
url: https://github.com/ROCm/composable_kernel
|
||||||
|
rocm-libraries:
|
||||||
|
version: a2f7c35
|
||||||
|
url: https://github.com/ROCm/rocm-libraries
|
||||||
|
rocm-systems:
|
||||||
|
version: 659737c
|
||||||
|
url: https://github.com/ROCm/rocm-systems
|
||||||
|
torch:
|
||||||
|
version: 91be249
|
||||||
|
url: https://github.com/ROCm/pytorch
|
||||||
|
torchvision:
|
||||||
|
version: b919bd0
|
||||||
|
url: https://github.com/pytorch/vision
|
||||||
|
triton:
|
||||||
|
version: a272dfa
|
||||||
|
url: https://github.com/ROCm/triton
|
||||||
|
accelerate:
|
||||||
|
version: b521400f
|
||||||
|
url: https://github.com/huggingface/accelerate
|
||||||
|
aiter:
|
||||||
|
version: de14bec0
|
||||||
|
url: https://github.com/ROCm/aiter
|
||||||
|
diffusers:
|
||||||
|
version: a1f36ee3e
|
||||||
|
url: https://github.com/huggingface/diffusers
|
||||||
|
xfuser:
|
||||||
|
version: adf2681
|
||||||
|
url: https://github.com/xdit-project/xDiT
|
||||||
|
yunchang:
|
||||||
|
version: 2c9b712
|
||||||
|
url: https://github.com/feifeibear/long-context-attention
|
||||||
|
supported_models:
|
||||||
|
- group: Hunyuan Video
|
||||||
|
js_tag: hunyuan
|
||||||
|
models:
|
||||||
|
- model: Hunyuan Video
|
||||||
|
model_repo: tencent/HunyuanVideo
|
||||||
|
revision: refs/pr/18
|
||||||
|
url: https://huggingface.co/tencent/HunyuanVideo
|
||||||
|
github: https://github.com/Tencent-Hunyuan/HunyuanVideo
|
||||||
|
mad_tag: pyt_xdit_hunyuanvideo
|
||||||
|
js_tag: hunyuan_tag
|
||||||
|
- group: Wan-AI
|
||||||
|
js_tag: wan
|
||||||
|
models:
|
||||||
|
- model: Wan2.1
|
||||||
|
model_repo: Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
|
||||||
|
url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
|
||||||
|
github: https://github.com/Wan-Video/Wan2.1
|
||||||
|
mad_tag: pyt_xdit_wan_2_1
|
||||||
|
js_tag: wan_21_tag
|
||||||
|
- model: Wan2.2
|
||||||
|
model_repo: Wan-AI/Wan2.2-I2V-A14B-Diffusers
|
||||||
|
url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers
|
||||||
|
github: https://github.com/Wan-Video/Wan2.2
|
||||||
|
mad_tag: pyt_xdit_wan_2_2
|
||||||
|
js_tag: wan_22_tag
|
||||||
|
- group: FLUX
|
||||||
|
js_tag: flux
|
||||||
|
models:
|
||||||
|
- model: FLUX.1
|
||||||
|
model_repo: black-forest-labs/FLUX.1-dev
|
||||||
|
url: https://huggingface.co/black-forest-labs/FLUX.1-dev
|
||||||
|
github: https://github.com/black-forest-labs/flux
|
||||||
|
mad_tag: pyt_xdit_flux
|
||||||
|
js_tag: flux_1_tag
|
||||||
|
- model: FLUX.1 Kontext
|
||||||
|
model_repo: black-forest-labs/FLUX.1-Kontext-dev
|
||||||
|
url: https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev
|
||||||
|
github: https://github.com/black-forest-labs/flux
|
||||||
|
mad_tag: pyt_xdit_flux_kontext
|
||||||
|
js_tag: flux_1_kontext_tag
|
||||||
|
- model: FLUX.2
|
||||||
|
model_repo: black-forest-labs/FLUX.2-dev
|
||||||
|
url: https://huggingface.co/black-forest-labs/FLUX.2-dev
|
||||||
|
github: https://github.com/black-forest-labs/flux2
|
||||||
|
mad_tag: pyt_xdit_flux_2
|
||||||
|
js_tag: flux_2_tag
|
||||||
|
- group: StableDiffusion
|
||||||
|
js_tag: stablediffusion
|
||||||
|
models:
|
||||||
|
- model: stable-diffusion-3.5-large
|
||||||
|
model_repo: stabilityai/stable-diffusion-3.5-large
|
||||||
|
url: https://huggingface.co/stabilityai/stable-diffusion-3.5-large
|
||||||
|
github: https://github.com/Stability-AI/sd3.5
|
||||||
|
mad_tag: pyt_xdit_sd_3_5
|
||||||
|
js_tag: stable_diffusion_3_5_large_tag
|
||||||
@@ -1,14 +1,13 @@
|
|||||||
docker:
|
docker:
|
||||||
pull_tag: rocm/pytorch-xdit:v25.13
|
pull_tag: rocm/pytorch-xdit:v26.1
|
||||||
docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.13/images/sha256-81954713070d67bde08595e03f62110c8a3dd66a9ae17a77d611e01f83f0f4ef
|
docker_hub_url: https://hub.docker.com/r/rocm/pytorch-xdit
|
||||||
ROCm: 7.11.0
|
ROCm: 7.11.0
|
||||||
whats_new:
|
whats_new:
|
||||||
- "Flux.1 Kontext support"
|
- "HunyuanVideo 1.5 support"
|
||||||
- "Flux.2 Dev support"
|
- "Z-Image Turbo support"
|
||||||
- "Flux FP8 GEMM support"
|
- "Wan model sharding"
|
||||||
- "Hybrid FP8 attention support for Wan models"
|
|
||||||
components:
|
components:
|
||||||
TheRock:
|
TheRock:
|
||||||
version: 1728a81
|
version: 1728a81
|
||||||
url: https://github.com/ROCm/TheRock
|
url: https://github.com/ROCm/TheRock
|
||||||
rccl:
|
rccl:
|
||||||
@@ -39,10 +38,10 @@ docker:
|
|||||||
version: de14bec0
|
version: de14bec0
|
||||||
url: https://github.com/ROCm/aiter
|
url: https://github.com/ROCm/aiter
|
||||||
diffusers:
|
diffusers:
|
||||||
version: a1f36ee3e
|
version: 6708f5
|
||||||
url: https://github.com/huggingface/diffusers
|
url: https://github.com/huggingface/diffusers
|
||||||
xfuser:
|
xfuser:
|
||||||
version: adf2681
|
version: 0a3d7a
|
||||||
url: https://github.com/xdit-project/xDiT
|
url: https://github.com/xdit-project/xDiT
|
||||||
yunchang:
|
yunchang:
|
||||||
version: 2c9b712
|
version: 2c9b712
|
||||||
@@ -58,6 +57,49 @@ docker:
|
|||||||
github: https://github.com/Tencent-Hunyuan/HunyuanVideo
|
github: https://github.com/Tencent-Hunyuan/HunyuanVideo
|
||||||
mad_tag: pyt_xdit_hunyuanvideo
|
mad_tag: pyt_xdit_hunyuanvideo
|
||||||
js_tag: hunyuan_tag
|
js_tag: hunyuan_tag
|
||||||
|
benchmark_command:
|
||||||
|
- cd /app/Hunyuanvideo
|
||||||
|
- mkdir results
|
||||||
|
- 'torchrun --nproc_per_node=8 run.py \'
|
||||||
|
- '--model {model_repo} \'
|
||||||
|
- '--prompt "In the large cage, two puppies were wagging their tails at each other." \'
|
||||||
|
- '--batch_size 1 \'
|
||||||
|
- '--height 720 --width 1280 \'
|
||||||
|
- '--seed 1168860793 \'
|
||||||
|
- '--num_frames 129 \'
|
||||||
|
- '--num_inference_steps 50 \'
|
||||||
|
- '--warmup_steps 1 \'
|
||||||
|
- '--n_repeats 1 \'
|
||||||
|
- '--sleep_dur 10 \'
|
||||||
|
- '--ulysses_degree 8 \'
|
||||||
|
- '--enable_tiling --enable_slicing \'
|
||||||
|
- '--guidance_scale 6.0 \'
|
||||||
|
- '--use_torch_compile \'
|
||||||
|
- '--attention_backend aiter \'
|
||||||
|
- '--benchmark_output_directory results'
|
||||||
|
- model: Hunyuan Video 1.5
|
||||||
|
model_repo: hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-720p_t2v
|
||||||
|
url: https://huggingface.co/hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-720p_t2v
|
||||||
|
github: https://github.com/Tencent-Hunyuan/HunyuanVideo-1.5
|
||||||
|
mad_tag: pyt_xdit_hunyuanvideo_1_5
|
||||||
|
js_tag: hunyuan_1_5_tag
|
||||||
|
benchmark_command:
|
||||||
|
- cd /app/Hunyuanvideo_1_5
|
||||||
|
- mkdir results
|
||||||
|
- 'torchrun --nproc_per_node=8 /app/Hunyuanvideo_1_5/run.py \'
|
||||||
|
- '--model {model_repo} \'
|
||||||
|
- '--prompt "In the large cage, two puppies were wagging their tails at each other." \'
|
||||||
|
- '--task t2v \'
|
||||||
|
- '--height 720 --width 1280 \'
|
||||||
|
- '--seed 1168860793 \'
|
||||||
|
- '--num_frames 129 \'
|
||||||
|
- '--num_inference_steps 50 \'
|
||||||
|
- '--num_repetitions 1 \'
|
||||||
|
- '--ulysses_degree 8 \'
|
||||||
|
- '--enable_tiling --enable_slicing \'
|
||||||
|
- '--use_torch_compile \'
|
||||||
|
- '--attention_backend aiter \'
|
||||||
|
- '--benchmark_output_directory results'
|
||||||
- group: Wan-AI
|
- group: Wan-AI
|
||||||
js_tag: wan
|
js_tag: wan
|
||||||
models:
|
models:
|
||||||
@@ -67,12 +109,48 @@ docker:
|
|||||||
github: https://github.com/Wan-Video/Wan2.1
|
github: https://github.com/Wan-Video/Wan2.1
|
||||||
mad_tag: pyt_xdit_wan_2_1
|
mad_tag: pyt_xdit_wan_2_1
|
||||||
js_tag: wan_21_tag
|
js_tag: wan_21_tag
|
||||||
|
benchmark_command:
|
||||||
|
- cd /app/Wan
|
||||||
|
- mkdir results
|
||||||
|
- 'torchrun --nproc_per_node=8 /app/Wan/run.py \'
|
||||||
|
- '--model {model_repo} \'
|
||||||
|
- '--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline''s intricate details and the refreshing atmosphere of the seaside." \'
|
||||||
|
- '--task i2v \'
|
||||||
|
- '--height 720 \'
|
||||||
|
- '--width 1280 \'
|
||||||
|
- '--img_file_path /app/Wan/i2v_input.JPG \'
|
||||||
|
- '--num_frames 81 \'
|
||||||
|
- '--ulysses_degree 8 \'
|
||||||
|
- '--seed 42 \'
|
||||||
|
- '--num_repetitions 1 \'
|
||||||
|
- '--num_inference_steps 40 \'
|
||||||
|
- '--use_torch_compile \'
|
||||||
|
- '--attention_backend aiter \'
|
||||||
|
- '--benchmark_output_directory results'
|
||||||
- model: Wan2.2
|
- model: Wan2.2
|
||||||
model_repo: Wan-AI/Wan2.2-I2V-A14B-Diffusers
|
model_repo: Wan-AI/Wan2.2-I2V-A14B-Diffusers
|
||||||
url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers
|
url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers
|
||||||
github: https://github.com/Wan-Video/Wan2.2
|
github: https://github.com/Wan-Video/Wan2.2
|
||||||
mad_tag: pyt_xdit_wan_2_2
|
mad_tag: pyt_xdit_wan_2_2
|
||||||
js_tag: wan_22_tag
|
js_tag: wan_22_tag
|
||||||
|
benchmark_command:
|
||||||
|
- cd /app/Wan
|
||||||
|
- mkdir results
|
||||||
|
- 'torchrun --nproc_per_node=8 /app/Wan/run.py \'
|
||||||
|
- '--model {model_repo} \'
|
||||||
|
- '--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline''s intricate details and the refreshing atmosphere of the seaside." \'
|
||||||
|
- '--task i2v \'
|
||||||
|
- '--height 720 \'
|
||||||
|
- '--width 1280 \'
|
||||||
|
- '--img_file_path /app/Wan/i2v_input.JPG \'
|
||||||
|
- '--num_frames 81 \'
|
||||||
|
- '--ulysses_degree 8 \'
|
||||||
|
- '--seed 42 \'
|
||||||
|
- '--num_repetitions 1 \'
|
||||||
|
- '--num_inference_steps 40 \'
|
||||||
|
- '--use_torch_compile \'
|
||||||
|
- '--attention_backend aiter \'
|
||||||
|
- '--benchmark_output_directory results'
|
||||||
- group: FLUX
|
- group: FLUX
|
||||||
js_tag: flux
|
js_tag: flux
|
||||||
models:
|
models:
|
||||||
@@ -82,18 +160,79 @@ docker:
|
|||||||
github: https://github.com/black-forest-labs/flux
|
github: https://github.com/black-forest-labs/flux
|
||||||
mad_tag: pyt_xdit_flux
|
mad_tag: pyt_xdit_flux
|
||||||
js_tag: flux_1_tag
|
js_tag: flux_1_tag
|
||||||
|
benchmark_command:
|
||||||
|
- cd /app/Flux
|
||||||
|
- mkdir results
|
||||||
|
- 'torchrun --nproc_per_node=8 /app/Flux/run.py \'
|
||||||
|
- '--model {model_repo} \'
|
||||||
|
- '--seed 42 \'
|
||||||
|
- '--prompt "A small cat" \'
|
||||||
|
- '--height 1024 \'
|
||||||
|
- '--width 1024 \'
|
||||||
|
- '--num_inference_steps 25 \'
|
||||||
|
- '--max_sequence_length 256 \'
|
||||||
|
- '--warmup_steps 5 \'
|
||||||
|
- '--no_use_resolution_binning \'
|
||||||
|
- '--ulysses_degree 8 \'
|
||||||
|
- '--use_torch_compile \'
|
||||||
|
- '--guidance_scale 0.0 \'
|
||||||
|
- '--num_repetitions 50 \'
|
||||||
|
- '--attention_backend aiter \'
|
||||||
|
- '--benchmark_output_directory results'
|
||||||
- model: FLUX.1 Kontext
|
- model: FLUX.1 Kontext
|
||||||
model_repo: black-forest-labs/FLUX.1-Kontext-dev
|
model_repo: black-forest-labs/FLUX.1-Kontext-dev
|
||||||
url: https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev
|
url: https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev
|
||||||
github: https://github.com/black-forest-labs/flux
|
github: https://github.com/black-forest-labs/flux
|
||||||
mad_tag: pyt_xdit_flux_kontext
|
mad_tag: pyt_xdit_flux_kontext
|
||||||
js_tag: flux_1_kontext_tag
|
js_tag: flux_1_kontext_tag
|
||||||
|
benchmark_command:
|
||||||
|
- cd /app/Flux
|
||||||
|
- mkdir results
|
||||||
|
- 'torchrun --nproc_per_node=8 /app/Flux/run_usp.py \'
|
||||||
|
- '--model {model_repo} \'
|
||||||
|
- '--seed 42 \'
|
||||||
|
- '--prompt "Add a cool hat to the cat" \'
|
||||||
|
- '--height 1024 \'
|
||||||
|
- '--width 1024 \'
|
||||||
|
- '--num_inference_steps 30 \'
|
||||||
|
- '--max_sequence_length 512 \'
|
||||||
|
- '--warmup_steps 5 \'
|
||||||
|
- '--no_use_resolution_binning \'
|
||||||
|
- '--ulysses_degree 8 \'
|
||||||
|
- '--use_torch_compile \'
|
||||||
|
- '--img_file_path /app/Flux/cat.png \'
|
||||||
|
- '--model_type flux_kontext \'
|
||||||
|
- '--guidance_scale 2.5 \'
|
||||||
|
- '--num_repetitions 25 \'
|
||||||
|
- '--attention_backend aiter \'
|
||||||
|
- '--benchmark_output_directory results'
|
||||||
- model: FLUX.2
|
- model: FLUX.2
|
||||||
model_repo: black-forest-labs/FLUX.2-dev
|
model_repo: black-forest-labs/FLUX.2-dev
|
||||||
url: https://huggingface.co/black-forest-labs/FLUX.2-dev
|
url: https://huggingface.co/black-forest-labs/FLUX.2-dev
|
||||||
github: https://github.com/black-forest-labs/flux2
|
github: https://github.com/black-forest-labs/flux2
|
||||||
mad_tag: pyt_xdit_flux_2
|
mad_tag: pyt_xdit_flux_2
|
||||||
js_tag: flux_2_tag
|
js_tag: flux_2_tag
|
||||||
|
benchmark_command:
|
||||||
|
- cd /app/Flux
|
||||||
|
- mkdir results
|
||||||
|
- 'torchrun --nproc_per_node=8 /app/Flux/run_usp.py \'
|
||||||
|
- '--model {model_repo} \'
|
||||||
|
- '--seed 42 \'
|
||||||
|
- '--prompt "Add a cool hat to the cat" \'
|
||||||
|
- '--height 1024 \'
|
||||||
|
- '--width 1024 \'
|
||||||
|
- '--num_inference_steps 50 \'
|
||||||
|
- '--max_sequence_length 512 \'
|
||||||
|
- '--warmup_steps 5 \'
|
||||||
|
- '--no_use_resolution_binning \'
|
||||||
|
- '--ulysses_degree 8 \'
|
||||||
|
- '--use_torch_compile \'
|
||||||
|
- '--img_file_paths /app/Flux/cat.png \'
|
||||||
|
- '--model_type flux2 \'
|
||||||
|
- '--guidance_scale 4.0 \'
|
||||||
|
- '--num_repetitions 25 \'
|
||||||
|
- '--attention_backend aiter \'
|
||||||
|
- '--benchmark_output_directory results'
|
||||||
- group: StableDiffusion
|
- group: StableDiffusion
|
||||||
js_tag: stablediffusion
|
js_tag: stablediffusion
|
||||||
models:
|
models:
|
||||||
@@ -103,3 +242,42 @@ docker:
|
|||||||
github: https://github.com/Stability-AI/sd3.5
|
github: https://github.com/Stability-AI/sd3.5
|
||||||
mad_tag: pyt_xdit_sd_3_5
|
mad_tag: pyt_xdit_sd_3_5
|
||||||
js_tag: stable_diffusion_3_5_large_tag
|
js_tag: stable_diffusion_3_5_large_tag
|
||||||
|
benchmark_command:
|
||||||
|
- cd /app/StableDiffusion3.5
|
||||||
|
- mkdir results
|
||||||
|
- 'torchrun --nproc_per_node=8 /app/StableDiffusion3.5/run.py \'
|
||||||
|
- '--model {model_repo} \'
|
||||||
|
- '--prompt "A capybara holding a sign that reads Hello World" \'
|
||||||
|
- '--num_repetitions 50 \'
|
||||||
|
- '--num_inference_steps 28 \'
|
||||||
|
- '--pipefusion_parallel_degree 4 \'
|
||||||
|
- '--use_cfg_parallel \'
|
||||||
|
- '--use_torch_compile \'
|
||||||
|
- '--dtype torch.float16 \'
|
||||||
|
- '--attention_backend aiter \'
|
||||||
|
- '--benchmark_output_directory results'
|
||||||
|
- group: Z-Image
|
||||||
|
js_tag: z_image
|
||||||
|
models:
|
||||||
|
- model: Z-Image Turbo
|
||||||
|
model_repo: Tongyi-MAI/Z-Image-Turbo
|
||||||
|
url: https://huggingface.co/Tongyi-MAI/Z-Image-Turbo
|
||||||
|
github: https://github.com/Tongyi-MAI/Z-Image
|
||||||
|
mad_tag: pyt_xdit_z_image_turbo
|
||||||
|
js_tag: z_image_turbo_tag
|
||||||
|
benchmark_command:
|
||||||
|
- cd /app/Z-Image
|
||||||
|
- mkdir results
|
||||||
|
- 'torchrun --nproc_per_node=2 /app/Z-Image/run.py \'
|
||||||
|
- '--model {model_repo} \'
|
||||||
|
- '--seed 42 \'
|
||||||
|
- '--prompt "A crowded beach" \'
|
||||||
|
- '--height 1088 \'
|
||||||
|
- '--width 1920 \'
|
||||||
|
- '--num_inference_steps 9 \'
|
||||||
|
- '--ulysses_degree 2 \'
|
||||||
|
- '--use_torch_compile \'
|
||||||
|
- '--guidance_scale 0.0 \'
|
||||||
|
- '--num_repetitions 50 \'
|
||||||
|
- '--attention_backend aiter \'
|
||||||
|
- '--benchmark_output_directory results'
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ xDiT diffusion inference
|
|||||||
|
|
||||||
.. caution::
|
.. caution::
|
||||||
|
|
||||||
This documentation does not reflect the latest version of ROCm vLLM
|
This documentation does not reflect the latest version of the xDiT diffusion
|
||||||
inference performance documentation. See
|
inference performance documentation. See
|
||||||
:doc:`/how-to/rocm-for-ai/inference/xdit-diffusion-inference` for the latest
|
:doc:`/how-to/rocm-for-ai/inference/xdit-diffusion-inference` for the latest
|
||||||
version.
|
version.
|
||||||
@@ -293,7 +293,7 @@ Run inference
|
|||||||
--tags {{model.mad_tag}} \
|
--tags {{model.mad_tag}} \
|
||||||
--keep-model-dir \
|
--keep-model-dir \
|
||||||
--live-output
|
--live-output
|
||||||
|
|
||||||
MAD launches a Docker container with the name
|
MAD launches a Docker container with the name
|
||||||
``container_ci-{{model.mad_tag}}``. The throughput and serving reports of the
|
``container_ci-{{model.mad_tag}}``. The throughput and serving reports of the
|
||||||
model are collected in the following paths: ``{{ model.mad_tag }}_throughput.csv``
|
model are collected in the following paths: ``{{ model.mad_tag }}_throughput.csv``
|
||||||
@@ -379,7 +379,7 @@ Run inference
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
{% if model.model == "stable-diffusion-3.5-large" %}
|
{% if model.model == "stable-diffusion-3.5-large" %}
|
||||||
cd StableDiffusion3.5
|
cd StableDiffusion3.5
|
||||||
mkdir results
|
mkdir results
|
||||||
|
|
||||||
torchrun --nproc_per_node=8 /app/StableDiffusion3.5/run.py \
|
torchrun --nproc_per_node=8 /app/StableDiffusion3.5/run.py \
|
||||||
|
|||||||
@@ -0,0 +1,474 @@
|
|||||||
|
:orphan:
|
||||||
|
|
||||||
|
.. meta::
|
||||||
|
:description: Learn to validate diffusion model video generation on MI300X, MI350X and MI355X accelerators using
|
||||||
|
prebuilt and optimized Docker images.
|
||||||
|
:keywords: xDiT, diffusion, video, video generation, image, image generation, validate, benchmark
|
||||||
|
|
||||||
|
************************
|
||||||
|
xDiT diffusion inference
|
||||||
|
************************
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
This documentation does not reflect the latest version of the xDiT diffusion
|
||||||
|
inference performance documentation. See
|
||||||
|
:doc:`/how-to/rocm-for-ai/inference/xdit-diffusion-inference` for the latest
|
||||||
|
version.
|
||||||
|
|
||||||
|
.. _xdit-video-diffusion-2513:
|
||||||
|
|
||||||
|
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/xdit_25.13-inference-models.yaml
|
||||||
|
|
||||||
|
{% set docker = data.docker %}
|
||||||
|
|
||||||
|
The `rocm/pytorch-xdit <{{ docker.docker_hub_url }}>`_ Docker image offers
|
||||||
|
a prebuilt, optimized environment based on `xDiT
|
||||||
|
<https://github.com/xdit-project/xDiT>`_ for benchmarking diffusion model
|
||||||
|
video and image generation on AMD Instinct MI355X, MI350X (gfx950), MI325X,
|
||||||
|
and MI300X (gfx942) GPUs.
|
||||||
|
|
||||||
|
The image runs a preview version of ROCm using the new `TheRock
|
||||||
|
<https://github.com/ROCm/TheRock>`__ build system and includes the following
|
||||||
|
components:
|
||||||
|
|
||||||
|
.. dropdown:: Software components - {{ docker.pull_tag.split('-')|last }}
|
||||||
|
|
||||||
|
.. list-table::
|
||||||
|
:header-rows: 1
|
||||||
|
|
||||||
|
* - Software component
|
||||||
|
- Version
|
||||||
|
|
||||||
|
{% for component_name, component_data in docker.components.items() %}
|
||||||
|
* - `{{ component_name }} <{{ component_data.url }}>`_
|
||||||
|
- {{ component_data.version }}
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
Follow this guide to pull the required image, spin up a container, download the model, and run a benchmark.
|
||||||
|
For preview and development releases, see `amdsiloai/pytorch-xdit <https://hub.docker.com/r/amdsiloai/pytorch-xdit>`_.
|
||||||
|
|
||||||
|
What's new
|
||||||
|
==========
|
||||||
|
|
||||||
|
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/xdit_25.13-inference-models.yaml
|
||||||
|
|
||||||
|
{% set docker = data.docker %}
|
||||||
|
|
||||||
|
{% for item in docker.whats_new %}
|
||||||
|
* {{ item }}
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
.. _xdit-video-diffusion-supported-models-2513:
|
||||||
|
|
||||||
|
Supported models
|
||||||
|
================
|
||||||
|
|
||||||
|
The following models are supported for inference performance benchmarking.
|
||||||
|
Some instructions, commands, and recommendations in this documentation might
|
||||||
|
vary by model -- select one to get started.
|
||||||
|
|
||||||
|
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/xdit_25.13-inference-models.yaml
|
||||||
|
|
||||||
|
{% set docker = data.docker %}
|
||||||
|
|
||||||
|
.. raw:: html
|
||||||
|
|
||||||
|
<div id="vllm-benchmark-ud-params-picker" class="container-fluid">
|
||||||
|
<div class="row gx-0">
|
||||||
|
<div class="col-2 me-1 px-2 model-param-head">Model</div>
|
||||||
|
<div class="row col-10 pe-0">
|
||||||
|
{% for model_group in docker.supported_models %}
|
||||||
|
<div class="col-6 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.js_tag }}" tabindex="0">{{ model_group.group }}</div>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="row gx-0 pt-1">
|
||||||
|
<div class="col-2 me-1 px-2 model-param-head">Variant</div>
|
||||||
|
<div class="row col-10 pe-0">
|
||||||
|
{% for model_group in docker.supported_models %}
|
||||||
|
{% set models = model_group.models %}
|
||||||
|
{% for model in models %}
|
||||||
|
{% if models|length % 3 == 0 %}
|
||||||
|
<div class="col-4 px-2 model-param" data-param-k="model" data-param-v="{{ model.js_tag }}" data-param-group="{{ model_group.js_tag }}" tabindex="0">{{ model.model }}</div>
|
||||||
|
{% else %}
|
||||||
|
<div class="col-6 px-2 model-param" data-param-k="model" data-param-v="{{ model.js_tag }}" data-param-group="{{ model_group.js_tag }}" tabindex="0">{{ model.model }}</div>
|
||||||
|
{% endif %}
|
||||||
|
{% endfor %}
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{% for model_group in docker.supported_models %}
|
||||||
|
{% for model in model_group.models %}
|
||||||
|
|
||||||
|
.. container:: model-doc {{ model.js_tag }}
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
To learn more about your specific model, see the `{{ model.model }} model card on Hugging Face <{{ model.url }}>`_
|
||||||
|
or visit the `GitHub page <{{ model.github }}>`__. Note that some models require access authorization before use via an
|
||||||
|
external license agreement through a third party.
|
||||||
|
|
||||||
|
{% endfor %}
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
Performance measurements
|
||||||
|
========================
|
||||||
|
|
||||||
|
To evaluate performance, the `Performance results with AMD ROCm software
|
||||||
|
<https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8543b7e6d-item-9eda09e707-tab>`__
|
||||||
|
page provides reference throughput and serving measurements for inferencing popular AI models.
|
||||||
|
|
||||||
|
.. important::
|
||||||
|
|
||||||
|
The performance data presented in `Performance results with AMD ROCm
|
||||||
|
software
|
||||||
|
<https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8543b7e6d-item-9eda09e707-tab>`__
|
||||||
|
only reflects the latest version of this inference benchmarking environment.
|
||||||
|
The listed measurements should not be interpreted as the peak performance
|
||||||
|
achievable by AMD Instinct GPUs or ROCm software.
|
||||||
|
|
||||||
|
System validation
|
||||||
|
=================
|
||||||
|
|
||||||
|
Before running AI workloads, it's important to validate that your AMD hardware is configured
|
||||||
|
correctly and performing optimally.
|
||||||
|
|
||||||
|
If you have already validated your system settings, including aspects like NUMA auto-balancing, you
|
||||||
|
can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
|
||||||
|
optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
|
||||||
|
before starting.
|
||||||
|
|
||||||
|
To test for optimal performance, consult the recommended :ref:`System health benchmarks
|
||||||
|
<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
|
||||||
|
system's configuration.
|
||||||
|
|
||||||
|
Pull the Docker image
|
||||||
|
=====================
|
||||||
|
|
||||||
|
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/xdit_25.13-inference-models.yaml
|
||||||
|
|
||||||
|
{% set docker = data.docker %}
|
||||||
|
|
||||||
|
For this tutorial, it's recommended to use the latest ``{{ docker.pull_tag }}`` Docker image.
|
||||||
|
Pull the image using the following command:
|
||||||
|
|
||||||
|
.. code-block:: shell
|
||||||
|
|
||||||
|
docker pull {{ docker.pull_tag }}
|
||||||
|
|
||||||
|
Validate and benchmark
|
||||||
|
======================
|
||||||
|
|
||||||
|
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/xdit_25.13-inference-models.yaml
|
||||||
|
|
||||||
|
{% set docker = data.docker %}
|
||||||
|
|
||||||
|
Once the image has been downloaded, you can follow these steps to
|
||||||
|
run benchmarks and generate outputs.
|
||||||
|
|
||||||
|
{% for model_group in docker.supported_models %}
|
||||||
|
{% for model in model_group.models %}
|
||||||
|
|
||||||
|
.. container:: model-doc {{model.js_tag}}
|
||||||
|
|
||||||
|
The following commands are written for {{ model.model }}.
|
||||||
|
See :ref:`xdit-video-diffusion-supported-models-2513` to switch to another available model.
|
||||||
|
|
||||||
|
{% endfor %}
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
Choose your setup method
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
You can either use an existing Hugging Face cache or download the model fresh inside the container.
|
||||||
|
|
||||||
|
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/xdit_25.13-inference-models.yaml
|
||||||
|
|
||||||
|
{% set docker = data.docker %}
|
||||||
|
|
||||||
|
{% for model_group in docker.supported_models %}
|
||||||
|
{% for model in model_group.models %}
|
||||||
|
.. container:: model-doc {{model.js_tag}}
|
||||||
|
|
||||||
|
.. tab-set::
|
||||||
|
|
||||||
|
.. tab-item:: Option 1: Use existing Hugging Face cache
|
||||||
|
|
||||||
|
If you already have models downloaded on your host system, you can mount your existing cache.
|
||||||
|
|
||||||
|
1. Set your Hugging Face cache location.
|
||||||
|
|
||||||
|
.. code-block:: shell
|
||||||
|
|
||||||
|
export HF_HOME=/your/hf_cache/location
|
||||||
|
|
||||||
|
2. Download the model (if not already cached).
|
||||||
|
|
||||||
|
.. code-block:: shell
|
||||||
|
|
||||||
|
huggingface-cli download {{ model.model_repo }} {% if model.revision %} --revision {{ model.revision }} {% endif %}
|
||||||
|
|
||||||
|
3. Launch the container with mounted cache.
|
||||||
|
|
||||||
|
.. code-block:: shell
|
||||||
|
|
||||||
|
docker run \
|
||||||
|
-it --rm \
|
||||||
|
--cap-add=SYS_PTRACE \
|
||||||
|
--security-opt seccomp=unconfined \
|
||||||
|
--user root \
|
||||||
|
--device=/dev/kfd \
|
||||||
|
--device=/dev/dri \
|
||||||
|
--group-add video \
|
||||||
|
--ipc=host \
|
||||||
|
--network host \
|
||||||
|
--privileged \
|
||||||
|
--shm-size 128G \
|
||||||
|
--name pytorch-xdit \
|
||||||
|
-e HSA_NO_SCRATCH_RECLAIM=1 \
|
||||||
|
-e OMP_NUM_THREADS=16 \
|
||||||
|
-e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
|
||||||
|
-e HF_HOME=/app/huggingface_models \
|
||||||
|
-v $HF_HOME:/app/huggingface_models \
|
||||||
|
{{ docker.pull_tag }}
|
||||||
|
|
||||||
|
.. tab-item:: Option 2: Download inside container
|
||||||
|
|
||||||
|
Use this option if you prefer to keep the container self-contained or don't have an existing cache.
|
||||||
|
|
||||||
|
1. Launch the container.
|
||||||
|
|
||||||
|
.. code-block:: shell
|
||||||
|
|
||||||
|
docker run \
|
||||||
|
-it --rm \
|
||||||
|
--cap-add=SYS_PTRACE \
|
||||||
|
--security-opt seccomp=unconfined \
|
||||||
|
--user root \
|
||||||
|
--device=/dev/kfd \
|
||||||
|
--device=/dev/dri \
|
||||||
|
--group-add video \
|
||||||
|
--ipc=host \
|
||||||
|
--network host \
|
||||||
|
--privileged \
|
||||||
|
--shm-size 128G \
|
||||||
|
--name pytorch-xdit \
|
||||||
|
-e HSA_NO_SCRATCH_RECLAIM=1 \
|
||||||
|
-e OMP_NUM_THREADS=16 \
|
||||||
|
-e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
|
||||||
|
{{ docker.pull_tag }}
|
||||||
|
|
||||||
|
2. Inside the container, set the Hugging Face cache location and download the model.
|
||||||
|
|
||||||
|
.. code-block:: shell
|
||||||
|
|
||||||
|
export HF_HOME=/app/huggingface_models
|
||||||
|
huggingface-cli download {{ model.model_repo }} {% if model.revision %} --revision {{ model.revision }} {% endif %}
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
|
||||||
|
Models will be downloaded to the container's filesystem and will be lost when the container is removed unless you persist the data with a volume.
|
||||||
|
{% endfor %}
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
Run inference
|
||||||
|
=============
|
||||||
|
|
||||||
|
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/xdit_25.13-inference-models.yaml
|
||||||
|
|
||||||
|
{% set docker = data.docker %}
|
||||||
|
|
||||||
|
{% for model_group in docker.supported_models %}
|
||||||
|
{% for model in model_group.models %}
|
||||||
|
|
||||||
|
.. container:: model-doc {{ model.js_tag }}
|
||||||
|
|
||||||
|
.. tab-set::
|
||||||
|
|
||||||
|
.. tab-item:: MAD-integrated benchmarking
|
||||||
|
|
||||||
|
1. Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
|
||||||
|
directory and install the required packages on the host machine.
|
||||||
|
|
||||||
|
.. code-block:: shell
|
||||||
|
|
||||||
|
git clone https://github.com/ROCm/MAD
|
||||||
|
cd MAD
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
2. On the host machine, use this command to run the performance benchmark test on
|
||||||
|
the `{{model.model}} <{{ model.url }}>`_ model using one node.
|
||||||
|
|
||||||
|
.. code-block:: shell
|
||||||
|
|
||||||
|
export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
|
||||||
|
madengine run \
|
||||||
|
--tags {{model.mad_tag}} \
|
||||||
|
--keep-model-dir \
|
||||||
|
--live-output
|
||||||
|
|
||||||
|
MAD launches a Docker container with the name
|
||||||
|
``container_ci-{{model.mad_tag}}``. The throughput and serving reports of the
|
||||||
|
model are collected in the following paths: ``{{ model.mad_tag }}_throughput.csv``
|
||||||
|
and ``{{ model.mad_tag }}_serving.csv``.
|
||||||
|
|
||||||
|
.. tab-item:: Standalone benchmarking
|
||||||
|
|
||||||
|
To run the benchmarks for {{ model.model }}, use the following command:
|
||||||
|
|
||||||
|
.. code-block:: shell
|
||||||
|
{% if model.model == "Hunyuan Video" %}
|
||||||
|
cd /app/Hunyuanvideo
|
||||||
|
mkdir results
|
||||||
|
|
||||||
|
torchrun --nproc_per_node=8 run.py \
|
||||||
|
--model {{ model.model_repo }} \
|
||||||
|
--prompt "In the large cage, two puppies were wagging their tails at each other." \
|
||||||
|
--height 720 --width 1280 --num_frames 129 \
|
||||||
|
--num_inference_steps 50 --warmup_steps 1 --n_repeats 1 \
|
||||||
|
--ulysses_degree 8 \
|
||||||
|
--enable_tiling --enable_slicing \
|
||||||
|
--use_torch_compile \
|
||||||
|
--bench_output results
|
||||||
|
|
||||||
|
{% endif %}
|
||||||
|
{% if model.model == "Wan2.1" %}
|
||||||
|
cd /app/Wan
|
||||||
|
mkdir results
|
||||||
|
|
||||||
|
torchrun --nproc_per_node=8 /app/Wan/run.py \
|
||||||
|
--task i2v \
|
||||||
|
--height 720 \
|
||||||
|
--width 1280 \
|
||||||
|
--model {{ model.model_repo }} \
|
||||||
|
--img_file_path /app/Wan/i2v_input.JPG \
|
||||||
|
--ulysses_degree 8 \
|
||||||
|
--seed 42 \
|
||||||
|
--num_frames 81 \
|
||||||
|
--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
|
||||||
|
--num_repetitions 1 \
|
||||||
|
--num_inference_steps 40 \
|
||||||
|
--use_torch_compile
|
||||||
|
|
||||||
|
{% endif %}
|
||||||
|
{% if model.model == "Wan2.2" %}
|
||||||
|
cd /app/Wan
|
||||||
|
mkdir results
|
||||||
|
|
||||||
|
torchrun --nproc_per_node=8 /app/Wan/run.py \
|
||||||
|
--task i2v \
|
||||||
|
--height 720 \
|
||||||
|
--width 1280 \
|
||||||
|
--model {{ model.model_repo }} \
|
||||||
|
--img_file_path /app/Wan/i2v_input.JPG \
|
||||||
|
--ulysses_degree 8 \
|
||||||
|
--seed 42 \
|
||||||
|
--num_frames 81 \
|
||||||
|
--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
|
||||||
|
--num_repetitions 1 \
|
||||||
|
--num_inference_steps 40 \
|
||||||
|
--use_torch_compile
|
||||||
|
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if model.model == "FLUX.1" %}
|
||||||
|
cd /app/Flux
|
||||||
|
mkdir results
|
||||||
|
|
||||||
|
torchrun --nproc_per_node=8 /app/Flux/run.py \
|
||||||
|
--model {{ model.model_repo }} \
|
||||||
|
--seed 42 \
|
||||||
|
--prompt "A small cat" \
|
||||||
|
--height 1024 \
|
||||||
|
--width 1024 \
|
||||||
|
--num_inference_steps 25 \
|
||||||
|
--max_sequence_length 256 \
|
||||||
|
--warmup_steps 5 \
|
||||||
|
--no_use_resolution_binning \
|
||||||
|
--ulysses_degree 8 \
|
||||||
|
--use_torch_compile \
|
||||||
|
--num_repetitions 50
|
||||||
|
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if model.model == "FLUX.1 Kontext" %}
|
||||||
|
cd /app/Flux
|
||||||
|
mkdir results
|
||||||
|
|
||||||
|
torchrun --nproc_per_node=8 /app/Flux/run_usp.py \
|
||||||
|
--model {{ model.model_repo }} \
|
||||||
|
--seed 42 \
|
||||||
|
--prompt "Add a cool hat to the cat" \
|
||||||
|
--height 1024 \
|
||||||
|
--width 1024 \
|
||||||
|
--num_inference_steps 30 \
|
||||||
|
--max_sequence_length 512 \
|
||||||
|
--warmup_steps 5 \
|
||||||
|
--no_use_resolution_binning \
|
||||||
|
--ulysses_degree 8 \
|
||||||
|
--use_torch_compile \
|
||||||
|
--img_file_path /app/Flux/cat.png \
|
||||||
|
--model_type flux_kontext \
|
||||||
|
--guidance_scale 2.5 \
|
||||||
|
--num_repetitions 25
|
||||||
|
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if model.model == "FLUX.2" %}
|
||||||
|
cd /app/Flux
|
||||||
|
mkdir results
|
||||||
|
|
||||||
|
torchrun --nproc_per_node=8 /app/Flux/run_usp.py \
|
||||||
|
--model {{ model.model_repo }} \
|
||||||
|
--seed 42 \
|
||||||
|
--prompt "Add a cool hat to the cat" \
|
||||||
|
--height 1024 \
|
||||||
|
--width 1024 \
|
||||||
|
--num_inference_steps 50 \
|
||||||
|
--max_sequence_length 512 \
|
||||||
|
--warmup_steps 5 \
|
||||||
|
--no_use_resolution_binning \
|
||||||
|
--ulysses_degree 8 \
|
||||||
|
--use_torch_compile \
|
||||||
|
--img_file_paths /app/Flux/cat.png \
|
||||||
|
--model_type flux2 \
|
||||||
|
--guidance_scale 4.0 \
|
||||||
|
--num_repetitions 25
|
||||||
|
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if model.model == "stable-diffusion-3.5-large" %}
|
||||||
|
cd /app/StableDiffusion3.5
|
||||||
|
mkdir results
|
||||||
|
|
||||||
|
torchrun --nproc_per_node=8 /app/StableDiffusion3.5/run.py \
|
||||||
|
--model {{ model.model_repo }} \
|
||||||
|
--num_inference_steps 28 \
|
||||||
|
--prompt "A capybara holding a sign that reads Hello World" \
|
||||||
|
--use_torch_compile \
|
||||||
|
--pipefusion_parallel_degree 4 \
|
||||||
|
--use_cfg_parallel \
|
||||||
|
--num_repetitions 50 \
|
||||||
|
--dtype torch.float16 \
|
||||||
|
--output_path results
|
||||||
|
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
The generated video or image will be stored under the results directory. For the actual benchmark step runtimes, see {% if model.model == "Hunyuan Video" %}stdout.{% elif model.model in ["Wan2.1", "Wan2.2"] %}results/outputs/rank0_*.json{% elif model.model in ["FLUX.1", "FLUX.1 Kontext", "FLUX.2"] %}results/timing.json{% elif model.model == "stable-diffusion-3.5-large"%}benchmark_results.csv{% endif %}
|
||||||
|
|
||||||
|
{% if model.model == "FLUX.1" %}You may also use ``run_usp.py``, which implements USP without modifying the default diffusers pipeline. {% endif %}
|
||||||
|
|
||||||
|
{% endfor %}
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
Previous versions
|
||||||
|
=================
|
||||||
|
|
||||||
|
See
|
||||||
|
:doc:`/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-history`
|
||||||
|
to find documentation for previous releases of xDiT diffusion inference
|
||||||
|
performance testing.
|
||||||
@@ -15,33 +15,40 @@ benchmarking, see the version-specific documentation.
|
|||||||
- Components
|
- Components
|
||||||
- Resources
|
- Resources
|
||||||
|
|
||||||
* - ``rocm/pytorch-xdit:v25.13`` (latest)
|
* - ``rocm/pytorch-xdit:v26.1``
|
||||||
-
|
-
|
||||||
* TheRock 1728a81
|
* TheRock 1728a81
|
||||||
-
|
-
|
||||||
* :doc:`Documentation <../../xdit-diffusion-inference>`
|
* :doc:`Documentation <../../xdit-diffusion-inference>`
|
||||||
|
* `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-xdit/v26.1/images/sha256-4e35ebcad47042a41389b992ecb3489b3b0a922e4c34c7a0dd1098733a3db513>`__
|
||||||
|
|
||||||
|
* - ``rocm/pytorch-xdit:v25.13``
|
||||||
|
-
|
||||||
|
* TheRock 1728a81
|
||||||
|
-
|
||||||
|
* :doc:`Documentation <xdit-25.13>`
|
||||||
* `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-xdit/v25.13/images/sha256-81954713070d67bde08595e03f62110c8a3dd66a9ae17a77d611e01f83f0f4ef>`__
|
* `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-xdit/v25.13/images/sha256-81954713070d67bde08595e03f62110c8a3dd66a9ae17a77d611e01f83f0f4ef>`__
|
||||||
|
|
||||||
* - ``rocm/pytorch-xdit:v25.12``
|
* - ``rocm/pytorch-xdit:v25.12``
|
||||||
-
|
-
|
||||||
* `ROCm 7.10.0 preview <https://rocm.docs.amd.com/en/7.10.0-preview/about/release-notes.html>`__
|
* `ROCm 7.10.0 preview <https://rocm.docs.amd.com/en/7.10.0-preview/about/release-notes.html>`__
|
||||||
* TheRock 3e3f834
|
* TheRock 3e3f834
|
||||||
-
|
-
|
||||||
* :doc:`Documentation <xdit-25.12>`
|
* :doc:`Documentation <xdit-25.12>`
|
||||||
* `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-xdit/v25.12/images/sha256-e06895132316bf3c393366b70a91eaab6755902dad0100e6e2b38310547d9256>`__
|
* `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-xdit/v25.12/images/sha256-e06895132316bf3c393366b70a91eaab6755902dad0100e6e2b38310547d9256>`__
|
||||||
|
|
||||||
* - ``rocm/pytorch-xdit:v25.11``
|
* - ``rocm/pytorch-xdit:v25.11``
|
||||||
-
|
-
|
||||||
* `ROCm 7.10.0 preview <https://rocm.docs.amd.com/en/7.10.0-preview/about/release-notes.html>`__
|
* `ROCm 7.10.0 preview <https://rocm.docs.amd.com/en/7.10.0-preview/about/release-notes.html>`__
|
||||||
* TheRock 3e3f834
|
* TheRock 3e3f834
|
||||||
-
|
-
|
||||||
* :doc:`Documentation <xdit-25.11>`
|
* :doc:`Documentation <xdit-25.11>`
|
||||||
* `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-xdit/v25.11/images/sha256-c9fa659439bb024f854b4d5eea598347251b02c341c55f66c98110832bde4216>`__
|
* `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-xdit/v25.11/images/sha256-c9fa659439bb024f854b4d5eea598347251b02c341c55f66c98110832bde4216>`__
|
||||||
|
|
||||||
* - ``rocm/pytorch-xdit:v25.10``
|
* - ``rocm/pytorch-xdit:v25.10``
|
||||||
-
|
-
|
||||||
* `ROCm 7.9.0 preview <https://rocm.docs.amd.com/en/7.9.0-preview/about/release-notes.html>`__
|
* `ROCm 7.9.0 preview <https://rocm.docs.amd.com/en/7.9.0-preview/about/release-notes.html>`__
|
||||||
* TheRock 7afbe45
|
* TheRock 7afbe45
|
||||||
-
|
-
|
||||||
* :doc:`Documentation <xdit-25.10>`
|
* :doc:`Documentation <xdit-25.10>`
|
||||||
* `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-xdit/v25.10/images/sha256-d79715ff18a9470e3f907cec8a9654d6b783c63370b091446acffc0de4d7070e>`__
|
* `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-xdit/v25.10/images/sha256-d79715ff18a9470e3f907cec8a9654d6b783c63370b091446acffc0de4d7070e>`__
|
||||||
|
|||||||
@@ -13,15 +13,10 @@ xDiT diffusion inference
|
|||||||
|
|
||||||
{% set docker = data.docker %}
|
{% set docker = data.docker %}
|
||||||
|
|
||||||
The `rocm/pytorch-xdit <{{ docker.docker_hub_url }}>`_ Docker image offers
|
The `rocm/pytorch-xdit <{{ docker.docker_hub_url }}>`_ Docker image offers a prebuilt, optimized environment based on `xDiT <https://github.com/xdit-project/xDiT>`_ for
|
||||||
a prebuilt, optimized environment based on `xDiT
|
benchmarking diffusion model video and image generation on gfx942 and gfx950 series (AMD Instinct™ MI300X, MI325X, MI350X, and MI355X) GPUs.
|
||||||
<https://github.com/xdit-project/xDiT>`_ for benchmarking diffusion model
|
The image runs ROCm **{{docker.ROCm}}** (preview) based on `TheRock <https://github.com/ROCm/TheRock>`_
|
||||||
video and image generation on AMD Instinct MI355X, MI350X (gfx950), MI325X,
|
and includes the following components:
|
||||||
and MI300X (gfx942) GPUs.
|
|
||||||
|
|
||||||
The image runs a preview version of ROCm using the new `TheRock
|
|
||||||
<https://github.com/ROCm/TheRock>`__ build system and includes the following
|
|
||||||
components:
|
|
||||||
|
|
||||||
.. dropdown:: Software components - {{ docker.pull_tag.split('-')|last }}
|
.. dropdown:: Software components - {{ docker.pull_tag.split('-')|last }}
|
||||||
|
|
||||||
@@ -105,22 +100,6 @@ vary by model -- select one to get started.
|
|||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
Performance measurements
|
|
||||||
========================
|
|
||||||
|
|
||||||
To evaluate performance, the `Performance results with AMD ROCm software
|
|
||||||
<https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8543b7e6d-item-9eda09e707-tab>`__
|
|
||||||
page provides reference throughput and serving measurements for inferencing popular AI models.
|
|
||||||
|
|
||||||
.. important::
|
|
||||||
|
|
||||||
The performance data presented in `Performance results with AMD ROCm
|
|
||||||
software
|
|
||||||
<https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8543b7e6d-item-9eda09e707-tab>`__
|
|
||||||
only reflects the latest version of this inference benchmarking environment.
|
|
||||||
The listed measurements should not be interpreted as the peak performance
|
|
||||||
achievable by AMD Instinct GPUs or ROCm software.
|
|
||||||
|
|
||||||
System validation
|
System validation
|
||||||
=================
|
=================
|
||||||
|
|
||||||
@@ -300,7 +279,7 @@ Run inference
|
|||||||
--tags {{model.mad_tag}} \
|
--tags {{model.mad_tag}} \
|
||||||
--keep-model-dir \
|
--keep-model-dir \
|
||||||
--live-output
|
--live-output
|
||||||
|
|
||||||
MAD launches a Docker container with the name
|
MAD launches a Docker container with the name
|
||||||
``container_ci-{{model.mad_tag}}``. The throughput and serving reports of the
|
``container_ci-{{model.mad_tag}}``. The throughput and serving reports of the
|
||||||
model are collected in the following paths: ``{{ model.mad_tag }}_throughput.csv``
|
model are collected in the following paths: ``{{ model.mad_tag }}_throughput.csv``
|
||||||
@@ -311,152 +290,15 @@ Run inference
|
|||||||
To run the benchmarks for {{ model.model }}, use the following command:
|
To run the benchmarks for {{ model.model }}, use the following command:
|
||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
{% if model.model == "Hunyuan Video" %}
|
|
||||||
cd /app/Hunyuanvideo
|
|
||||||
mkdir results
|
|
||||||
|
|
||||||
torchrun --nproc_per_node=8 run.py \
|
{{ model.benchmark_command
|
||||||
--model {{ model.model_repo }} \
|
| map('replace', '{model_repo}', model.model_repo)
|
||||||
--prompt "In the large cage, two puppies were wagging their tails at each other." \
|
| map('trim')
|
||||||
--height 720 --width 1280 --num_frames 129 \
|
| join('\n ') }}
|
||||||
--num_inference_steps 50 --warmup_steps 1 --n_repeats 1 \
|
|
||||||
--ulysses_degree 8 \
|
|
||||||
--enable_tiling --enable_slicing \
|
|
||||||
--use_torch_compile \
|
|
||||||
--bench_output results
|
|
||||||
|
|
||||||
{% endif %}
|
The generated video or image will be stored under the results directory.
|
||||||
{% if model.model == "Wan2.1" %}
|
|
||||||
cd /app/Wan
|
|
||||||
mkdir results
|
|
||||||
|
|
||||||
torchrun --nproc_per_node=8 /app/Wan/run.py \
|
|
||||||
--task i2v \
|
|
||||||
--height 720 \
|
|
||||||
--width 1280 \
|
|
||||||
--model {{ model.model_repo }} \
|
|
||||||
--img_file_path /app/Wan/i2v_input.JPG \
|
|
||||||
--ulysses_degree 8 \
|
|
||||||
--seed 42 \
|
|
||||||
--num_frames 81 \
|
|
||||||
--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
|
|
||||||
--num_repetitions 1 \
|
|
||||||
--num_inference_steps 40 \
|
|
||||||
--use_torch_compile
|
|
||||||
|
|
||||||
{% endif %}
|
|
||||||
{% if model.model == "Wan2.2" %}
|
|
||||||
cd /app/Wan
|
|
||||||
mkdir results
|
|
||||||
|
|
||||||
torchrun --nproc_per_node=8 /app/Wan/run.py \
|
|
||||||
--task i2v \
|
|
||||||
--height 720 \
|
|
||||||
--width 1280 \
|
|
||||||
--model {{ model.model_repo }} \
|
|
||||||
--img_file_path /app/Wan/i2v_input.JPG \
|
|
||||||
--ulysses_degree 8 \
|
|
||||||
--seed 42 \
|
|
||||||
--num_frames 81 \
|
|
||||||
--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
|
|
||||||
--num_repetitions 1 \
|
|
||||||
--num_inference_steps 40 \
|
|
||||||
--use_torch_compile
|
|
||||||
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
{% if model.model == "FLUX.1" %}
|
|
||||||
cd /app/Flux
|
|
||||||
mkdir results
|
|
||||||
|
|
||||||
torchrun --nproc_per_node=8 /app/Flux/run.py \
|
|
||||||
--model {{ model.model_repo }} \
|
|
||||||
--seed 42 \
|
|
||||||
--prompt "A small cat" \
|
|
||||||
--height 1024 \
|
|
||||||
--width 1024 \
|
|
||||||
--num_inference_steps 25 \
|
|
||||||
--max_sequence_length 256 \
|
|
||||||
--warmup_steps 5 \
|
|
||||||
--no_use_resolution_binning \
|
|
||||||
--ulysses_degree 8 \
|
|
||||||
--use_torch_compile \
|
|
||||||
--num_repetitions 50
|
|
||||||
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
{% if model.model == "FLUX.1 Kontext" %}
|
|
||||||
cd /app/Flux
|
|
||||||
mkdir results
|
|
||||||
|
|
||||||
torchrun --nproc_per_node=8 /app/Flux/run_usp.py \
|
|
||||||
--model {{ model.model_repo }} \
|
|
||||||
--seed 42 \
|
|
||||||
--prompt "Add a cool hat to the cat" \
|
|
||||||
--height 1024 \
|
|
||||||
--width 1024 \
|
|
||||||
--num_inference_steps 30 \
|
|
||||||
--max_sequence_length 512 \
|
|
||||||
--warmup_steps 5 \
|
|
||||||
--no_use_resolution_binning \
|
|
||||||
--ulysses_degree 8 \
|
|
||||||
--use_torch_compile \
|
|
||||||
--img_file_path /app/Flux/cat.png \
|
|
||||||
--model_type flux_kontext \
|
|
||||||
--guidance_scale 2.5 \
|
|
||||||
--num_repetitions 25
|
|
||||||
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
{% if model.model == "FLUX.2" %}
|
|
||||||
cd /app/Flux
|
|
||||||
mkdir results
|
|
||||||
|
|
||||||
torchrun --nproc_per_node=8 /app/Flux/run_usp.py \
|
|
||||||
--model {{ model.model_repo }} \
|
|
||||||
--seed 42 \
|
|
||||||
--prompt "Add a cool hat to the cat" \
|
|
||||||
--height 1024 \
|
|
||||||
--width 1024 \
|
|
||||||
--num_inference_steps 50 \
|
|
||||||
--max_sequence_length 512 \
|
|
||||||
--warmup_steps 5 \
|
|
||||||
--no_use_resolution_binning \
|
|
||||||
--ulysses_degree 8 \
|
|
||||||
--use_torch_compile \
|
|
||||||
--img_file_paths /app/Flux/cat.png \
|
|
||||||
--model_type flux2 \
|
|
||||||
--guidance_scale 4.0 \
|
|
||||||
--num_repetitions 25
|
|
||||||
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
{% if model.model == "stable-diffusion-3.5-large" %}
|
|
||||||
cd /app/StableDiffusion3.5
|
|
||||||
mkdir results
|
|
||||||
|
|
||||||
torchrun --nproc_per_node=8 /app/StableDiffusion3.5/run.py \
|
|
||||||
--model {{ model.model_repo }} \
|
|
||||||
--num_inference_steps 28 \
|
|
||||||
--prompt "A capybara holding a sign that reads Hello World" \
|
|
||||||
--use_torch_compile \
|
|
||||||
--pipefusion_parallel_degree 4 \
|
|
||||||
--use_cfg_parallel \
|
|
||||||
--num_repetitions 50 \
|
|
||||||
--dtype torch.float16 \
|
|
||||||
--output_path results
|
|
||||||
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
The generated video will be stored under the results directory. For the actual benchmark step runtimes, see {% if model.model == "Hunyuan Video" %}stdout.{% elif model.model in ["Wan2.1", "Wan2.2"] %}results/outputs/rank0_*.json{% elif model.model in ["FLUX.1", "FLUX.1 Kontext", "FLUX.2"] %}results/timing.json{% elif model.model == "stable-diffusion-3.5-large"%}benchmark_results.csv{% endif %}
|
|
||||||
|
|
||||||
{% if model.model == "FLUX.1" %}You may also use ``run_usp.py`` which implements USP without modifying the default diffusers pipeline. {% endif %}
|
{% if model.model == "FLUX.1" %}You may also use ``run_usp.py`` which implements USP without modifying the default diffusers pipeline. {% endif %}
|
||||||
|
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
Previous versions
|
|
||||||
=================
|
|
||||||
|
|
||||||
See :doc:`benchmark-docker/previous-versions/xdit-history` to find documentation for previous releases
|
|
||||||
of xDiT diffusion inference performance testing.
|
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ click==8.3.1
|
|||||||
# sphinx-external-toc
|
# sphinx-external-toc
|
||||||
comm==0.2.3
|
comm==0.2.3
|
||||||
# via ipykernel
|
# via ipykernel
|
||||||
cryptography==46.0.3
|
cryptography==46.0.5
|
||||||
# via pyjwt
|
# via pyjwt
|
||||||
debugpy==1.8.19
|
debugpy==1.8.19
|
||||||
# via ipykernel
|
# via ipykernel
|
||||||
|
|||||||
Reference in New Issue
Block a user