xDiT diffusion inference v25.12 documentation update (#5786)

* Add xdit-diffusion ROCm docs page.

* Update template formatting and fix Sphinx warnings.

* Add System Validation section.

* Add sw component versions/commits.

* Update to use latest v25.10 image instead of v25.9

* Update commands and add FLUX instructions.

* Update Flux instructions. Change image tag. Describe as diffusion inference instead of specifically video.

* git rm xdit-video-diffusion.rst

* Docs for v25.12

* Add hyperlinks to components

* Command fixes

* Add -Diffusers suffix to Wan model repos.

* Simplify YAML file and clean up main RST page.

* Fix spelling; add 'js' to wordlist.

* Fix merge conflict.

---------

Co-authored-by: Kristoffer <kristoffer.torp@amd.com>

Authored by peterjunpark on 2025-12-17 10:20:10 -05:00; committed by GitHub.
Parent: 1b4f25733d
Commit: 459283da3c
6 changed files with 160 additions and 164 deletions

View File

@@ -261,6 +261,7 @@ Ioffe
 JAX's
 JAXLIB
 Jinja
+js
 JSON
 Jupyter
 KFD

View File

@@ -1,109 +1,91 @@
-xdit_diffusion_inference:
-  docker:
-    - version: v25-11
-      pull_tag: rocm/pytorch-xdit:v25.11
-      docker_hub_url: https://hub.docker.com/r/rocm/pytorch-xdit
-      ROCm: 7.10.0
-      supported_models:
-        - group: Hunyuan Video
-          models:
-            - Hunyuan Video
-        - group: Wan-AI
-          models:
-            - Wan2.1
-            - Wan2.2
-        - group: FLUX
-          models:
-            - FLUX.1
-      whats_new:
-        - "Minor bug fixes and clarifications to READMEs."
-        - "Bumps TheRock, AITER, Diffusers, xDiT versions."
-        - "Changes Aiter rounding mode for faster gfx942 FWD Attention."
-      components:
-        TheRock: 3e3f834
-        rccl: d23d18f
-        composable_kernel: 2570462
-        rocm-libraries: 0588f07
-        rocm-systems: 473025a
-        torch: 73adac
-        torchvision: f5c6c2e
-        triton: 7416ffc
-        accelerate: 34c1779
-        aiter: de14bec
-        diffusers: 40528e9
-        xfuser: 83978b5
-        yunchang: 2c9b712
-    - version: v25-10
-      pull_tag: rocm/pytorch-xdit:v25.10
-      docker_hub_url: https://hub.docker.com/r/rocm/pytorch-xdit
-      ROCm: 7.9.0
-      supported_models:
-        - group: Hunyuan Video
-          models:
-            - Hunyuan Video
-        - group: Wan-AI
-          models:
-            - Wan2.1
-            - Wan2.2
-        - group: FLUX
-          models:
-            - FLUX.1
-      whats_new:
-        - "First official xDiT Docker Release for Diffusion Inference."
-        - "Supports gfx942 and gfx950 series (AMD Instinct™ MI300X, MI325X, MI350X, and MI355X)."
-        - "Support Wan 2.1, Wan 2.2, HunyuanVideo and Flux workloads."
-      components:
-        TheRock: 7afbe45
-        rccl: 9b04b2a
-        composable_kernel: b7a806f
-        rocm-libraries: f104555
-        rocm-systems: 25922d0
-        torch: 2.10.0a0+gite9c9017
-        torchvision: 0.22.0a0+966da7e
-        triton: 3.5.0+git52e49c12
-        accelerate: 1.11.0.dev0
-        aiter: 0.1.5.post4.dev20+ga25e55e79
-        diffusers: 0.36.0.dev0
-        xfuser: 0.4.4
-        yunchang: 0.6.3.post1
-  model_groups:
-    - group: Hunyuan Video
-      tag: hunyuan
-      models:
-        - model: Hunyuan Video
-          page_tag: hunyuan_tag
-          model_name: hunyuanvideo
-          model_repo: tencent/HunyuanVideo
-          revision: refs/pr/18
-          url: https://huggingface.co/tencent/HunyuanVideo
-          github: https://github.com/Tencent-Hunyuan/HunyuanVideo
-          mad_tag: pyt_xdit_hunyuanvideo
-    - group: Wan-AI
-      tag: wan
-      models:
-        - model: Wan2.1
-          page_tag: wan_21_tag
-          model_name: wan2_1-i2v-14b-720p
-          model_repo: Wan-AI/Wan2.1-I2V-14B-720P
-          url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P
-          github: https://github.com/Wan-Video/Wan2.1
-          mad_tag: pyt_xdit_wan_2_1
-        - model: Wan2.2
-          page_tag: wan_22_tag
-          model_name: wan2_2-i2v-a14b
-          model_repo: Wan-AI/Wan2.2-I2V-A14B
-          url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B
-          github: https://github.com/Wan-Video/Wan2.2
-          mad_tag: pyt_xdit_wan_2_2
-    - group: FLUX
-      tag: flux
-      models:
-        - model: FLUX.1
-          page_tag: flux_1_tag
-          model_name: FLUX.1-dev
-          model_repo: black-forest-labs/FLUX.1-dev
-          url: https://huggingface.co/black-forest-labs/FLUX.1-dev
-          github: https://github.com/black-forest-labs/flux
-          mad_tag: pyt_xdit_flux
+docker:
+  pull_tag: rocm/pytorch-xdit:v25.12
+  docker_hub_url: https://hub.docker.com/r/rocm/pytorch-xdit
+  ROCm: 7.10.0
+  whats_new:
+    - "Adds T2V and TI2V support for Wan models."
+    - "Adds support for SD-3.5 T2I model."
+  components:
+    TheRock:
+      version: 3e3f834
+      url: https://github.com/ROCm/TheRock
+    rccl:
+      version: d23d18f
+      url: https://github.com/ROCm/rccl
+    composable_kernel:
+      version: 2570462
+      url: https://github.com/ROCm/composable_kernel
+    rocm-libraries:
+      version: 0588f07
+      url: https://github.com/ROCm/rocm-libraries
+    rocm-systems:
+      version: 473025a
+      url: https://github.com/ROCm/rocm-systems
+    torch:
+      version: 73adac
+      url: https://github.com/pytorch/pytorch
+    torchvision:
+      version: f5c6c2e
+      url: https://github.com/pytorch/vision
+    triton:
+      version: 7416ffc
+      url: https://github.com/triton-lang/triton
+    accelerate:
+      version: 34c1779
+      url: https://github.com/huggingface/accelerate
+    aiter:
+      version: de14bec
+      url: https://github.com/ROCm/aiter
+    diffusers:
+      version: 40528e9
+      url: https://github.com/huggingface/diffusers
+    xfuser:
+      version: ccba9d5
+      url: https://github.com/xdit-project/xDiT
+    yunchang:
+      version: 2c9b712
+      url: https://github.com/feifeibear/long-context-attention
+  supported_models:
+    - group: Hunyuan Video
+      js_tag: hunyuan
+      models:
+        - model: Hunyuan Video
+          model_repo: tencent/HunyuanVideo
+          revision: refs/pr/18
+          url: https://huggingface.co/tencent/HunyuanVideo
+          github: https://github.com/Tencent-Hunyuan/HunyuanVideo
+          mad_tag: pyt_xdit_hunyuanvideo
+          js_tag: hunyuan_tag
+    - group: Wan-AI
+      js_tag: wan
+      models:
+        - model: Wan2.1
+          model_repo: Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
+          url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
+          github: https://github.com/Wan-Video/Wan2.1
+          mad_tag: pyt_xdit_wan_2_1
+          js_tag: wan_21_tag
+        - model: Wan2.2
+          model_repo: Wan-AI/Wan2.2-I2V-A14B-Diffusers
+          url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers
+          github: https://github.com/Wan-Video/Wan2.2
+          mad_tag: pyt_xdit_wan_2_2
+          js_tag: wan_22_tag
+    - group: FLUX
+      js_tag: flux
+      models:
+        - model: FLUX.1
+          model_repo: black-forest-labs/FLUX.1-dev
+          url: https://huggingface.co/black-forest-labs/FLUX.1-dev
+          github: https://github.com/black-forest-labs/flux
+          mad_tag: pyt_xdit_flux
+          js_tag: flux_1_tag
+    - group: Stable Diffusion
+      js_tag: stablediffusion
+      models:
+        - model: stable-diffusion-3.5-large
+          model_repo: stabilityai/stable-diffusion-3.5-large
+          url: https://huggingface.co/stabilityai/stable-diffusion-3.5-large
+          github: https://github.com/Stability-AI/sd3.5
+          mad_tag: pyt_xdit_sd_3_5
+          js_tag: stable_diffusion_3_5_large_tag
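
For orientation, this reworked file is what the page templates below receive as ``data``. A minimal sketch of reading the new flat schema outside the docs build, assuming PyYAML and a local copy of the file (illustrative only, not part of the build):

import yaml

with open("xdit-inference-models.yaml") as f:
    data = yaml.safe_load(f)

docker = data["docker"]  # a single dict now; no version list to filter
print(docker["pull_tag"], "ROCm", docker["ROCm"])

# components now map name -> {version, url} instead of name -> version
for name, comp in docker["components"].items():
    print(f"{name} {comp['version']} ({comp['url']})")

# supported_models carries full model metadata, merged from the old
# model_groups section; js_tag replaces the old tag/page_tag fields
for group in docker["supported_models"]:
    for model in group["models"]:
        print(group["js_tag"], model["js_tag"], model["model_repo"])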

View File

@@ -1,3 +1,5 @@
+:orphan:
+
 .. meta::
    :description: Learn to validate diffusion model video generation on MI300X, MI350X and MI355X accelerators using
                  prebuilt and optimized docker images.

View File

@@ -17,7 +17,7 @@ benchmarking, see the version-specific documentation.
   * - ``rocm/pytorch-xdit:v25.11`` (latest)
     -
-      * ROCm 7.10.0 preview
+      * `ROCm 7.10.0 preview <https://rocm.docs.amd.com/en/7.10.0-preview/about/release-notes.html>`__
       * TheRock 3e3f834
       * rccl d23d18f
       * composable_kernel 2570462
@@ -37,7 +37,7 @@ benchmarking, see the version-specific documentation.
   * - ``rocm/pytorch-xdit:v25.10``
     -
-      * ROCm 7.9.0 preview
+      * `ROCm 7.9.0 preview <https://rocm.docs.amd.com/en/7.9.0-preview/about/release-notes.html>`__
       * TheRock 7afbe45
       * rccl 9b04b2a
       * composable_kernel b7a806f

View File

@@ -26,7 +26,6 @@ training, fine-tuning, and inference. It leverages popular machine learning fram
 - :doc:`SGLang inference performance testing <benchmark-docker/sglang>`
-- :doc:`Deploying your model <deploy-your-model>`
 - :doc:`xDiT diffusion inference <xdit-diffusion-inference>`
+- :doc:`Deploying your model <deploy-your-model>`

View File

@@ -11,11 +11,14 @@ xDiT diffusion inference
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml

-   {% set docker = data.xdit_diffusion_inference.docker | selectattr("version", "equalto", "v25-11") | first %}
-   {% set model_groups = data.xdit_diffusion_inference.model_groups%}
+   {% set docker = data.docker %}

-   The `rocm/pytorch-xdit <{{ docker.docker_hub_url }}>`_ Docker image offers a prebuilt, optimized environment based on `xDiT <https://github.com/xdit-project/xDiT>`_ for
-   benchmarking diffusion model video and image generation on gfx942 and gfx950 series (AMD Instinct™ MI300X, MI325X, MI350X, and MI355X) GPUs.
+   The `rocm/pytorch-xdit <{{ docker.docker_hub_url }}>`_ Docker image offers
+   a prebuilt, optimized environment based on `xDiT
+   <https://github.com/xdit-project/xDiT>`_ for benchmarking diffusion model
+   video and image generation on AMD Instinct MI355X, MI350X (gfx950), MI325X,
+   and MI300X (gfx942) GPUs.

    The image runs ROCm **{{docker.ROCm}}** (preview) based on `TheRock <https://github.com/ROCm/TheRock>`_
    and includes the following components:
@@ -27,9 +30,9 @@ xDiT diffusion inference
      * - Software component
        - Version
-   {% for component_name, component_version in docker.components.items() %}
-      * - {{ component_name }}
-        - {{ component_version }}
+   {% for component_name, component_data in docker.components.items() %}
+      * - `{{ component_name }} <{{ component_data.url }}>`_
+        - {{ component_data.version }}
    {% endfor %}

 Follow this guide to pull the required image, spin up a container, download the model, and run a benchmark.
@@ -37,10 +40,10 @@ For preview and development releases, see `amdsiloai/pytorch-xdit <https://hub.d
 What's new
 ==========

 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml

-   {% set docker = data.xdit_diffusion_inference.docker | selectattr("version", "equalto", "v25-11") | first %}
-   {% set model_groups = data.xdit_diffusion_inference.model_groups%}
+   {% set docker = data.docker %}

    {% for item in docker.whats_new %}
    * {{ item }}
@@ -57,14 +60,7 @@ vary by model -- select one to get started.
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml

-   {% set docker = data.xdit_diffusion_inference.docker | selectattr("version", "equalto", "v25-11") | first %}
-   {% set model_groups = data.xdit_diffusion_inference.model_groups %}
-
-   {# Create a lookup for supported models #}
-   {% set supported_lookup = {} %}
-   {% for supported in docker.supported_models %}
-   {% set _ = supported_lookup.update({supported.group: supported.models}) %}
-   {% endfor %}
+   {% set docker = data.docker %}

    .. raw:: html
@@ -72,10 +68,8 @@ vary by model -- select one to get started.
       <div class="row gx-0">
         <div class="col-2 me-1 px-2 model-param-head">Model</div>
         <div class="row col-10 pe-0">
-   {% for model_group in model_groups %}
-   {% if model_group.group in supported_lookup %}
-         <div class="col-4 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
-   {% endif %}
+   {% for model_group in docker.supported_models %}
+         <div class="col-6 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.js_tag }}" tabindex="0">{{ model_group.group }}</div>
    {% endfor %}
         </div>
       </div>
@@ -83,29 +77,24 @@ vary by model -- select one to get started.
       <div class="row gx-0 pt-1">
         <div class="col-2 me-1 px-2 model-param-head">Variant</div>
         <div class="row col-10 pe-0">
-   {% for model_group in model_groups %}
-   {% if model_group.group in supported_lookup %}
-   {% set supported_models = supported_lookup[model_group.group] %}
+   {% for model_group in docker.supported_models %}
    {% set models = model_group.models %}
    {% for model in models %}
-   {% if model.model in supported_models %}
    {% if models|length % 3 == 0 %}
-         <div class="col-4 px-2 model-param" data-param-k="model" data-param-v="{{ model.page_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
+         <div class="col-4 px-2 model-param" data-param-k="model" data-param-v="{{ model.js_tag }}" data-param-group="{{ model_group.js_tag }}" tabindex="0">{{ model.model }}</div>
    {% else %}
-         <div class="col-6 px-2 model-param" data-param-k="model" data-param-v="{{ model.page_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
-   {% endif %}
+         <div class="col-6 px-2 model-param" data-param-k="model" data-param-v="{{ model.js_tag }}" data-param-group="{{ model_group.js_tag }}" tabindex="0">{{ model.model }}</div>
    {% endif %}
    {% endfor %}
-   {% endif %}
    {% endfor %}
         </div>
       </div>
     </div>

-   {% for model_group in model_groups %}
+   {% for model_group in docker.supported_models %}
    {% for model in model_group.models %}

-   .. container:: model-doc {{ model.page_tag }}
+   .. container:: model-doc {{ model.js_tag }}

       .. note::
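
To make the simplification concrete: the old template built a ``supported_lookup`` dict to intersect the standalone ``model_groups`` section with each release's ``supported_models`` list; the new schema embeds full model entries directly in ``supported_models``. A plain-Python stand-in for that Jinja change, with dict keys mirroring the YAML fields in this commit (illustrative only):

# Old template: intersect two YAML sections via a lookup dict.
def old_selector(model_groups, supported_models):
    supported_lookup = {s["group"]: s["models"] for s in supported_models}
    for group in model_groups:
        if group["group"] not in supported_lookup:
            continue
        for model in group["models"]:
            if model["model"] in supported_lookup[group["group"]]:
                yield group["tag"], model["page_tag"]

# New template: supported_models already holds full model entries, so
# iterate directly and key the selector widgets off js_tag.
def new_selector(supported_models):
    for group in supported_models:
        for model in group["models"]:
            yield group["js_tag"], model["js_tag"]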
@@ -136,7 +125,7 @@ Pull the Docker image
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml

-   {% set docker = data.xdit_diffusion_inference.docker | selectattr("version", "equalto", "v25-11") | first %}
+   {% set docker = data.docker %}

    For this tutorial, it's recommended to use the latest ``{{ docker.pull_tag }}`` Docker image.
    Pull the image using the following command:
@@ -148,15 +137,17 @@ Pull the Docker image
 Validate and benchmark
 ======================

+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
+
+   {% set docker = data.docker %}
+
 Once the image has been downloaded you can follow these steps to
 run benchmarks and generate outputs.

-.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
-
-   {% for model_group in model_groups %}
+   {% for model_group in docker.supported_models %}
    {% for model in model_group.models %}

-   .. container:: model-doc {{model.page_tag}}
+   .. container:: model-doc {{model.js_tag}}

       The following commands are written for {{ model.model }}.
       See :ref:`xdit-video-diffusion-supported-models` to switch to another available model.
@@ -171,12 +162,11 @@ You can either use an existing Hugging Face cache or download the model fresh in
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml

-   {% set docker = data.xdit_diffusion_inference.docker | selectattr("version", "equalto", "v25-11") | first %}
-   {% set model_groups = data.xdit_diffusion_inference.model_groups%}
+   {% set docker = data.docker %}

-   {% for model_group in model_groups %}
+   {% for model_group in docker.supported_models %}
    {% for model in model_group.models %}

-   .. container:: model-doc {{model.page_tag}}
+   .. container:: model-doc {{model.js_tag}}

       .. tab-set::
@@ -264,11 +254,12 @@ Run inference
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml

-   {% set model_groups = data.xdit_diffusion_inference.model_groups%}
-   {% for model_group in model_groups %}
+   {% set docker = data.docker %}
+
+   {% for model_group in docker.supported_models %}
    {% for model in model_group.models %}

-   .. container:: model-doc {{ model.page_tag }}
+   .. container:: model-doc {{ model.js_tag }}

       .. tab-set::
@@ -309,7 +300,7 @@ Run inference
             mkdir results
             torchrun --nproc_per_node=8 run.py \
-                --model tencent/HunyuanVideo \
+                --model {{ model.model_repo }} \
                 --prompt "In the large cage, two puppies were wagging their tails at each other." \
                 --height 720 --width 1280 --num_frames 129 \
                 --num_inference_steps 50 --warmup_steps 1 --n_repeats 1 \
@@ -317,40 +308,45 @@ Run inference
                 --enable_tiling --enable_slicing \
                 --use_torch_compile \
                 --bench_output results
    {% endif %}
    {% if model.model == "Wan2.1" %}

-            cd Wan2.1
+            cd Wan
             mkdir results
-            torchrun --nproc_per_node=8 run.py \
-                --task i2v-14B \
-                --size 720*1280 --frame_num 81 \
-                --ckpt_dir "${HF_HOME}/hub/models--Wan-AI--Wan2.1-I2V-14B-720P/snapshots/8823af45fcc58a8aa999a54b04be9abc7d2aac98/" \
-                --image "/app/Wan2.1/examples/i2v_input.JPG" \
-                --ulysses_size 8 --ring_size 1 \
+            torchrun --nproc_per_node=8 /app/Wan/run.py \
+                --task i2v \
+                --height 720 \
+                --width 1280 \
+                --model {{ model.model_repo }} \
+                --img_file_path /app/Wan/i2v_input.JPG \
+                --ulysses_degree 8 \
+                --seed 42 \
+                --num_frames 81 \
                 --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
-                --benchmark_output_directory results --save_file video.mp4 --num_benchmark_steps 1 \
-                --offload_model 0 \
-                --vae_dtype bfloat16 \
-                --allow_tf32 \
-                --compile
+                --num_repetitions 1 \
+                --num_inference_steps 40 \
+                --use_torch_compile
    {% endif %}
    {% if model.model == "Wan2.2" %}

-            cd Wan2.2
+            cd Wan
             mkdir results
-            torchrun --nproc_per_node=8 run.py \
-                --task i2v-A14B \
-                --size 720*1280 --frame_num 81 \
-                --ckpt_dir "${HF_HOME}/hub/models--Wan-AI--Wan2.2-I2V-A14B/snapshots/206a9ee1b7bfaaf8f7e4d81335650533490646a3/" \
-                --image "/app/Wan2.2/examples/i2v_input.JPG" \
-                --ulysses_size 8 --ring_size 1 \
+            torchrun --nproc_per_node=8 /app/Wan/run.py \
+                --task i2v \
+                --height 720 \
+                --width 1280 \
+                --model {{ model.model_repo }} \
+                --img_file_path /app/Wan/i2v_input.JPG \
+                --ulysses_degree 8 \
+                --seed 42 \
+                --num_frames 81 \
                 --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
-                --benchmark_output_directory results --save_file video.mp4 --num_benchmark_steps 1 \
-                --offload_model 0 \
-                --vae_dtype bfloat16 \
-                --allow_tf32 \
-                --compile
+                --num_repetitions 1 \
+                --num_inference_steps 40 \
+                --use_torch_compile
    {% endif %}
    {% if model.model == "FLUX.1" %}
@@ -358,7 +354,7 @@ Run inference
             mkdir results
             torchrun --nproc_per_node=8 /app/Flux/run.py \
-                --model black-forest-labs/FLUX.1-dev \
+                --model {{ model.model_repo }} \
                 --seed 42 \
                 --prompt "A small cat" \
                 --height 1024 \
@@ -369,12 +365,28 @@ Run inference
                 --no_use_resolution_binning \
                 --ulysses_degree 8 \
                 --use_torch_compile \
-                --num_repetitions 1 \
-                --benchmark_output_directory results
+                --num_repetitions 50
    {% endif %}
+   {% if model.model == "stable-diffusion-3.5-large" %}
+
+            cd StableDiffusion3.5
+            mkdir results
+            torchrun --nproc_per_node=8 /app/StableDiffusion3.5/run.py \
+                --model {{ model.model_repo }} \
+                --num_inference_steps 28 \
+                --prompt "A capybara holding a sign that reads Hello World" \
+                --use_torch_compile \
+                --pipefusion_parallel_degree 4 \
+                --use_cfg_parallel \
+                --num_repetitions 50 \
+                --dtype torch.float16 \
+                --output_path results
+   {% endif %}

-   The generated video will be stored under the results directory. For the actual benchmark step runtimes, see {% if model.model == "Hunyuan Video" %}stdout.{% elif model.model in ["Wan2.1", "Wan2.2"] %}results/outputs/rank0_*.json{% elif model.model == "FLUX.1" %}results/timing.json{% endif %}
+   The generated video will be stored under the results directory. For the actual benchmark step runtimes, see {% if model.model == "Hunyuan Video" %}stdout.{% elif model.model in ["Wan2.1", "Wan2.2"] %}results/outputs/rank0_*.json{% elif model.model == "FLUX.1" %}results/timing.json{% elif model.model == "stable-diffusion-3.5-large" %}benchmark_results.csv{% endif %}
    {% if model.model == "FLUX.1" %}You may also use ``run_usp.py`` which implements USP without modifying the default diffusers pipeline. {% endif %}
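
The benchmark artifacts named above differ per model: stdout for Hunyuan Video, results/outputs/rank0_*.json for the Wan models, results/timing.json for FLUX.1, and benchmark_results.csv for SD 3.5. A small sketch that dumps whichever of the file-based artifacts exist after a run, without assuming their internal layout:

import glob
import json
from pathlib import Path

# Pretty-print any JSON timing artifacts that the run produced.
for path in ["results/timing.json", *glob.glob("results/outputs/rank0_*.json")]:
    p = Path(path)
    if p.is_file():
        print(f"== {p} ==")
        print(json.dumps(json.loads(p.read_text()), indent=2))

# SD 3.5 writes a CSV instead; Hunyuan Video reports to stdout only.
csv_path = Path("benchmark_results.csv")
if csv_path.is_file():
    print(csv_path.read_text())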