xDiT diffusion inference docker 26.1

2026-02-11 23:15:06 -05:00 · 2026-02-11 13:16:55 -05:00
parent d69483f946
commit 172661af80
2 changed files with 197 additions and 177 deletions
--- a/docs/data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
@@ -1,14 +1,13 @@
 docker:
-  pull_tag: rocm/pytorch-xdit:v25.13
-  docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.13/images/sha256-81954713070d67bde08595e03f62110c8a3dd66a9ae17a77d611e01f83f0f4ef
+  pull_tag: rocm/pytorch-xdit:v26.1
+  docker_hub_url: https://hub.docker.com/r/rocm/pytorch-xdit
  ROCm: 7.11.0
  whats_new:
-    - "Flux.1 Kontext support"
-    - "Flux.2 Dev support"
-    - "Flux FP8 GEMM support"
-    - "Hybrid FP8 attention support for Wan models"
+    - "HunyuanVideo 1.5 support"
+    - "Z-Image Turbo support"
+    - "Wan model sharding"
  components:
-    TheRock: 
+    TheRock:
      version: 1728a81
      url: https://github.com/ROCm/TheRock
    rccl:
@@ -39,10 +38,10 @@ docker:
      version: de14bec0
      url: https://github.com/ROCm/aiter
    diffusers:
-      version: a1f36ee3e
+      version: 6708f5
      url: https://github.com/huggingface/diffusers
    xfuser:
-      version: adf2681
+      version: 0a3d7a
      url: https://github.com/xdit-project/xDiT
    yunchang:
      version: 2c9b712
@@ -58,6 +57,49 @@ docker:
          github: https://github.com/Tencent-Hunyuan/HunyuanVideo
          mad_tag: pyt_xdit_hunyuanvideo
          js_tag: hunyuan_tag
+          benchmark_command:
+            - cd /app/Hunyuanvideo
+            - mkdir results
+            - 'torchrun --nproc_per_node=8 run.py \'
+            - '--model {model_repo} \'
+            - '--prompt "In the large cage, two puppies were wagging their tails at each other." \'
+            - '--batch_size 1 \'
+            - '--height 720 --width 1280 \'
+            - '--seed 1168860793 \'
+            - '--num_frames 129 \'
+            - '--num_inference_steps 50 \'
+            - '--warmup_steps 1 \'
+            - '--n_repeats 1 \'
+            - '--sleep_dur 10 \'
+            - '--ulysses_degree 8 \'
+            - '--enable_tiling --enable_slicing \'
+            - '--guidance_scale 6.0 \'
+            - '--use_torch_compile \'
+            - '--attention_backend aiter \'
+            - '--benchmark_output_directory results'
+        - model: Hunyuan Video 1.5
+          model_repo: hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-720p_t2v
+          url: https://huggingface.co/hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-720p_t2v
+          github: https://github.com/Tencent-Hunyuan/HunyuanVideo-1.5
+          mad_tag: pyt_xdit_hunyuanvideo_1_5
+          js_tag: hunyuan_1_5_tag
+          benchmark_command:
+            - cd /app/Hunyuanvideo_1_5
+            - mkdir results
+            - 'torchrun --nproc_per_node=8 /app/Hunyuanvideo_1_5/run.py \'
+            - '--model {model_repo} \'
+            - '--prompt "In the large cage, two puppies were wagging their tails at each other." \'
+            - '--task t2v \'
+            - '--height 720 --width 1280 \'
+            - '--seed 1168860793 \'
+            - '--num_frames 129 \'
+            - '--num_inference_steps 50 \'
+            - '--num_repetitions 1 \'
+            - '--ulysses_degree 8 \'
+            - '--enable_tiling --enable_slicing \'
+            - '--use_torch_compile \'
+            - '--attention_backend aiter \'
+            - '--benchmark_output_directory results'
    - group: Wan-AI
      js_tag: wan
      models:
@@ -67,12 +109,48 @@ docker:
          github: https://github.com/Wan-Video/Wan2.1
          mad_tag: pyt_xdit_wan_2_1
          js_tag: wan_21_tag
+          benchmark_command:
+            - cd /app/Wan
+            - mkdir results
+            - 'torchrun --nproc_per_node=8 /app/Wan/run.py \'
+            - '--model {model_repo} \'
+            - '--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline''s intricate details and the refreshing atmosphere of the seaside." \'
+            - '--task i2v \'
+            - '--height 720 \'
+            - '--width 1280 \'
+            - '--img_file_path /app/Wan/i2v_input.JPG \'
+            - '--num_frames 81 \'
+            - '--ulysses_degree 8 \'
+            - '--seed 42 \'
+            - '--num_repetitions 1 \'
+            - '--num_inference_steps 40 \'
+            - '--use_torch_compile \'
+            - '--attention_backend aiter \'
+            - '--benchmark_output_directory results'
        - model: Wan2.2
          model_repo: Wan-AI/Wan2.2-I2V-A14B-Diffusers
          url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers
          github: https://github.com/Wan-Video/Wan2.2
          mad_tag: pyt_xdit_wan_2_2
          js_tag: wan_22_tag
+          benchmark_command:
+            - cd /app/Wan
+            - mkdir results
+            - 'torchrun --nproc_per_node=8 /app/Wan/run.py \'
+            - '--model {model_repo} \'
+            - '--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline''s intricate details and the refreshing atmosphere of the seaside." \'
+            - '--task i2v \'
+            - '--height 720 \'
+            - '--width 1280 \'
+            - '--img_file_path /app/Wan/i2v_input.JPG \'
+            - '--num_frames 81 \'
+            - '--ulysses_degree 8 \'
+            - '--seed 42 \'
+            - '--num_repetitions 1 \'
+            - '--num_inference_steps 40 \'
+            - '--use_torch_compile \'
+            - '--attention_backend aiter \'
+            - '--benchmark_output_directory results'
    - group: FLUX
      js_tag: flux
      models:
@@ -82,18 +160,79 @@ docker:
          github: https://github.com/black-forest-labs/flux
          mad_tag: pyt_xdit_flux
          js_tag: flux_1_tag
+          benchmark_command:
+            - cd /app/Flux
+            - mkdir results
+            - 'torchrun --nproc_per_node=8 /app/Flux/run.py \'
+            - '--model {model_repo} \'
+            - '--seed 42 \'
+            - '--prompt "A small cat" \'
+            - '--height 1024 \'
+            - '--width 1024 \'
+            - '--num_inference_steps 25 \'
+            - '--max_sequence_length 256 \'
+            - '--warmup_steps 5 \'
+            - '--no_use_resolution_binning \'
+            - '--ulysses_degree 8 \'
+            - '--use_torch_compile \'
+            - '--guidance_scale 0.0 \'
+            - '--num_repetitions 50 \'
+            - '--attention_backend aiter \'
+            - '--benchmark_output_directory results'
        - model: FLUX.1 Kontext
          model_repo: black-forest-labs/FLUX.1-Kontext-dev
          url: https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev
          github: https://github.com/black-forest-labs/flux
          mad_tag: pyt_xdit_flux_kontext
          js_tag: flux_1_kontext_tag
+          benchmark_command:
+            - cd /app/Flux
+            - mkdir results
+            - 'torchrun --nproc_per_node=8 /app/Flux/run_usp.py \'
+            - '--model {model_repo} \'
+            - '--seed 42 \'
+            - '--prompt "Add a cool hat to the cat" \'
+            - '--height 1024 \'
+            - '--width 1024 \'
+            - '--num_inference_steps 30 \'
+            - '--max_sequence_length 512 \'
+            - '--warmup_steps 5 \'
+            - '--no_use_resolution_binning \'
+            - '--ulysses_degree 8 \'
+            - '--use_torch_compile \'
+            - '--img_file_path /app/Flux/cat.png \'
+            - '--model_type flux_kontext \'
+            - '--guidance_scale 2.5 \'
+            - '--num_repetitions 25 \'
+            - '--attention_backend aiter \'
+            - '--benchmark_output_directory results'
        - model: FLUX.2
          model_repo: black-forest-labs/FLUX.2-dev
          url: https://huggingface.co/black-forest-labs/FLUX.2-dev
          github: https://github.com/black-forest-labs/flux2
          mad_tag: pyt_xdit_flux_2
          js_tag: flux_2_tag
+          benchmark_command:
+            - cd /app/Flux
+            - mkdir results
+            - 'torchrun --nproc_per_node=8 /app/Flux/run_usp.py \'
+            - '--model {model_repo} \'
+            - '--seed 42 \'
+            - '--prompt "Add a cool hat to the cat" \'
+            - '--height 1024 \'
+            - '--width 1024 \'
+            - '--num_inference_steps 50 \'
+            - '--max_sequence_length 512 \'
+            - '--warmup_steps 5 \'
+            - '--no_use_resolution_binning \'
+            - '--ulysses_degree 8 \'
+            - '--use_torch_compile \'
+            - '--img_file_paths /app/Flux/cat.png \'
+            - '--model_type flux2 \'
+            - '--guidance_scale 4.0 \'
+            - '--num_repetitions 25 \'
+            - '--attention_backend aiter \'
+            - '--benchmark_output_directory results'
    - group: StableDiffusion
      js_tag: stablediffusion
      models:
@@ -103,3 +242,42 @@ docker:
          github: https://github.com/Stability-AI/sd3.5
          mad_tag: pyt_xdit_sd_3_5
          js_tag: stable_diffusion_3_5_large_tag
+          benchmark_command:
+            - cd /app/StableDiffusion3.5
+            - mkdir results
+            - 'torchrun --nproc_per_node=8 /app/StableDiffusion3.5/run.py \'
+            - '--model {model_repo} \'
+            - '--prompt "A capybara holding a sign that reads Hello World" \'
+            - '--num_repetitions 50 \'
+            - '--num_inference_steps 28 \'
+            - '--pipefusion_parallel_degree 4 \'
+            - '--use_cfg_parallel \'
+            - '--use_torch_compile \'
+            - '--dtype torch.float16 \'
+            - '--attention_backend aiter \'
+            - '--benchmark_output_directory results'
+    - group: Z-Image
+      js_tag: z_image
+      models:
+        - model: Z-Image Turbo
+          model_repo: Tongyi-MAI/Z-Image-Turbo
+          url: https://huggingface.co/Tongyi-MAI/Z-Image-Turbo
+          github: https://github.com/Tongyi-MAI/Z-Image
+          mad_tag: pyt_xdit_z_image_turbo
+          js_tag: z_image_turbo_tag
+          benchmark_command:
+            - cd /app/Z-Image
+            - mkdir results
+            - 'torchrun --nproc_per_node=2 /app/Z-Image/run.py \'
+            - '--model {model_repo} \'
+            - '--seed 42 \'
+            - '--prompt "A crowded beach" \'
+            - '--height 1088 \'
+            - '--width 1920 \'
+            - '--num_inference_steps 9 \'
+            - '--ulysses_degree 2 \'
+            - '--use_torch_compile \'
+            - '--guidance_scale 0.0 \'
+            - '--num_repetitions 50 \'
+            - '--attention_backend aiter \'
+            - '--benchmark_output_directory results'
--- a/docs/how-to/rocm-for-ai/inference/xdit-diffusion-inference.rst
+++ b/docs/how-to/rocm-for-ai/inference/xdit-diffusion-inference.rst
@@ -13,15 +13,10 @@ xDiT diffusion inference

   {% set docker = data.docker %}

-   The `rocm/pytorch-xdit <{{ docker.docker_hub_url }}>`_ Docker image offers
-   a prebuilt, optimized environment based on `xDiT
-   <https://github.com/xdit-project/xDiT>`_ for benchmarking diffusion model
-   video and image generation on AMD Instinct MI355X, MI350X (gfx950), MI325X,
-   and MI300X (gfx942) GPUs.
-
-   The image runs a preview version of ROCm using the new `TheRock
-   <https://github.com/ROCm/TheRock>`__ build system and includes the following
-   components:
+   The `rocm/pytorch-xdit <{{ docker.docker_hub_url }}>`_ Docker image offers a prebuilt, optimized environment based on `xDiT <https://github.com/xdit-project/xDiT>`_ for
+   benchmarking diffusion model video and image generation on gfx942 and gfx950 series (AMD Instinct™ MI300X, MI325X, MI350X, and MI355X) GPUs.
+   The image runs ROCm **{{ docker.ROCm }}** (preview) based on `TheRock <https://github.com/ROCm/TheRock>`__
+   and includes the following components:

   .. dropdown:: Software components - {{ docker.pull_tag.split('-')|last }}

@@ -105,22 +100,6 @@ vary by model -- select one to get started.
       {% endfor %}
   {% endfor %}

-Performance measurements
-========================
-
-To evaluate performance, the `Performance results with AMD ROCm software
-<https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8543b7e6d-item-9eda09e707-tab>`__
-page provides reference throughput and serving measurements for inferencing popular AI models.
-
-.. important::
-
-   The performance data presented in `Performance results with AMD ROCm
-   software
-   <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8543b7e6d-item-9eda09e707-tab>`__
-   only reflects the latest version of this inference benchmarking environment.
-   The listed measurements should not be interpreted as the peak performance
-   achievable by AMD Instinct GPUs or ROCm software.
-
 System validation
 =================

@@ -300,7 +279,7 @@ Run inference
                      --tags {{model.mad_tag}} \
                      --keep-model-dir \
                      --live-output
-                     
+
            MAD launches a Docker container with the name
            ``container_ci-{{model.mad_tag}}``. The throughput and serving reports of the
            model are collected in the following paths: ``{{ model.mad_tag }}_throughput.csv``
@@ -311,152 +290,15 @@ Run inference
            To run the benchmarks for {{ model.model }}, use the following command:

            .. code-block:: shell
-            {% if model.model == "Hunyuan Video" %}
-               cd /app/Hunyuanvideo
-               mkdir results

-               torchrun --nproc_per_node=8 run.py \
-                  --model {{ model.model_repo }} \
-                  --prompt "In the large cage, two puppies were wagging their tails at each other." \
-                  --height 720 --width 1280 --num_frames 129 \
-                  --num_inference_steps 50 --warmup_steps 1 --n_repeats 1 \
-                  --ulysses_degree 8 \
-                  --enable_tiling --enable_slicing \
-                  --use_torch_compile \
-                  --bench_output results
+               {{ model.benchmark_command
+                  | map('replace', '{model_repo}', model.model_repo)
+                  | map('trim')
+                  | join('\n               ') }}

-            {% endif %}
-            {% if model.model == "Wan2.1" %}
-               cd /app/Wan
-               mkdir results
-
-               torchrun --nproc_per_node=8 /app/Wan/run.py \
-                  --task i2v \
-                  --height 720 \
-                  --width 1280 \
-                  --model {{ model.model_repo }} \
-                  --img_file_path /app/Wan/i2v_input.JPG \
-                  --ulysses_degree 8 \
-                  --seed 42 \
-                  --num_frames 81 \
-                  --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
-                  --num_repetitions 1 \
-                  --num_inference_steps 40 \
-                  --use_torch_compile
-
-            {% endif %}
-            {% if model.model == "Wan2.2" %}
-               cd /app/Wan
-               mkdir results
-
-               torchrun --nproc_per_node=8 /app/Wan/run.py \
-                  --task i2v \
-                  --height 720 \
-                  --width 1280 \
-                  --model {{ model.model_repo }} \
-                  --img_file_path /app/Wan/i2v_input.JPG \
-                  --ulysses_degree 8 \
-                  --seed 42 \
-                  --num_frames 81 \
-                  --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
-                  --num_repetitions 1 \
-                  --num_inference_steps 40 \
-                  --use_torch_compile
-
-            {% endif %}
-
-            {% if model.model == "FLUX.1" %}
-               cd /app/Flux
-               mkdir results
-
-               torchrun --nproc_per_node=8 /app/Flux/run.py \
-                  --model {{ model.model_repo }} \
-                  --seed 42 \
-                  --prompt "A small cat" \
-                  --height 1024 \
-                  --width 1024 \
-                  --num_inference_steps 25 \
-                  --max_sequence_length 256 \
-                  --warmup_steps 5 \
-                  --no_use_resolution_binning \
-                  --ulysses_degree 8 \
-                  --use_torch_compile \
-                  --num_repetitions 50
-
-            {% endif %}
-
-            {% if model.model == "FLUX.1 Kontext" %}
-               cd /app/Flux
-               mkdir results
-
-               torchrun --nproc_per_node=8 /app/Flux/run_usp.py \
-                  --model {{ model.model_repo }} \
-                  --seed 42 \
-                  --prompt "Add a cool hat to the cat" \
-                  --height 1024 \
-                  --width 1024 \
-                  --num_inference_steps 30 \
-                  --max_sequence_length 512 \
-                  --warmup_steps 5 \
-                  --no_use_resolution_binning \
-                  --ulysses_degree 8 \
-                  --use_torch_compile \
-                  --img_file_path /app/Flux/cat.png \
-                  --model_type flux_kontext \
-                  --guidance_scale 2.5 \
-                  --num_repetitions 25
-
-            {% endif %}
-
-            {% if model.model == "FLUX.2" %}
-               cd /app/Flux
-               mkdir results
-
-               torchrun --nproc_per_node=8 /app/Flux/run_usp.py \
-                  --model {{ model.model_repo }} \
-                  --seed 42 \
-                  --prompt "Add a cool hat to the cat" \
-                  --height 1024 \
-                  --width 1024 \
-                  --num_inference_steps 50 \
-                  --max_sequence_length 512 \
-                  --warmup_steps 5 \
-                  --no_use_resolution_binning \
-                  --ulysses_degree 8 \
-                  --use_torch_compile \
-                  --img_file_paths /app/Flux/cat.png \
-                  --model_type flux2 \
-                  --guidance_scale 4.0 \
-                  --num_repetitions 25
-
-            {% endif %}
-
-            {% if model.model == "stable-diffusion-3.5-large" %}
-               cd /app/StableDiffusion3.5 
-               mkdir results
-
-               torchrun --nproc_per_node=8 /app/StableDiffusion3.5/run.py \
-                  --model {{ model.model_repo }} \
-                  --num_inference_steps 28 \
-                  --prompt "A capybara holding a sign that reads Hello World" \
-                  --use_torch_compile \
-                  --pipefusion_parallel_degree 4 \
-                  --use_cfg_parallel \
-                  --num_repetitions 50 \
-                  --dtype torch.float16 \
-                  --output_path results
-
-            {% endif %}
-
-            The generated video will be stored under the results directory. For the actual benchmark step runtimes, see {% if model.model == "Hunyuan Video" %}stdout.{% elif model.model in ["Wan2.1", "Wan2.2"] %}results/outputs/rank0_*.json{% elif model.model in ["FLUX.1", "FLUX.1 Kontext", "FLUX.2"] %}results/timing.json{% elif model.model == "stable-diffusion-3.5-large"%}benchmark_results.csv{% endif %}
+            The generated output (video or image) and the benchmark results are stored under the ``results`` directory.

            {% if model.model == "FLUX.1" %}You may also use ``run_usp.py`` which implements USP without modifying the default diffusers pipeline. {% endif %}

      {% endfor %}
    {% endfor %}
-
-Previous versions
-=================
-
-See :doc:`benchmark-docker/previous-versions/xdit-history` to find documentation for previous releases
-of xDiT diffusion inference performance testing.