From 459283da3ce80e513fdc8fb583cddefcf51f6088 Mon Sep 17 00:00:00 2001
From: peterjunpark
Date: Wed, 17 Dec 2025 10:20:10 -0500
Subject: [PATCH] xDiT diffusion inference v25.12 documentation update (#5786)

* Add xdit-diffusion ROCm docs page.
* Update template formatting and fix sphinx warnings
* Add System Validation section.
* Add sw component versions/commits.
* Update to use latest v25.10 image instead of v25.9
* Update commands and add FLUX instructions.
* Update Flux instructions. Change image tag. Describe as diffusion inference instead of specifically video.
* git rm xdit-video-diffusion.rst
* Docs for v25.12
* Add hyperlinks to components
* Command fixes
* -Diffusers suffix
* Simplify yaml file and cleanup main rst page.
* Spelling, added 'js'
* fix merge conflict fix

---------

Co-authored-by: Kristoffer
---
 .wordlist.txt                                 |   1 +
 .../inference/xdit-inference-models.yaml      | 154 ++++++++---------
 .../previous-versions/xdit-25.10.rst          |   2 +
 .../previous-versions/xdit-history.rst        |   4 +-
 docs/how-to/rocm-for-ai/inference/index.rst   |   3 +-
 .../inference/xdit-diffusion-inference.rst    | 160 ++++++++++--------
 6 files changed, 160 insertions(+), 164 deletions(-)

diff --git a/.wordlist.txt b/.wordlist.txt
index 0fee3d0cf..889606056 100644
--- a/.wordlist.txt
+++ b/.wordlist.txt
@@ -261,6 +261,7 @@ Ioffe
 JAX's
 JAXLIB
 Jinja
+js
 JSON
 Jupyter
 KFD
diff --git a/docs/data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml b/docs/data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
index e88b4ef0b..8d462524a 100644
--- a/docs/data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
@@ -1,109 +1,91 @@
-xdit_diffusion_inference:
-  docker:
-  - version: v25-11
-    pull_tag: rocm/pytorch-xdit:v25.11
-    docker_hub_url: https://hub.docker.com/r/rocm/pytorch-xdit
-    ROCm: 7.10.0
-    supported_models:
-      - group: Hunyuan Video
-        models:
-          - Hunyuan Video
-      - group: Wan-AI
-        models:
-          - Wan2.1
-          - Wan2.2
-      - group: FLUX
-        models:
-          - FLUX.1
-    whats_new:
-      - "Minor bug fixes and clarifications to READMEs."
-      - "Bumps TheRock, AITER, Diffusers, xDiT versions."
-      - "Changes Aiter rounding mode for faster gfx942 FWD Attention."
-    components:
-      TheRock: 3e3f834
-      rccl: d23d18f
-      composable_kernel: 2570462
-      rocm-libraries: 0588f07
-      rocm-systems: 473025a
-      torch: 73adac
-      torchvision: f5c6c2e
-      triton: 7416ffc
-      accelerate: 34c1779
-      aiter: de14bec
-      diffusers: 40528e9
-      xfuser: 83978b5
-      yunchang: 2c9b712
-
-  - version: v25-10
-    pull_tag: rocm/pytorch-xdit:v25.10
-    docker_hub_url: https://hub.docker.com/r/rocm/pytorch-xdit
-    ROCm: 7.9.0
-    supported_models:
-      - group: Hunyuan Video
-        models:
-          - Hunyuan Video
-      - group: Wan-AI
-        models:
-          - Wan2.1
-          - Wan2.2
-      - group: FLUX
-        models:
-          - FLUX.1
-    whats_new:
-      - "First official xDiT Docker Release for Diffusion Inference."
-      - "Supports gfx942 and gfx950 series (AMD Instinct™ MI300X, MI325X, MI350X, and MI355X)."
-      - "Support Wan 2.1, Wan 2.2, HunyuanVideo and Flux workloads."
- components: - TheRock: 7afbe45 - rccl: 9b04b2a - composable_kernel: b7a806f - rocm-libraries: f104555 - rocm-systems: 25922d0 - torch: 2.10.0a0+gite9c9017 - torchvision: 0.22.0a0+966da7e - triton: 3.5.0+git52e49c12 - accelerate: 1.11.0.dev0 - aiter: 0.1.5.post4.dev20+ga25e55e79 - diffusers: 0.36.0.dev0 - xfuser: 0.4.4 - yunchang: 0.6.3.post1 - - model_groups: +docker: + pull_tag: rocm/pytorch-xdit:v25.12 + docker_hub_url: https://hub.docker.com/r/rocm/pytorch-xdit + ROCm: 7.10.0 + whats_new: + - "Adds T2V and TI2V support for Wan models." + - "Adds support for SD-3.5 T2I model." + components: + TheRock: + version: 3e3f834 + url: https://github.com/ROCm/TheRock + rccl: + version: d23d18f + url: https://github.com/ROCm/rccl + composable_kernel: + version: 2570462 + url: https://github.com/ROCm/composable_kernel + rocm-libraries: + version: 0588f07 + url: https://github.com/ROCm/rocm-libraries + rocm-systems: + version: 473025a + url: https://github.com/ROCm/rocm-systems + torch: + version: 73adac + url: https://github.com/pytorch/pytorch + torchvision: + version: f5c6c2e + url: https://github.com/pytorch/vision + triton: + version: 7416ffc + url: https://github.com/triton-lang/triton + accelerate: + version: 34c1779 + url: https://github.com/huggingface/accelerate + aiter: + version: de14bec + url: https://github.com/ROCm/aiter + diffusers: + version: 40528e9 + url: https://github.com/huggingface/diffusers + xfuser: + version: ccba9d5 + url: https://github.com/xdit-project/xDiT + yunchang: + version: 2c9b712 + url: https://github.com/feifeibear/long-context-attention + supported_models: - group: Hunyuan Video - tag: hunyuan + js_tag: hunyuan models: - model: Hunyuan Video - page_tag: hunyuan_tag - model_name: hunyuanvideo model_repo: tencent/HunyuanVideo revision: refs/pr/18 url: https://huggingface.co/tencent/HunyuanVideo github: https://github.com/Tencent-Hunyuan/HunyuanVideo mad_tag: pyt_xdit_hunyuanvideo + js_tag: hunyuan_tag - group: Wan-AI - tag: wan + js_tag: wan models: - model: Wan2.1 - page_tag: wan_21_tag - model_name: wan2_1-i2v-14b-720p - model_repo: Wan-AI/Wan2.1-I2V-14B-720P - url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P + model_repo: Wan-AI/Wan2.1-I2V-14B-720P-Diffusers + url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P-Diffusers github: https://github.com/Wan-Video/Wan2.1 mad_tag: pyt_xdit_wan_2_1 + js_tag: wan_21_tag - model: Wan2.2 - page_tag: wan_22_tag - model_name: wan2_2-i2v-a14b - model_repo: Wan-AI/Wan2.2-I2V-A14B - url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B + model_repo: Wan-AI/Wan2.2-I2V-A14B-Diffusers + url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers github: https://github.com/Wan-Video/Wan2.2 mad_tag: pyt_xdit_wan_2_2 + js_tag: wan_22_tag - group: FLUX - tag: flux + js_tag: flux models: - model: FLUX.1 - page_tag: flux_1_tag - model_name: FLUX.1-dev model_repo: black-forest-labs/FLUX.1-dev url: https://huggingface.co/black-forest-labs/FLUX.1-dev github: https://github.com/black-forest-labs/flux mad_tag: pyt_xdit_flux + js_tag: flux_1_tag + - group: Stable Diffusion + js_tag: stablediffusion + models: + - model: stable-diffusion-3.5-large + model_repo: stabilityai/stable-diffusion-3.5-large + url: https://huggingface.co/stabilityai/stable-diffusion-3.5-large + github: https://github.com/Stability-AI/sd3.5 + mad_tag: pyt_xdit_sd_3_5 + js_tag: stable_diffusion_3_5_large_tag diff --git a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.10.rst 
b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.10.rst
index 92c2e908a..9bbbd84a9 100644
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.10.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.10.rst
@@ -1,3 +1,5 @@
+:orphan:
+
 .. meta::
   :description: Learn to validate diffusion model video generation on MI300X, MI350X and MI355X
                 accelerators using prebuilt and optimized docker images.
diff --git a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-history.rst b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-history.rst
index a93c66c1e..28609ae59 100644
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-history.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-history.rst
@@ -17,7 +17,7 @@ benchmarking, see the version-specific documentation.

   * - ``rocm/pytorch-xdit:v25.11`` (latest)
     -
-      * ROCm 7.10.0 preview
+      * `ROCm 7.10.0 preview `__
       * TheRock 3e3f834
       * rccl d23d18f
       * composable_kernel 2570462
@@ -37,7 +37,7 @@

   * - ``rocm/pytorch-xdit:v25.10``
     -
-      * ROCm 7.9.0 preview
+      * `ROCm 7.9.0 preview `__
       * TheRock 7afbe45
       * rccl 9b04b2a
       * composable_kernel b7a806f
diff --git a/docs/how-to/rocm-for-ai/inference/index.rst b/docs/how-to/rocm-for-ai/inference/index.rst
index f12054b59..353c05b53 100644
--- a/docs/how-to/rocm-for-ai/inference/index.rst
+++ b/docs/how-to/rocm-for-ai/inference/index.rst
@@ -26,7 +26,6 @@ training, fine-tuning, and inference. It leverages popular machine learning fram

 - :doc:`SGLang inference performance testing `

-- :doc:`Deploying your model `
-
 - :doc:`xDiT diffusion inference `

+- :doc:`Deploying your model `
diff --git a/docs/how-to/rocm-for-ai/inference/xdit-diffusion-inference.rst b/docs/how-to/rocm-for-ai/inference/xdit-diffusion-inference.rst
index 6e71d8431..b9b1da113 100644
--- a/docs/how-to/rocm-for-ai/inference/xdit-diffusion-inference.rst
+++ b/docs/how-to/rocm-for-ai/inference/xdit-diffusion-inference.rst
@@ -11,11 +11,14 @@ xDiT diffusion inference

 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml

-   {% set docker = data.xdit_diffusion_inference.docker | selectattr("version", "equalto", "v25-11") | first %}
-   {% set model_groups = data.xdit_diffusion_inference.model_groups%}
+   {% set docker = data.docker %}
+
+   The `rocm/pytorch-xdit <{{ docker.docker_hub_url }}>`_ Docker image offers
+   a prebuilt, optimized environment based on `xDiT
+   <https://github.com/xdit-project/xDiT>`_ for benchmarking diffusion model
+   video and image generation on AMD Instinct MI355X, MI350X (gfx950), MI325X,
+   and MI300X (gfx942) GPUs.

-   The `rocm/pytorch-xdit <{{ docker.docker_hub_url }}>`_ Docker image offers a prebuilt, optimized environment based on `xDiT <https://github.com/xdit-project/xDiT>`_ for
-   benchmarking diffusion model video and image generation on gfx942 and gfx950 series (AMD Instinct™ MI300X, MI325X, MI350X, and MI355X) GPUs.
   The image runs ROCm **{{docker.ROCm}}** (preview) based on `TheRock <https://github.com/ROCm/TheRock>`_ and includes the following components:
@@ -27,9 +30,9 @@ xDiT diffusion inference
      * - Software component
        - Version

-   {% for component_name, component_version in docker.components.items() %}
-      * - {{ component_name }}
-        - {{ component_version }}
+   {% for component_name, component_data in docker.components.items() %}
+      * - `{{ component_name }} <{{ component_data.url }}>`_
+        - {{ component_data.version }}
    {% endfor %}

 Follow this guide to pull the required image, spin up a container, download the model, and run a benchmark.
@@ -37,10 +40,10 @@ For preview and development releases, see `amdsiloai/pytorch-xdit
Model
- {% for model_group in model_groups %} - {% if model_group.group in supported_lookup %} -
{{ model_group.group }}
- {% endif %} + {% for model_group in docker.supported_models %} +
{{ model_group.group }}
{% endfor %}
@@ -83,29 +77,24 @@ vary by model -- select one to get started.
Variant
- {% for model_group in model_groups %} - {% if model_group.group in supported_lookup %} - {% set supported_models = supported_lookup[model_group.group] %} + {% for model_group in docker.supported_models %} {% set models = model_group.models %} {% for model in models %} - {% if model.model in supported_models %} {% if models|length % 3 == 0 %} -
{{ model.model }}
+
{{ model.model }}
{% else %} -
{{ model.model }}
- {% endif %} +
{{ model.model }}
{% endif %} {% endfor %} - {% endif %} {% endfor %}
-   {% for model_group in model_groups %}
+   {% for model_group in docker.supported_models %}
    {% for model in model_group.models %}

-   .. container:: model-doc {{ model.page_tag }}
+   .. container:: model-doc {{ model.js_tag }}

       .. note::

@@ -136,7 +125,7 @@ Pull the Docker image

 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml

-   {% set docker = data.xdit_diffusion_inference.docker | selectattr("version", "equalto", "v25-11") | first %}
+   {% set docker = data.docker %}

    For this tutorial, it's recommended to use the latest ``{{ docker.pull_tag }}`` Docker image. Pull the image using the following command:

@@ -148,15 +137,17 @@ Pull the Docker image
 Validate and benchmark
 ======================

-Once the image has been downloaded you can follow these steps to
-run benchmarks and generate outputs.
-
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml

-   {% for model_group in model_groups %}
+   {% set docker = data.docker %}
+
+   Once the image has been downloaded, you can follow these steps to
+   run benchmarks and generate outputs.
+
+   {% for model_group in docker.supported_models %}
    {% for model in model_group.models %}

-   .. container:: model-doc {{model.page_tag}}
+   .. container:: model-doc {{model.js_tag}}

       The following commands are written for {{ model.model }}. See :ref:`xdit-video-diffusion-supported-models` to switch to another available model.

@@ -171,12 +162,11 @@ You can either use an existing Hugging Face cache or download the model fresh in

 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml

-   {% set docker = data.xdit_diffusion_inference.docker | selectattr("version", "equalto", "v25-11") | first %}
-   {% set model_groups = data.xdit_diffusion_inference.model_groups%}
+   {% set docker = data.docker %}

-   {% for model_group in model_groups %}
+   {% for model_group in docker.supported_models %}
    {% for model in model_group.models %}
-   .. container:: model-doc {{model.page_tag}}
+   .. container:: model-doc {{model.js_tag}}

       .. tab-set::

@@ -264,11 +254,12 @@ Run inference

 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml

-   {% set model_groups = data.xdit_diffusion_inference.model_groups%}
-   {% for model_group in model_groups %}
+   {% set docker = data.docker %}
+
+   {% for model_group in docker.supported_models %}
    {% for model in model_group.models %}

-   .. container:: model-doc {{ model.page_tag }}
+   .. container:: model-doc {{ model.js_tag }}

       .. tab-set::

@@ -309,7 +300,7 @@ Run inference
                      mkdir results

                      torchrun --nproc_per_node=8 run.py \
-                        --model tencent/HunyuanVideo \
+                        --model {{ model.model_repo }} \
                         --prompt "In the large cage, two puppies were wagging their tails at each other."
\ --height 720 --width 1280 --num_frames 129 \ --num_inference_steps 50 --warmup_steps 1 --n_repeats 1 \ @@ -317,40 +308,45 @@ Run inference --enable_tiling --enable_slicing \ --use_torch_compile \ --bench_output results + {% endif %} {% if model.model == "Wan2.1" %} - cd Wan2.1 + cd Wan mkdir results - torchrun --nproc_per_node=8 run.py \ - --task i2v-14B \ - --size 720*1280 --frame_num 81 \ - --ckpt_dir "${HF_HOME}/hub/models--Wan-AI--Wan2.1-I2V-14B-720P/snapshots/8823af45fcc58a8aa999a54b04be9abc7d2aac98/" \ - --image "/app/Wan2.1/examples/i2v_input.JPG" \ - --ulysses_size 8 --ring_size 1 \ + torchrun --nproc_per_node=8 /app/Wan/run.py \ + --task i2v \ + --height 720 \ + --width 1280 \ + --model {{ model.model_repo }} \ + --img_file_path /app/Wan/i2v_input.JPG \ + --ulysses_degree 8 \ + --seed 42 \ + --num_frames 81 \ --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \ - --benchmark_output_directory results --save_file video.mp4 --num_benchmark_steps 1 \ - --offload_model 0 \ - --vae_dtype bfloat16 \ - --allow_tf32 \ - --compile + --num_repetitions 1 \ + --num_inference_steps 40 \ + --use_torch_compile + {% endif %} {% if model.model == "Wan2.2" %} - cd Wan2.2 + cd Wan mkdir results - torchrun --nproc_per_node=8 run.py \ - --task i2v-A14B \ - --size 720*1280 --frame_num 81 \ - --ckpt_dir "${HF_HOME}/hub/models--Wan-AI--Wan2.2-I2V-A14B/snapshots/206a9ee1b7bfaaf8f7e4d81335650533490646a3/" \ - --image "/app/Wan2.2/examples/i2v_input.JPG" \ - --ulysses_size 8 --ring_size 1 \ + torchrun --nproc_per_node=8 /app/Wan/run.py \ + --task i2v \ + --height 720 \ + --width 1280 \ + --model {{ model.model_repo }} \ + --img_file_path /app/Wan/i2v_input.JPG \ + --ulysses_degree 8 \ + --seed 42 \ + --num_frames 81 \ --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \ - --benchmark_output_directory results --save_file video.mp4 --num_benchmark_steps 1 \ - --offload_model 0 \ - --vae_dtype bfloat16 \ - --allow_tf32 \ - --compile + --num_repetitions 1 \ + --num_inference_steps 40 \ + --use_torch_compile + {% endif %} {% if model.model == "FLUX.1" %} @@ -358,7 +354,7 @@ Run inference mkdir results torchrun --nproc_per_node=8 /app/Flux/run.py \ - --model black-forest-labs/FLUX.1-dev \ + --model {{ model.model_repo }} \ --seed 42 \ --prompt "A small cat" \ --height 1024 \ @@ -369,12 +365,28 @@ Run inference --no_use_resolution_binning \ --ulysses_degree 8 \ --use_torch_compile \ - --num_repetitions 1 \ - --benchmark_output_directory results + --num_repetitions 50 {% endif %} - The generated video will be stored under the results directory. 
For the actual benchmark step runtimes, see {% if model.model == "Hunyuan Video" %}stdout.{% elif model.model in ["Wan2.1", "Wan2.2"] %}results/outputs/rank0_*.json{% elif model.model == "FLUX.1" %}results/timing.json{% endif %}
+                  {% if model.model == "stable-diffusion-3.5-large" %}
+                     cd StableDiffusion3.5
+                     mkdir results
+
+                     torchrun --nproc_per_node=8 /app/StableDiffusion3.5/run.py \
+                        --model {{ model.model_repo }} \
+                        --num_inference_steps 28 \
+                        --prompt "A capybara holding a sign that reads Hello World" \
+                        --use_torch_compile \
+                        --pipefusion_parallel_degree 4 \
+                        --use_cfg_parallel \
+                        --num_repetitions 50 \
+                        --dtype torch.float16 \
+                        --output_path results
+
+                  {% endif %}
+
+   The generated video or image will be stored under the results directory. For the actual benchmark step runtimes, see {% if model.model == "Hunyuan Video" %}stdout.{% elif model.model in ["Wan2.1", "Wan2.2"] %}results/outputs/rank0_*.json{% elif model.model == "FLUX.1" %}results/timing.json{% elif model.model == "stable-diffusion-3.5-large" %}benchmark_results.csv{% endif %}

   {% if model.model == "FLUX.1" %}You may also use ``run_usp.py`` which implements USP without modifying the default diffusers pipeline. {% endif %}

@@ -385,4 +397,4 @@ Previous versions
 =================

 See :doc:`benchmark-docker/previous-versions/xdit-history` to find documentation for previous releases
-of xDiT diffusion inference performance testing.
\ No newline at end of file
+of xDiT diffusion inference performance testing.
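
The documentation updated by this patch assumes the benchmark commands run inside the container, but the ``docker run`` invocation itself is outside the changed hunks. As a minimal sketch only — the device flags below are the standard ROCm container options and the Hugging Face cache mount is an assumption about the host layout, not commands taken from this patch — launching the v25.12 image could look like this:

.. code-block:: shell

   # Start the xDiT container with GPU access. The device flags are the
   # usual ROCm container options; the cache mount and HF_HOME path are
   # assumptions -- adjust them to your host layout.
   docker run -it --rm \
     --device=/dev/kfd \
     --device=/dev/dri \
     --group-add video \
     --ipc=host \
     --shm-size 64G \
     --security-opt seccomp=unconfined \
     -v "${HF_HOME:-$HOME/.cache/huggingface}:/root/.cache/huggingface" \
     -e HF_HOME=/root/.cache/huggingface \
     rocm/pytorch-xdit:v25.12

Inside the container, the per-model entry points referenced by the patch live under ``/app`` (for example ``/app/Flux/run.py``, ``/app/Wan/run.py``, and ``/app/StableDiffusion3.5/run.py``), so the ``torchrun`` commands above can be used as written once the model weights are available.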
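
The "Download the model" step lets you either reuse an existing Hugging Face cache or fetch weights fresh. As a sketch, assuming the ``huggingface_hub`` CLI is available in the container (the patch does not state this), pre-fetching the HunyuanVideo weights at the pinned revision from the model list could look like:

.. code-block:: shell

   # Log in first if the model is gated (e.g. black-forest-labs/FLUX.1-dev
   # requires accepting a license), then fetch the weights into the HF cache.
   # The pinned revision (refs/pr/18) comes from this patch's model list.
   huggingface-cli login
   huggingface-cli download tencent/HunyuanVideo --revision refs/pr/18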
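
For anyone extending the simplified data file, the restructured ``xdit-inference-models.yaml`` keys everything off a single ``docker`` mapping plus a ``supported_models`` list, with ``js_tag`` values driving the model-selector containers in the RST page. A hypothetical new entry (the model, URLs, and tags shown are placeholders, not something this release supports) would follow the same shape:

.. code-block:: yaml

   # Hypothetical entry: field names match the schema introduced by this
   # patch; the values below are illustrative only.
   supported_models:
     - group: Example Group
       js_tag: example
       models:
         - model: example-model
           model_repo: example-org/example-model
           url: https://huggingface.co/example-org/example-model
           github: https://github.com/example-org/example-model
           mad_tag: pyt_xdit_example
           js_tag: example_model_tag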