[Azure External CI] Disable Azure CI on ROCm (#5950)

Update docs for xDiT diffusion inference 26.1 (#5955)
* archive previous version * xDiT diffusion inference docker 26.1
2026-02-12 23:45:05 -05:00 · 2026-02-11 16:06:42 -05:00 · 2026-02-11 13:27:36 -05:00 · 2026-02-11 08:03:36 -05:00
7 changed files with 796 additions and 190 deletions
--- a/docs/data/how-to/rocm-for-ai/inference/previous-versions/xdit_25.13-inference-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/previous-versions/xdit_25.13-inference-models.yaml
@@ -0,0 +1,105 @@
 docker:
  pull_tag: rocm/pytorch-xdit:v25.13
  docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.13/images/sha256-81954713070d67bde08595e03f62110c8a3dd66a9ae17a77d611e01f83f0f4ef
  ROCm: 7.11.0
  whats_new:
    - "Flux.1 Kontext support"
    - "Flux.2 Dev support"
    - "Flux FP8 GEMM support"
    - "Hybrid FP8 attention support for Wan models"
  components:
    TheRock: 
      version: 1728a81
      url: https://github.com/ROCm/TheRock
    rccl:
      version: d23d18f
      url: https://github.com/ROCm/rccl
    composable_kernel:
      version: ab0101c
      url: https://github.com/ROCm/composable_kernel
    rocm-libraries:
      version: a2f7c35
      url: https://github.com/ROCm/rocm-libraries
    rocm-systems:
      version: 659737c
      url: https://github.com/ROCm/rocm-systems
    torch:
      version: 91be249
      url: https://github.com/ROCm/pytorch
    torchvision:
      version: b919bd0
      url: https://github.com/pytorch/vision
    triton:
      version: a272dfa
      url: https://github.com/ROCm/triton
    accelerate:
      version: b521400f
      url: https://github.com/huggingface/accelerate
    aiter:
      version: de14bec0
      url: https://github.com/ROCm/aiter
    diffusers:
      version: a1f36ee3e
      url: https://github.com/huggingface/diffusers
    xfuser:
      version: adf2681
      url: https://github.com/xdit-project/xDiT
    yunchang:
      version: 2c9b712
      url: https://github.com/feifeibear/long-context-attention
  supported_models:
    - group: Hunyuan Video
      js_tag: hunyuan
      models:
        - model: Hunyuan Video
          model_repo: tencent/HunyuanVideo
          revision: refs/pr/18
          url: https://huggingface.co/tencent/HunyuanVideo
          github: https://github.com/Tencent-Hunyuan/HunyuanVideo
          mad_tag: pyt_xdit_hunyuanvideo
          js_tag: hunyuan_tag
    - group: Wan-AI
      js_tag: wan
      models:
        - model: Wan2.1
          model_repo: Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
          url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
          github: https://github.com/Wan-Video/Wan2.1
          mad_tag: pyt_xdit_wan_2_1
          js_tag: wan_21_tag
        - model: Wan2.2
          model_repo: Wan-AI/Wan2.2-I2V-A14B-Diffusers
          url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers
          github: https://github.com/Wan-Video/Wan2.2
          mad_tag: pyt_xdit_wan_2_2
          js_tag: wan_22_tag
    - group: FLUX
      js_tag: flux
      models:
        - model: FLUX.1
          model_repo: black-forest-labs/FLUX.1-dev
          url: https://huggingface.co/black-forest-labs/FLUX.1-dev
          github: https://github.com/black-forest-labs/flux
          mad_tag: pyt_xdit_flux
          js_tag: flux_1_tag
        - model: FLUX.1 Kontext
          model_repo: black-forest-labs/FLUX.1-Kontext-dev
          url: https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev
          github: https://github.com/black-forest-labs/flux
          mad_tag: pyt_xdit_flux_kontext
          js_tag: flux_1_kontext_tag
        - model: FLUX.2
          model_repo: black-forest-labs/FLUX.2-dev
          url: https://huggingface.co/black-forest-labs/FLUX.2-dev
          github: https://github.com/black-forest-labs/flux2
          mad_tag: pyt_xdit_flux_2
          js_tag: flux_2_tag
    - group: StableDiffusion
      js_tag: stablediffusion
      models:
        - model: stable-diffusion-3.5-large
          model_repo: stabilityai/stable-diffusion-3.5-large
          url: https://huggingface.co/stabilityai/stable-diffusion-3.5-large
          github: https://github.com/Stability-AI/sd3.5
          mad_tag: pyt_xdit_sd_3_5
          js_tag: stable_diffusion_3_5_large_tag
--- a/docs/data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
+++ b/docs/data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
@@ -1,14 +1,13 @@
 docker:
-  pull_tag: rocm/pytorch-xdit:v25.13
+  pull_tag: rocm/pytorch-xdit:v26.1
-  docker_hub_url: https://hub.docker.com/layers/rocm/pytorch-xdit/v25.13/images/sha256-81954713070d67bde08595e03f62110c8a3dd66a9ae17a77d611e01f83f0f4ef
+  docker_hub_url: https://hub.docker.com/r/rocm/pytorch-xdit
  ROCm: 7.11.0
  whats_new:
-    - "Flux.1 Kontext support"
+    - "HunyuanVideo 1.5 support"
-    - "Flux.2 Dev support"
+    - "Z-Image Turbo support"
-    - "Flux FP8 GEMM support"
+    - "Wan model sharding"
    - "Hybrid FP8 attention support for Wan models"
  components:
-    TheRock: 
+    TheRock:
      version: 1728a81
      url: https://github.com/ROCm/TheRock
    rccl:
@@ -39,10 +38,10 @@ docker:
      version: de14bec0
      url: https://github.com/ROCm/aiter
    diffusers:
-      version: a1f36ee3e
+      version: 6708f5
      url: https://github.com/huggingface/diffusers
    xfuser:
-      version: adf2681
+      version: 0a3d7a
      url: https://github.com/xdit-project/xDiT
    yunchang:
      version: 2c9b712
@@ -58,6 +57,49 @@ docker:
          github: https://github.com/Tencent-Hunyuan/HunyuanVideo
          mad_tag: pyt_xdit_hunyuanvideo
          js_tag: hunyuan_tag
          benchmark_command:
            - cd /app/Hunyuanvideo
            - mkdir results
            - 'torchrun --nproc_per_node=8 run.py \'
            - '--model {model_repo} \'
            - '--prompt "In the large cage, two puppies were wagging their tails at each other." \'
            - '--batch_size 1 \'
            - '--height 720 --width 1280 \'
            - '--seed 1168860793 \'
            - '--num_frames 129 \'
            - '--num_inference_steps 50 \'
            - '--warmup_steps 1 \'
            - '--n_repeats 1 \'
            - '--sleep_dur 10 \'
            - '--ulysses_degree 8 \'
            - '--enable_tiling --enable_slicing \'
            - '--guidance_scale 6.0 \'
            - '--use_torch_compile \'
            - '--attention_backend aiter \'
            - '--benchmark_output_directory results'
        - model: Hunyuan Video 1.5
          model_repo: hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-720p_t2v
          url: https://huggingface.co/hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-720p_t2v
          github: https://github.com/Tencent-Hunyuan/HunyuanVideo-1.5
          mad_tag: pyt_xdit_hunyuanvideo_1_5
          js_tag: hunyuan_1_5_tag
          benchmark_command:
            - cd /app/Hunyuanvideo_1_5
            - mkdir results
            - 'torchrun --nproc_per_node=8 /app/Hunyuanvideo_1_5/run.py \'
            - '--model {model_repo} \'
            - '--prompt "In the large cage, two puppies were wagging their tails at each other." \'
            - '--task t2v \'
            - '--height 720 --width 1280 \'
            - '--seed 1168860793 \'
            - '--num_frames 129 \'
            - '--num_inference_steps 50 \'
            - '--num_repetitions 1 \'
            - '--ulysses_degree 8 \'
            - '--enable_tiling --enable_slicing \'
            - '--use_torch_compile \'
            - '--attention_backend aiter \'
            - '--benchmark_output_directory results'
    - group: Wan-AI
      js_tag: wan
      models:
@@ -67,12 +109,48 @@ docker:
          github: https://github.com/Wan-Video/Wan2.1
          mad_tag: pyt_xdit_wan_2_1
          js_tag: wan_21_tag
          benchmark_command:
            - cd /app/Wan
            - mkdir results
            - 'torchrun --nproc_per_node=8 /app/Wan/run.py \'
            - '--model {model_repo} \'
            - '--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline''s intricate details and the refreshing atmosphere of the seaside." \'
            - '--task i2v \'
            - '--height 720 \'
            - '--width 1280 \'
            - '--img_file_path /app/Wan/i2v_input.JPG \'
            - '--num_frames 81 \'
            - '--ulysses_degree 8 \'
            - '--seed 42 \'
            - '--num_repetitions 1 \'
            - '--num_inference_steps 40 \'
            - '--use_torch_compile \'
            - '--attention_backend aiter \'
            - '--benchmark_output_directory results'
        - model: Wan2.2
          model_repo: Wan-AI/Wan2.2-I2V-A14B-Diffusers
          url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers
          github: https://github.com/Wan-Video/Wan2.2
          mad_tag: pyt_xdit_wan_2_2
          js_tag: wan_22_tag
          benchmark_command:
            - cd /app/Wan
            - mkdir results
            - 'torchrun --nproc_per_node=8 /app/Wan/run.py \'
            - '--model {model_repo} \'
            - '--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline''s intricate details and the refreshing atmosphere of the seaside." \'
            - '--task i2v \'
            - '--height 720 \'
            - '--width 1280 \'
            - '--img_file_path /app/Wan/i2v_input.JPG \'
            - '--num_frames 81 \'
            - '--ulysses_degree 8 \'
            - '--seed 42 \'
            - '--num_repetitions 1 \'
            - '--num_inference_steps 40 \'
            - '--use_torch_compile \'
            - '--attention_backend aiter \'
            - '--benchmark_output_directory results'
    - group: FLUX
      js_tag: flux
      models:
@@ -82,18 +160,79 @@ docker:
          github: https://github.com/black-forest-labs/flux
          mad_tag: pyt_xdit_flux
          js_tag: flux_1_tag
          benchmark_command:
            - cd /app/Flux
            - mkdir results
            - 'torchrun --nproc_per_node=8 /app/Flux/run.py \'
            - '--model {model_repo} \'
            - '--seed 42 \'
            - '--prompt "A small cat" \'
            - '--height 1024 \'
            - '--width 1024 \'
            - '--num_inference_steps 25 \'
            - '--max_sequence_length 256 \'
            - '--warmup_steps 5 \'
            - '--no_use_resolution_binning \'
            - '--ulysses_degree 8 \'
            - '--use_torch_compile \'
            - '--guidance_scale 0.0 \'
            - '--num_repetitions 50 \'
            - '--attention_backend aiter \'
            - '--benchmark_output_directory results'
        - model: FLUX.1 Kontext
          model_repo: black-forest-labs/FLUX.1-Kontext-dev
          url: https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev
          github: https://github.com/black-forest-labs/flux
          mad_tag: pyt_xdit_flux_kontext
          js_tag: flux_1_kontext_tag
          benchmark_command:
            - cd /app/Flux
            - mkdir results
            - 'torchrun --nproc_per_node=8 /app/Flux/run_usp.py \'
            - '--model {model_repo} \'
            - '--seed 42 \'
            - '--prompt "Add a cool hat to the cat" \'
            - '--height 1024 \'
            - '--width 1024 \'
            - '--num_inference_steps 30 \'
            - '--max_sequence_length 512 \'
            - '--warmup_steps 5 \'
            - '--no_use_resolution_binning \'
            - '--ulysses_degree 8 \'
            - '--use_torch_compile \'
            - '--img_file_path /app/Flux/cat.png \'
            - '--model_type flux_kontext \'
            - '--guidance_scale 2.5 \'
            - '--num_repetitions 25 \'
            - '--attention_backend aiter \'
            - '--benchmark_output_directory results'
        - model: FLUX.2
          model_repo: black-forest-labs/FLUX.2-dev
          url: https://huggingface.co/black-forest-labs/FLUX.2-dev
          github: https://github.com/black-forest-labs/flux2
          mad_tag: pyt_xdit_flux_2
          js_tag: flux_2_tag
          benchmark_command:
            - cd /app/Flux
            - mkdir results
            - 'torchrun --nproc_per_node=8 /app/Flux/run_usp.py \'
            - '--model {model_repo} \'
            - '--seed 42 \'
            - '--prompt "Add a cool hat to the cat" \'
            - '--height 1024 \'
            - '--width 1024 \'
            - '--num_inference_steps 50 \'
            - '--max_sequence_length 512 \'
            - '--warmup_steps 5 \'
            - '--no_use_resolution_binning \'
            - '--ulysses_degree 8 \'
            - '--use_torch_compile \'
            - '--img_file_paths /app/Flux/cat.png \'
            - '--model_type flux2 \'
            - '--guidance_scale 4.0 \'
            - '--num_repetitions 25 \'
            - '--attention_backend aiter \'
            - '--benchmark_output_directory results'
    - group: StableDiffusion
      js_tag: stablediffusion
      models:
@@ -103,3 +242,42 @@ docker:
          github: https://github.com/Stability-AI/sd3.5
          mad_tag: pyt_xdit_sd_3_5
          js_tag: stable_diffusion_3_5_large_tag
          benchmark_command:
            - cd /app/StableDiffusion3.5
            - mkdir results
            - 'torchrun --nproc_per_node=8 /app/StableDiffusion3.5/run.py \'
            - '--model {model_repo} \'
            - '--prompt "A capybara holding a sign that reads Hello World" \'
            - '--num_repetitions 50 \'
            - '--num_inference_steps 28 \'
            - '--pipefusion_parallel_degree 4 \'
            - '--use_cfg_parallel \'
            - '--use_torch_compile \'
            - '--dtype torch.float16 \'
            - '--attention_backend aiter \'
            - '--benchmark_output_directory results'
    - group: Z-Image
      js_tag: z_image
      models:
        - model: Z-Image Turbo
          model_repo: Tongyi-MAI/Z-Image-Turbo
          url: https://huggingface.co/Tongyi-MAI/Z-Image-Turbo
          github: https://github.com/Tongyi-MAI/Z-Image
          mad_tag: pyt_xdit_z_image_turbo
          js_tag: z_image_turbo_tag
          benchmark_command:
            - cd /app/Z-Image
            - mkdir results
            - 'torchrun --nproc_per_node=2 /app/Z-Image/run.py \'
            - '--model {model_repo} \'
            - '--seed 42 \'
            - '--prompt "A crowded beach" \'
            - '--height 1088 \'
            - '--width 1920 \'
            - '--num_inference_steps 9 \'
            - '--ulysses_degree 2 \'
            - '--use_torch_compile \'
            - '--guidance_scale 0.0 \'
            - '--num_repetitions 50 \'
            - '--attention_backend aiter \'
            - '--benchmark_output_directory results'
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.12.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.12.rst
@@ -11,7 +11,7 @@ xDiT diffusion inference
 .. caution::
-   This documentation does not reflect the latest version of ROCm vLLM
+   This documentation does not reflect the latest version of xDiT diffusion
   inference performance documentation. See
   :doc:`/how-to/rocm-for-ai/inference/xdit-diffusion-inference` for the latest
   version.
@@ -293,7 +293,7 @@ Run inference
                      --tags {{model.mad_tag}} \
                      --keep-model-dir \
                      --live-output
-                     
+
            MAD launches a Docker container with the name
            ``container_ci-{{model.mad_tag}}``. The throughput and serving reports of the
            model are collected in the following paths: ``{{ model.mad_tag }}_throughput.csv``
@@ -379,7 +379,7 @@ Run inference
            {% endif %}
            {% if model.model == "stable-diffusion-3.5-large" %}
-               cd StableDiffusion3.5 
+               cd StableDiffusion3.5
               mkdir results
               torchrun --nproc_per_node=8 /app/StableDiffusion3.5/run.py \
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.13.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-25.13.rst
@@ -0,0 +1,474 @@
 :orphan:
 .. meta::
   :description: Learn to validate diffusion model video generation on MI300X, MI350X and MI355X accelerators using
                 prebuilt and optimized docker images.
   :keywords: xDiT, diffusion, video, video generation, image, image generation, validate, benchmark
 ************************
 xDiT diffusion inference
 ************************
 .. caution::
   This documentation does not reflect the latest version of the xDiT diffusion
   inference performance documentation. See
   :doc:`/how-to/rocm-for-ai/inference/xdit-diffusion-inference` for the latest
   version.
 .. _xdit-video-diffusion-2513:
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/xdit_25.13-inference-models.yaml
   {% set docker = data.docker %}
   The `rocm/pytorch-xdit <{{ docker.docker_hub_url }}>`_ Docker image offers
   a prebuilt, optimized environment based on `xDiT
   <https://github.com/xdit-project/xDiT>`_ for benchmarking diffusion model
   video and image generation on AMD Instinct MI355X, MI350X (gfx950), MI325X,
   and MI300X (gfx942) GPUs.
   The image runs a preview version of ROCm using the new `TheRock
   <https://github.com/ROCm/TheRock>`__ build system and includes the following
   components:
   .. dropdown:: Software components - {{ docker.pull_tag.split('-')|last }}
      .. list-table::
         :header-rows: 1
         * - Software component
           - Version
         {% for component_name, component_data in docker.components.items() %}
         * - `{{ component_name }} <{{ component_data.url }}>`_
           - {{ component_data.version }}
         {% endfor %}
 Follow this guide to pull the required image, spin up a container, download the model, and run a benchmark.
 For preview and development releases, see `amdsiloai/pytorch-xdit <https://hub.docker.com/r/amdsiloai/pytorch-xdit>`_.
 What's new
 ==========
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/xdit_25.13-inference-models.yaml
   {% set docker = data.docker %}
   {% for item in docker.whats_new %}
   * {{ item }}
   {% endfor %}
 .. _xdit-video-diffusion-supported-models-2513:
 Supported models
 ================
 The following models are supported for inference performance benchmarking.
 Some instructions, commands, and recommendations in this documentation might
 vary by model -- select one to get started.
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/xdit_25.13-inference-models.yaml
   {% set docker = data.docker %}
   .. raw:: html
      <div id="vllm-benchmark-ud-params-picker" class="container-fluid">
          <div class="row gx-0">
              <div class="col-2 me-1 px-2 model-param-head">Model</div>
              <div class="row col-10 pe-0">
        {% for model_group in docker.supported_models %}
               <div class="col-6 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.js_tag }}" tabindex="0">{{ model_group.group }}</div>
        {% endfor %}
              </div>
          </div>
          <div class="row gx-0 pt-1">
              <div class="col-2 me-1 px-2 model-param-head">Variant</div>
              <div class="row col-10 pe-0">
        {% for model_group in docker.supported_models %}
            {% set models = model_group.models %}
            {% for model in models %}
                {% if models|length % 3 == 0 %}
                <div class="col-4 px-2 model-param" data-param-k="model" data-param-v="{{ model.js_tag }}" data-param-group="{{ model_group.js_tag }}" tabindex="0">{{ model.model }}</div>
                {% else %}
                <div class="col-6 px-2 model-param" data-param-k="model" data-param-v="{{ model.js_tag }}" data-param-group="{{ model_group.js_tag }}" tabindex="0">{{ model.model }}</div>
                {% endif %}
            {% endfor %}
        {% endfor %}
              </div>
          </div>
      </div>
   {% for model_group in docker.supported_models %}
       {% for model in model_group.models %}
   .. container:: model-doc {{ model.js_tag }}
      .. note::
         To learn more about your specific model, see the `{{ model.model }} model card on Hugging Face <{{ model.url }}>`_
         or visit the `GitHub page <{{ model.github }}>`__. Note that some models require access authorization before use,
         granted through an external license agreement with a third party.
       {% endfor %}
   {% endfor %}
 Performance measurements
 ========================
 To evaluate performance, the `Performance results with AMD ROCm software
 <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8543b7e6d-item-9eda09e707-tab>`__
 page provides reference throughput and serving measurements for inferencing popular AI models.
 .. important::
   The performance data presented in `Performance results with AMD ROCm
   software
   <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8543b7e6d-item-9eda09e707-tab>`__
   only reflects the latest version of this inference benchmarking environment.
   The listed measurements should not be interpreted as the peak performance
   achievable by AMD Instinct GPUs or ROCm software.
 System validation
 =================
 Before running AI workloads, it's important to validate that your AMD hardware is configured
 correctly and performing optimally.
 If you have already validated your system settings, including aspects like NUMA auto-balancing, you
 can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
 optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
 before starting.
 To test for optimal performance, consult the recommended :ref:`System health benchmarks
 <rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
 system's configuration.
 Pull the Docker image
 =====================
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/xdit_25.13-inference-models.yaml
   {% set docker = data.docker %}
   For this tutorial, it's recommended to use the latest ``{{ docker.pull_tag }}`` Docker image.
   Pull the image using the following command:
   .. code-block:: shell
      docker pull {{ docker.pull_tag }}
 Validate and benchmark
 ======================
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/xdit_25.13-inference-models.yaml
   {% set docker = data.docker %}
   Once the image has been downloaded you can follow these steps to
   run benchmarks and generate outputs.
   {% for model_group in docker.supported_models %}
     {% for model in model_group.models %}
   .. container:: model-doc {{model.js_tag}}
      The following commands are written for {{ model.model }}.
      See :ref:`xdit-video-diffusion-supported-models-2513` to switch to another available model.
     {% endfor %}
   {% endfor %}
 Choose your setup method
 ------------------------
 You can either use an existing Hugging Face cache or download the model fresh inside the container.
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/xdit_25.13-inference-models.yaml
   {% set docker = data.docker %}
   {% for model_group in docker.supported_models %}
     {% for model in model_group.models %}
   .. container:: model-doc {{model.js_tag}}
      .. tab-set::
         .. tab-item:: Option 1: Use existing Hugging Face cache
            If you already have models downloaded on your host system, you can mount your existing cache.
            1. Set your Hugging Face cache location.
               .. code-block:: shell
                  export HF_HOME=/your/hf_cache/location
            2. Download the model (if not already cached).
               .. code-block:: shell
                  huggingface-cli download {{ model.model_repo }} {% if model.revision %} --revision {{ model.revision }} {% endif %}
            3. Launch the container with mounted cache.
               .. code-block:: shell
                  docker run \
                      -it --rm \
                      --cap-add=SYS_PTRACE \
                      --security-opt seccomp=unconfined \
                      --user root \
                      --device=/dev/kfd \
                      --device=/dev/dri \
                      --group-add video \
                      --ipc=host \
                      --network host \
                      --privileged \
                      --shm-size 128G \
                      --name pytorch-xdit \
                      -e HSA_NO_SCRATCH_RECLAIM=1 \
                      -e OMP_NUM_THREADS=16 \
                      -e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
                      -e HF_HOME=/app/huggingface_models \
                      -v $HF_HOME:/app/huggingface_models \
                      {{ docker.pull_tag }}
         .. tab-item:: Option 2: Download inside container
            Use this option if you prefer to keep the container self-contained or don't have an existing cache.
            1. Launch the container.
               .. code-block:: shell
                  docker run \
                      -it --rm \
                      --cap-add=SYS_PTRACE \
                      --security-opt seccomp=unconfined \
                      --user root \
                      --device=/dev/kfd \
                      --device=/dev/dri \
                      --group-add video \
                      --ipc=host \
                      --network host \
                      --privileged \
                      --shm-size 128G \
                      --name pytorch-xdit \
                      -e HSA_NO_SCRATCH_RECLAIM=1 \
                      -e OMP_NUM_THREADS=16 \
                      -e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
                      {{ docker.pull_tag }}
            2. Inside the container, set the Hugging Face cache location and download the model.
               .. code-block:: shell
                  export HF_HOME=/app/huggingface_models
                  huggingface-cli download {{ model.model_repo }} {% if model.revision %} --revision {{ model.revision }} {% endif %}
               .. warning::
                  Models will be downloaded to the container's filesystem and will be lost when the container is removed unless you persist the data with a volume.
     {% endfor %}
   {% endfor %}
 Run inference
 =============
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/previous-versions/xdit_25.13-inference-models.yaml
   {% set docker = data.docker %}
   {% for model_group in docker.supported_models %}
     {% for model in model_group.models %}
   .. container:: model-doc {{ model.js_tag }}
      .. tab-set::
         .. tab-item:: MAD-integrated benchmarking
            1. Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
               directory and install the required packages on the host machine.
               .. code-block:: shell
                  git clone https://github.com/ROCm/MAD
                  cd MAD
                  pip install -r requirements.txt
            2. On the host machine, use this command to run the performance benchmark test on
               the `{{model.model}} <{{ model.url }}>`_ model using one node.
               .. code-block:: shell
                  export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
                  madengine run \
                      --tags {{model.mad_tag}} \
                      --keep-model-dir \
                      --live-output
            MAD launches a Docker container with the name
            ``container_ci-{{model.mad_tag}}``. The throughput and serving reports of the
            model are collected in the following paths: ``{{ model.mad_tag }}_throughput.csv``
            and ``{{ model.mad_tag }}_serving.csv``.
         .. tab-item:: Standalone benchmarking
            To run the benchmarks for {{ model.model }}, use the following command:
            .. code-block:: shell
            {% if model.model == "Hunyuan Video" %}
               cd /app/Hunyuanvideo
               mkdir results
               torchrun --nproc_per_node=8 run.py \
                  --model {{ model.model_repo }} \
                  --prompt "In the large cage, two puppies were wagging their tails at each other." \
                  --height 720 --width 1280 --num_frames 129 \
                  --num_inference_steps 50 --warmup_steps 1 --n_repeats 1 \
                  --ulysses_degree 8 \
                  --enable_tiling --enable_slicing \
                  --use_torch_compile \
                  --bench_output results
            {% endif %}
            {% if model.model == "Wan2.1" %}
               cd /app/Wan
               mkdir results
               torchrun --nproc_per_node=8 /app/Wan/run.py \
                  --task i2v \
                  --height 720 \
                  --width 1280 \
                  --model {{ model.model_repo }} \
                  --img_file_path /app/Wan/i2v_input.JPG \
                  --ulysses_degree 8 \
                  --seed 42 \
                  --num_frames 81 \
                  --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
                  --num_repetitions 1 \
                  --num_inference_steps 40 \
                  --use_torch_compile
            {% endif %}
            {% if model.model == "Wan2.2" %}
               cd /app/Wan
               mkdir results
               torchrun --nproc_per_node=8 /app/Wan/run.py \
                  --task i2v \
                  --height 720 \
                  --width 1280 \
                  --model {{ model.model_repo }} \
                  --img_file_path /app/Wan/i2v_input.JPG \
                  --ulysses_degree 8 \
                  --seed 42 \
                  --num_frames 81 \
                  --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
                  --num_repetitions 1 \
                  --num_inference_steps 40 \
                  --use_torch_compile
            {% endif %}
            {% if model.model == "FLUX.1" %}
               cd /app/Flux
               mkdir results
               torchrun --nproc_per_node=8 /app/Flux/run.py \
                  --model {{ model.model_repo }} \
                  --seed 42 \
                  --prompt "A small cat" \
                  --height 1024 \
                  --width 1024 \
                  --num_inference_steps 25 \
                  --max_sequence_length 256 \
                  --warmup_steps 5 \
                  --no_use_resolution_binning \
                  --ulysses_degree 8 \
                  --use_torch_compile \
                  --num_repetitions 50
            {% endif %}
            {% if model.model == "FLUX.1 Kontext" %}
               cd /app/Flux
               mkdir results
               torchrun --nproc_per_node=8 /app/Flux/run_usp.py \
                  --model {{ model.model_repo }} \
                  --seed 42 \
                  --prompt "Add a cool hat to the cat" \
                  --height 1024 \
                  --width 1024 \
                  --num_inference_steps 30 \
                  --max_sequence_length 512 \
                  --warmup_steps 5 \
                  --no_use_resolution_binning \
                  --ulysses_degree 8 \
                  --use_torch_compile \
                  --img_file_path /app/Flux/cat.png \
                  --model_type flux_kontext \
                  --guidance_scale 2.5 \
                  --num_repetitions 25
            {% endif %}
            {% if model.model == "FLUX.2" %}
               cd /app/Flux
               mkdir results
               torchrun --nproc_per_node=8 /app/Flux/run_usp.py \
                  --model {{ model.model_repo }} \
                  --seed 42 \
                  --prompt "Add a cool hat to the cat" \
                  --height 1024 \
                  --width 1024 \
                  --num_inference_steps 50 \
                  --max_sequence_length 512 \
                  --warmup_steps 5 \
                  --no_use_resolution_binning \
                  --ulysses_degree 8 \
                  --use_torch_compile \
                  --img_file_paths /app/Flux/cat.png \
                  --model_type flux2 \
                  --guidance_scale 4.0 \
                  --num_repetitions 25
            {% endif %}
            {% if model.model == "stable-diffusion-3.5-large" %}
               cd /app/StableDiffusion3.5
               mkdir results
               torchrun --nproc_per_node=8 /app/StableDiffusion3.5/run.py \
                  --model {{ model.model_repo }} \
                  --num_inference_steps 28 \
                  --prompt "A capybara holding a sign that reads Hello World" \
                  --use_torch_compile \
                  --pipefusion_parallel_degree 4 \
                  --use_cfg_parallel \
                  --num_repetitions 50 \
                  --dtype torch.float16 \
                  --output_path results
            {% endif %}
            The generated output (video or image) will be stored under the results directory. For the actual benchmark step runtimes, see {% if model.model == "Hunyuan Video" %}stdout{% elif model.model in ["Wan2.1", "Wan2.2"] %}results/outputs/rank0_*.json{% elif model.model in ["FLUX.1", "FLUX.1 Kontext", "FLUX.2"] %}results/timing.json{% elif model.model == "stable-diffusion-3.5-large" %}benchmark_results.csv{% endif %}.
            {% if model.model == "FLUX.1" %}You may also use ``run_usp.py`` which implements USP without modifying the default diffusers pipeline. {% endif %}
      {% endfor %}
    {% endfor %}
 Previous versions
 =================
 See
 :doc:`/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-history`
 to find documentation for previous releases of xDiT diffusion inference
 performance testing.
--- a/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-history.rst
+++ b/docs/how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/xdit-history.rst
@@ -15,33 +15,40 @@ benchmarking, see the version-specific documentation.
     - Components
     - Resources
-   * - ``rocm/pytorch-xdit:v25.13`` (latest)
+   * - ``rocm/pytorch-xdit:v26.1``
-     - 
+     -
       * TheRock 1728a81
-     - 
+     -
       * :doc:`Documentation <../../xdit-diffusion-inference>`
       * `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-xdit/v26.1/images/sha256-4e35ebcad47042a41389b992ecb3489b3b0a922e4c34c7a0dd1098733a3db513>`__
   * - ``rocm/pytorch-xdit:v25.13``
     -
       * TheRock 1728a81
     -
       * :doc:`Documentation <xdit-25.13>`
       * `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-xdit/v25.13/images/sha256-81954713070d67bde08595e03f62110c8a3dd66a9ae17a77d611e01f83f0f4ef>`__
   * - ``rocm/pytorch-xdit:v25.12``
-     - 
+     -
       * `ROCm 7.10.0 preview <https://rocm.docs.amd.com/en/7.10.0-preview/about/release-notes.html>`__
       * TheRock 3e3f834
-     - 
+     -
       * :doc:`Documentation <xdit-25.12>`
       * `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-xdit/v25.12/images/sha256-e06895132316bf3c393366b70a91eaab6755902dad0100e6e2b38310547d9256>`__
   * - ``rocm/pytorch-xdit:v25.11``
-     - 
+     -
       * `ROCm 7.10.0 preview <https://rocm.docs.amd.com/en/7.10.0-preview/about/release-notes.html>`__
       * TheRock 3e3f834
-     - 
+     -
       * :doc:`Documentation <xdit-25.11>`
       * `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-xdit/v25.11/images/sha256-c9fa659439bb024f854b4d5eea598347251b02c341c55f66c98110832bde4216>`__
   * - ``rocm/pytorch-xdit:v25.10``
-     - 
+     -
       * `ROCm 7.9.0 preview <https://rocm.docs.amd.com/en/7.9.0-preview/about/release-notes.html>`__
       * TheRock 7afbe45
-     - 
+     -
       * :doc:`Documentation <xdit-25.10>`
       * `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-xdit/v25.10/images/sha256-d79715ff18a9470e3f907cec8a9654d6b783c63370b091446acffc0de4d7070e>`__
--- a/docs/how-to/rocm-for-ai/inference/xdit-diffusion-inference.rst
+++ b/docs/how-to/rocm-for-ai/inference/xdit-diffusion-inference.rst
@@ -13,15 +13,10 @@ xDiT diffusion inference
   {% set docker = data.docker %}
-   The `rocm/pytorch-xdit <{{ docker.docker_hub_url }}>`_ Docker image offers
+   The `rocm/pytorch-xdit <{{ docker.docker_hub_url }}>`_ Docker image offers a prebuilt, optimized environment based on `xDiT <https://github.com/xdit-project/xDiT>`_ for
-   a prebuilt, optimized environment based on `xDiT
+   benchmarking diffusion model video and image generation on gfx942 and gfx950 series (AMD Instinct™ MI300X, MI325X, MI350X, and MI355X) GPUs.
-   <https://github.com/xdit-project/xDiT>`_ for benchmarking diffusion model
+   The image runs ROCm **{{docker.ROCm}}** (preview) based on `TheRock <https://github.com/ROCm/TheRock>`_
-   video and image generation on AMD Instinct MI355X, MI350X (gfx950), MI325X,
+   and includes the following components:
   and MI300X (gfx942) GPUs.
   The image runs a preview version of ROCm using the new `TheRock
   <https://github.com/ROCm/TheRock>`__ build system and includes the following
   components:
   .. dropdown:: Software components - {{ docker.pull_tag.split('-')|last }}
@@ -105,22 +100,6 @@ vary by model -- select one to get started.
       {% endfor %}
   {% endfor %}
 Performance measurements
 ========================
 To evaluate performance, the `Performance results with AMD ROCm software
 <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8543b7e6d-item-9eda09e707-tab>`__
 page provides reference throughput and serving measurements for inferencing popular AI models.
 .. important::
   The performance data presented in `Performance results with AMD ROCm
   software
   <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8543b7e6d-item-9eda09e707-tab>`__
   only reflects the latest version of this inference benchmarking environment.
   The listed measurements should not be interpreted as the peak performance
   achievable by AMD Instinct GPUs or ROCm software.
 System validation
 =================
@@ -300,7 +279,7 @@ Run inference
                      --tags {{model.mad_tag}} \
                      --keep-model-dir \
                      --live-output
-                     
+
            MAD launches a Docker container with the name
            ``container_ci-{{model.mad_tag}}``. The throughput and serving reports of the
            model are collected in the following paths: ``{{ model.mad_tag }}_throughput.csv``
@@ -311,152 +290,15 @@ Run inference
            To run the benchmarks for {{ model.model }}, use the following command:
            .. code-block:: shell
            {% if model.model == "Hunyuan Video" %}
               cd /app/Hunyuanvideo
               mkdir results
-               torchrun --nproc_per_node=8 run.py \
+               {{ model.benchmark_command
-                  --model {{ model.model_repo }} \
+                  | map('replace', '{model_repo}', model.model_repo)
-                  --prompt "In the large cage, two puppies were wagging their tails at each other." \
+                  | map('trim')
-                  --height 720 --width 1280 --num_frames 129 \
+                  | join('\n               ') }}
                  --num_inference_steps 50 --warmup_steps 1 --n_repeats 1 \
                  --ulysses_degree 8 \
                  --enable_tiling --enable_slicing \
                  --use_torch_compile \
                  --bench_output results
-            {% endif %}
+            The generated output will be stored under the results directory.
            {% if model.model == "Wan2.1" %}
               cd /app/Wan
               mkdir results
               torchrun --nproc_per_node=8 /app/Wan/run.py \
                  --task i2v \
                  --height 720 \
                  --width 1280 \
                  --model {{ model.model_repo }} \
                  --img_file_path /app/Wan/i2v_input.JPG \
                  --ulysses_degree 8 \
                  --seed 42 \
                  --num_frames 81 \
                  --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
                  --num_repetitions 1 \
                  --num_inference_steps 40 \
                  --use_torch_compile
            {% endif %}
            {% if model.model == "Wan2.2" %}
               cd /app/Wan
               mkdir results
               torchrun --nproc_per_node=8 /app/Wan/run.py \
                  --task i2v \
                  --height 720 \
                  --width 1280 \
                  --model {{ model.model_repo }} \
                  --img_file_path /app/Wan/i2v_input.JPG \
                  --ulysses_degree 8 \
                  --seed 42 \
                  --num_frames 81 \
                  --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
                  --num_repetitions 1 \
                  --num_inference_steps 40 \
                  --use_torch_compile
            {% endif %}
            {% if model.model == "FLUX.1" %}
               cd /app/Flux
               mkdir results
               torchrun --nproc_per_node=8 /app/Flux/run.py \
                  --model {{ model.model_repo }} \
                  --seed 42 \
                  --prompt "A small cat" \
                  --height 1024 \
                  --width 1024 \
                  --num_inference_steps 25 \
                  --max_sequence_length 256 \
                  --warmup_steps 5 \
                  --no_use_resolution_binning \
                  --ulysses_degree 8 \
                  --use_torch_compile \
                  --num_repetitions 50
            {% endif %}
            {% if model.model == "FLUX.1 Kontext" %}
               cd /app/Flux
               mkdir results
               torchrun --nproc_per_node=8 /app/Flux/run_usp.py \
                  --model {{ model.model_repo }} \
                  --seed 42 \
                  --prompt "Add a cool hat to the cat" \
                  --height 1024 \
                  --width 1024 \
                  --num_inference_steps 30 \
                  --max_sequence_length 512 \
                  --warmup_steps 5 \
                  --no_use_resolution_binning \
                  --ulysses_degree 8 \
                  --use_torch_compile \
                  --img_file_path /app/Flux/cat.png \
                  --model_type flux_kontext \
                  --guidance_scale 2.5 \
                  --num_repetitions 25
            {% endif %}
            {% if model.model == "FLUX.2" %}
               cd /app/Flux
               mkdir results
               torchrun --nproc_per_node=8 /app/Flux/run_usp.py \
                  --model {{ model.model_repo }} \
                  --seed 42 \
                  --prompt "Add a cool hat to the cat" \
                  --height 1024 \
                  --width 1024 \
                  --num_inference_steps 50 \
                  --max_sequence_length 512 \
                  --warmup_steps 5 \
                  --no_use_resolution_binning \
                  --ulysses_degree 8 \
                  --use_torch_compile \
                  --img_file_paths /app/Flux/cat.png \
                  --model_type flux2 \
                  --guidance_scale 4.0 \
                  --num_repetitions 25
            {% endif %}
            {% if model.model == "stable-diffusion-3.5-large" %}
               cd /app/StableDiffusion3.5 
               mkdir results
               torchrun --nproc_per_node=8 /app/StableDiffusion3.5/run.py \
                  --model {{ model.model_repo }} \
                  --num_inference_steps 28 \
                  --prompt "A capybara holding a sign that reads Hello World" \
                  --use_torch_compile \
                  --pipefusion_parallel_degree 4 \
                  --use_cfg_parallel \
                  --num_repetitions 50 \
                  --dtype torch.float16 \
                  --output_path results
            {% endif %}
            The generated output will be stored under the results directory. For the actual benchmark step runtimes, see {% if model.model == "Hunyuan Video" %}stdout.{% elif model.model in ["Wan2.1", "Wan2.2"] %}``results/outputs/rank0_*.json``.{% elif model.model in ["FLUX.1", "FLUX.1 Kontext", "FLUX.2"] %}``results/timing.json``.{% elif model.model == "stable-diffusion-3.5-large" %}``benchmark_results.csv``.{% endif %}
            {% if model.model == "FLUX.1" %}You may also use ``run_usp.py``, which implements USP without modifying the default diffusers pipeline. {% endif %}
      {% endfor %}
    {% endfor %}
 Previous versions
 =================
 See :doc:`benchmark-docker/previous-versions/xdit-history` to find documentation for previous releases
 of xDiT diffusion inference performance testing.
--- a/docs/sphinx/requirements.txt
+++ b/docs/sphinx/requirements.txt
@@ -37,7 +37,7 @@ click==8.3.1
    #   sphinx-external-toc
 comm==0.2.3
    # via ipykernel
-cryptography==46.0.3
+cryptography==46.0.5
    # via pyjwt
 debugpy==1.8.19
    # via ipykernel