mirror of https://github.com/ROCm/ROCm.git
xDiT diffusion inference v25.12 documentation update (#5786)
* Add xdit-diffusion ROCm docs page.
* Update template formatting and fix Sphinx warnings.
* Add System Validation section.
* Add software component versions/commits.
* Update to use the latest v25.10 image instead of v25.9.
* Update commands and add FLUX instructions.
* Update FLUX instructions, change the image tag, and describe the page as diffusion inference rather than video specifically.
* git rm xdit-video-diffusion.rst.
* Docs for v25.12.
* Add hyperlinks to components.
* Command fixes.
* Add the -Diffusers suffix to the Wan model repos.
* Simplify the YAML file and clean up the main RST page.
* Spelling; add 'js' to the wordlist.
* Fix merge conflict.

---------

Co-authored-by: Kristoffer <kristoffer.torp@amd.com>
@@ -261,6 +261,7 @@ Ioffe
 JAX's
 JAXLIB
 Jinja
+js
 JSON
 Jupyter
 KFD
@@ -1,109 +1,91 @@
-xdit_diffusion_inference:
-  docker:
-    - version: v25-11
-      pull_tag: rocm/pytorch-xdit:v25.11
-      docker_hub_url: https://hub.docker.com/r/rocm/pytorch-xdit
-      ROCm: 7.10.0
-      supported_models:
-        - group: Hunyuan Video
-          models:
-            - Hunyuan Video
-        - group: Wan-AI
-          models:
-            - Wan2.1
-            - Wan2.2
-        - group: FLUX
-          models:
-            - FLUX.1
-      whats_new:
-        - "Minor bug fixes and clarifications to READMEs."
-        - "Bumps TheRock, AITER, Diffusers, xDiT versions."
-        - "Changes Aiter rounding mode for faster gfx942 FWD Attention."
-      components:
-        TheRock: 3e3f834
-        rccl: d23d18f
-        composable_kernel: 2570462
-        rocm-libraries: 0588f07
-        rocm-systems: 473025a
-        torch: 73adac
-        torchvision: f5c6c2e
-        triton: 7416ffc
-        accelerate: 34c1779
-        aiter: de14bec
-        diffusers: 40528e9
-        xfuser: 83978b5
-        yunchang: 2c9b712
-
-    - version: v25-10
-      pull_tag: rocm/pytorch-xdit:v25.10
-      docker_hub_url: https://hub.docker.com/r/rocm/pytorch-xdit
-      ROCm: 7.9.0
-      supported_models:
-        - group: Hunyuan Video
-          models:
-            - Hunyuan Video
-        - group: Wan-AI
-          models:
-            - Wan2.1
-            - Wan2.2
-        - group: FLUX
-          models:
-            - FLUX.1
-      whats_new:
-        - "First official xDiT Docker Release for Diffusion Inference."
-        - "Supports gfx942 and gfx950 series (AMD Instinct™ MI300X, MI325X, MI350X, and MI355X)."
-        - "Support Wan 2.1, Wan 2.2, HunyuanVideo and Flux workloads."
-      components:
-        TheRock: 7afbe45
-        rccl: 9b04b2a
-        composable_kernel: b7a806f
-        rocm-libraries: f104555
-        rocm-systems: 25922d0
-        torch: 2.10.0a0+gite9c9017
-        torchvision: 0.22.0a0+966da7e
-        triton: 3.5.0+git52e49c12
-        accelerate: 1.11.0.dev0
-        aiter: 0.1.5.post4.dev20+ga25e55e79
-        diffusers: 0.36.0.dev0
-        xfuser: 0.4.4
-        yunchang: 0.6.3.post1
-
-  model_groups:
+docker:
+  pull_tag: rocm/pytorch-xdit:v25.12
+  docker_hub_url: https://hub.docker.com/r/rocm/pytorch-xdit
+  ROCm: 7.10.0
+  whats_new:
+    - "Adds T2V and TI2V support for Wan models."
+    - "Adds support for SD-3.5 T2I model."
+  components:
+    TheRock:
+      version: 3e3f834
+      url: https://github.com/ROCm/TheRock
+    rccl:
+      version: d23d18f
+      url: https://github.com/ROCm/rccl
+    composable_kernel:
+      version: 2570462
+      url: https://github.com/ROCm/composable_kernel
+    rocm-libraries:
+      version: 0588f07
+      url: https://github.com/ROCm/rocm-libraries
+    rocm-systems:
+      version: 473025a
+      url: https://github.com/ROCm/rocm-systems
+    torch:
+      version: 73adac
+      url: https://github.com/pytorch/pytorch
+    torchvision:
+      version: f5c6c2e
+      url: https://github.com/pytorch/vision
+    triton:
+      version: 7416ffc
+      url: https://github.com/triton-lang/triton
+    accelerate:
+      version: 34c1779
+      url: https://github.com/huggingface/accelerate
+    aiter:
+      version: de14bec
+      url: https://github.com/ROCm/aiter
+    diffusers:
+      version: 40528e9
+      url: https://github.com/huggingface/diffusers
+    xfuser:
+      version: ccba9d5
+      url: https://github.com/xdit-project/xDiT
+    yunchang:
+      version: 2c9b712
+      url: https://github.com/feifeibear/long-context-attention
+  supported_models:
     - group: Hunyuan Video
-      tag: hunyuan
+      js_tag: hunyuan
       models:
         - model: Hunyuan Video
-          page_tag: hunyuan_tag
-          model_name: hunyuanvideo
           model_repo: tencent/HunyuanVideo
           revision: refs/pr/18
           url: https://huggingface.co/tencent/HunyuanVideo
           github: https://github.com/Tencent-Hunyuan/HunyuanVideo
           mad_tag: pyt_xdit_hunyuanvideo
+          js_tag: hunyuan_tag
     - group: Wan-AI
-      tag: wan
+      js_tag: wan
       models:
         - model: Wan2.1
-          page_tag: wan_21_tag
-          model_name: wan2_1-i2v-14b-720p
-          model_repo: Wan-AI/Wan2.1-I2V-14B-720P
-          url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P
+          model_repo: Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
+          url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
           github: https://github.com/Wan-Video/Wan2.1
           mad_tag: pyt_xdit_wan_2_1
+          js_tag: wan_21_tag
         - model: Wan2.2
-          page_tag: wan_22_tag
-          model_name: wan2_2-i2v-a14b
-          model_repo: Wan-AI/Wan2.2-I2V-A14B
-          url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B
+          model_repo: Wan-AI/Wan2.2-I2V-A14B-Diffusers
+          url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers
           github: https://github.com/Wan-Video/Wan2.2
           mad_tag: pyt_xdit_wan_2_2
+          js_tag: wan_22_tag
     - group: FLUX
-      tag: flux
+      js_tag: flux
      models:
        - model: FLUX.1
-          page_tag: flux_1_tag
-          model_name: FLUX.1-dev
           model_repo: black-forest-labs/FLUX.1-dev
           url: https://huggingface.co/black-forest-labs/FLUX.1-dev
           github: https://github.com/black-forest-labs/flux
           mad_tag: pyt_xdit_flux
+          js_tag: flux_1_tag
+    - group: Stable Diffusion
+      js_tag: stablediffusion
+      models:
+        - model: stable-diffusion-3.5-large
+          model_repo: stabilityai/stable-diffusion-3.5-large
+          url: https://huggingface.co/stabilityai/stable-diffusion-3.5-large
+          github: https://github.com/Stability-AI/sd3.5
+          mad_tag: pyt_xdit_sd_3_5
+          js_tag: stable_diffusion_3_5_large_tag
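The net effect of this hunk: the versioned docker list collapses into a single docker mapping, and each component pin becomes a version/url pair so the templates can link component names to their repositories. A minimal sketch of the new lookup, loading the file directly with PyYAML and Jinja2 outside the Sphinx build (the file name follows the datatemplate path used in the RST hunks below; the real build goes through sphinx-datatemplates instead):

    import yaml
    from jinja2 import Template

    with open("xdit-inference-models.yaml") as f:
        data = yaml.safe_load(f)

    # Old schema: select one release out of a versioned list.
    # docker = next(d for d in data["xdit_diffusion_inference"]["docker"]
    #               if d["version"] == "v25-11")

    # New schema: a single mapping at the top level.
    docker = data["docker"]

    # Each component now carries a url next to the pinned commit, so a
    # list-table row can hyperlink the name (mirrors the RST template below).
    row = Template("* - `{{ name }} <{{ c.url }}>`_\n  - {{ c.version }}")
    for name, c in docker["components"].items():
        print(row.render(name=name, c=c))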
@@ -1,3 +1,5 @@
+:orphan:
+
 .. meta::
    :description: Learn to validate diffusion model video generation on MI300X, MI350X and MI355X accelerators using
                  prebuilt and optimized docker images.
@@ -17,7 +17,7 @@ benchmarking, see the version-specific documentation.
 
    * - ``rocm/pytorch-xdit:v25.11`` (latest)
      -
-       * ROCm 7.10.0 preview
+       * `ROCm 7.10.0 preview <https://rocm.docs.amd.com/en/7.10.0-preview/about/release-notes.html>`__
        * TheRock 3e3f834
        * rccl d23d18f
        * composable_kernel 2570462
@@ -37,7 +37,7 @@ benchmarking, see the version-specific documentation.
 
    * - ``rocm/pytorch-xdit:v25.10``
      -
-       * ROCm 7.9.0 preview
+       * `ROCm 7.9.0 preview <https://rocm.docs.amd.com/en/7.9.0-preview/about/release-notes.html>`__
        * TheRock 7afbe45
        * rccl 9b04b2a
        * composable_kernel b7a806f
@@ -26,7 +26,6 @@ training, fine-tuning, and inference. It leverages popular machine learning fram
 - :doc:`SGLang inference performance testing <benchmark-docker/sglang>`
 
-- :doc:`Deploying your model <deploy-your-model>`
-
 - :doc:`xDiT diffusion inference <xdit-diffusion-inference>`
 
+- :doc:`Deploying your model <deploy-your-model>`
@@ -11,11 +11,14 @@ xDiT diffusion inference
 
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
 
-   {% set docker = data.xdit_diffusion_inference.docker | selectattr("version", "equalto", "v25-11") | first %}
-   {% set model_groups = data.xdit_diffusion_inference.model_groups%}
+   {% set docker = data.docker %}
 
-   The `rocm/pytorch-xdit <{{ docker.docker_hub_url }}>`_ Docker image offers a prebuilt, optimized environment based on `xDiT <https://github.com/xdit-project/xDiT>`_ for
-   benchmarking diffusion model video and image generation on gfx942 and gfx950 series (AMD Instinct™ MI300X, MI325X, MI350X, and MI355X) GPUs.
+   The `rocm/pytorch-xdit <{{ docker.docker_hub_url }}>`_ Docker image offers
+   a prebuilt, optimized environment based on `xDiT
+   <https://github.com/xdit-project/xDiT>`_ for benchmarking diffusion model
+   video and image generation on AMD Instinct MI355X, MI350X (gfx950), MI325X,
+   and MI300X (gfx942) GPUs.
+
    The image runs ROCm **{{docker.ROCm}}** (preview) based on `TheRock <https://github.com/ROCm/TheRock>`_
    and includes the following components:
 
@@ -27,9 +30,9 @@ xDiT diffusion inference
    * - Software component
      - Version
 
-   {% for component_name, component_version in docker.components.items() %}
-   * - {{ component_name }}
-     - {{ component_version }}
+   {% for component_name, component_data in docker.components.items() %}
+   * - `{{ component_name }} <{{ component_data.url }}>`_
+     - {{ component_data.version }}
    {% endfor %}
 
 Follow this guide to pull the required image, spin up a container, download the model, and run a benchmark.
@@ -37,10 +40,10 @@ For preview and development releases, see `amdsiloai/pytorch-xdit <https://hub.d
 What's new
 ==========
 
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
 
-   {% set docker = data.xdit_diffusion_inference.docker | selectattr("version", "equalto", "v25-11") | first %}
-   {% set model_groups = data.xdit_diffusion_inference.model_groups%}
+   {% set docker = data.docker %}
 
    {% for item in docker.whats_new %}
    * {{ item }}
@@ -57,14 +60,7 @@ vary by model -- select one to get started.
 
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
 
-   {% set docker = data.xdit_diffusion_inference.docker | selectattr("version", "equalto", "v25-11") | first %}
-   {% set model_groups = data.xdit_diffusion_inference.model_groups %}
-
-   {# Create a lookup for supported models #}
-   {% set supported_lookup = {} %}
-   {% for supported in docker.supported_models %}
-   {% set _ = supported_lookup.update({supported.group: supported.models}) %}
-   {% endfor %}
+   {% set docker = data.docker %}
 
    .. raw:: html
 
@@ -72,10 +68,8 @@ vary by model -- select one to get started.
       <div class="row gx-0">
        <div class="col-2 me-1 px-2 model-param-head">Model</div>
        <div class="row col-10 pe-0">
-   {% for model_group in model_groups %}
-   {% if model_group.group in supported_lookup %}
-        <div class="col-4 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
-   {% endif %}
+   {% for model_group in docker.supported_models %}
+        <div class="col-6 px-2 model-param" data-param-k="model-group" data-param-v="{{ model_group.js_tag }}" tabindex="0">{{ model_group.group }}</div>
    {% endfor %}
        </div>
      </div>
@@ -83,29 +77,24 @@ vary by model -- select one to get started.
      <div class="row gx-0 pt-1">
        <div class="col-2 me-1 px-2 model-param-head">Variant</div>
        <div class="row col-10 pe-0">
-   {% for model_group in model_groups %}
-   {% if model_group.group in supported_lookup %}
-   {% set supported_models = supported_lookup[model_group.group] %}
+   {% for model_group in docker.supported_models %}
    {% set models = model_group.models %}
    {% for model in models %}
-   {% if model.model in supported_models %}
    {% if models|length % 3 == 0 %}
-        <div class="col-4 px-2 model-param" data-param-k="model" data-param-v="{{ model.page_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
+        <div class="col-4 px-2 model-param" data-param-k="model" data-param-v="{{ model.js_tag }}" data-param-group="{{ model_group.js_tag }}" tabindex="0">{{ model.model }}</div>
    {% else %}
-        <div class="col-6 px-2 model-param" data-param-k="model" data-param-v="{{ model.page_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
+        <div class="col-6 px-2 model-param" data-param-k="model" data-param-v="{{ model.js_tag }}" data-param-group="{{ model_group.js_tag }}" tabindex="0">{{ model.model }}</div>
-   {% endif %}
    {% endif %}
    {% endfor %}
-   {% endif %}
    {% endfor %}
        </div>
      </div>
    </div>
 
-   {% for model_group in model_groups %}
+   {% for model_group in docker.supported_models %}
    {% for model in model_group.models %}
 
-   .. container:: model-doc {{ model.page_tag }}
+   .. container:: model-doc {{ model.js_tag }}
 
      .. note::
 
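With the supported_lookup indirection gone, a single field, js_tag, keys both the selector cells and the per-model containers. A standalone render of one group cell with Jinja2, using the Wan-AI values from the YAML hunk above, shows the attribute the page's filtering js presumably matches on:

    from jinja2 import Template

    # Render one group-selector cell as the template above would.
    cell = Template(
        '<div class="col-6 px-2 model-param" data-param-k="model-group" '
        'data-param-v="{{ g.js_tag }}" tabindex="0">{{ g.group }}</div>'
    )
    print(cell.render(g={"group": "Wan-AI", "js_tag": "wan"}))
    # Emits data-param-v="wan", which the variant cells reference through
    # data-param-group="{{ model_group.js_tag }}".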
@@ -136,7 +125,7 @@ Pull the Docker image
 
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
 
-   {% set docker = data.xdit_diffusion_inference.docker | selectattr("version", "equalto", "v25-11") | first %}
+   {% set docker = data.docker %}
 
    For this tutorial, it's recommended to use the latest ``{{ docker.pull_tag }}`` Docker image.
    Pull the image using the following command:
@@ -148,15 +137,17 @@ Pull the Docker image
 Validate and benchmark
 ======================
 
+.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
+
+   {% set docker = data.docker %}
+
 Once the image has been downloaded you can follow these steps to
 run benchmarks and generate outputs.
 
-.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
-
-   {% for model_group in model_groups %}
+   {% for model_group in docker.supported_models %}
    {% for model in model_group.models %}
 
-   .. container:: model-doc {{model.page_tag}}
+   .. container:: model-doc {{model.js_tag}}
 
      The following commands are written for {{ model.model }}.
      See :ref:`xdit-video-diffusion-supported-models` to switch to another available model.
@@ -171,12 +162,11 @@ You can either use an existing Hugging Face cache or download the model fresh in
 
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
 
-   {% set docker = data.xdit_diffusion_inference.docker | selectattr("version", "equalto", "v25-11") | first %}
-   {% set model_groups = data.xdit_diffusion_inference.model_groups%}
+   {% set docker = data.docker %}
 
-   {% for model_group in model_groups %}
+   {% for model_group in docker.supported_models %}
    {% for model in model_group.models %}
-   .. container:: model-doc {{model.page_tag}}
+   .. container:: model-doc {{model.js_tag}}
 
      .. tab-set::
 
@@ -264,11 +254,12 @@ Run inference
 
 .. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml
 
-   {% set model_groups = data.xdit_diffusion_inference.model_groups%}
-   {% for model_group in model_groups %}
+   {% set docker = data.docker %}
+
+   {% for model_group in docker.supported_models %}
    {% for model in model_group.models %}
 
-   .. container:: model-doc {{ model.page_tag }}
+   .. container:: model-doc {{ model.js_tag }}
 
      .. tab-set::
 
@@ -309,7 +300,7 @@ Run inference
            mkdir results
 
            torchrun --nproc_per_node=8 run.py \
-             --model tencent/HunyuanVideo \
+             --model {{ model.model_repo }} \
              --prompt "In the large cage, two puppies were wagging their tails at each other." \
              --height 720 --width 1280 --num_frames 129 \
              --num_inference_steps 50 --warmup_steps 1 --n_repeats 1 \
@@ -317,40 +308,45 @@ Run inference
              --enable_tiling --enable_slicing \
              --use_torch_compile \
              --bench_output results
 
    {% endif %}
    {% if model.model == "Wan2.1" %}
-            cd Wan2.1
+            cd Wan
            mkdir results
 
-            torchrun --nproc_per_node=8 run.py \
-              --task i2v-14B \
-              --size 720*1280 --frame_num 81 \
-              --ckpt_dir "${HF_HOME}/hub/models--Wan-AI--Wan2.1-I2V-14B-720P/snapshots/8823af45fcc58a8aa999a54b04be9abc7d2aac98/" \
-              --image "/app/Wan2.1/examples/i2v_input.JPG" \
-              --ulysses_size 8 --ring_size 1 \
+            torchrun --nproc_per_node=8 /app/Wan/run.py \
+              --task i2v \
+              --height 720 \
+              --width 1280 \
+              --model {{ model.model_repo }} \
+              --img_file_path /app/Wan/i2v_input.JPG \
+              --ulysses_degree 8 \
+              --seed 42 \
+              --num_frames 81 \
              --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
-              --benchmark_output_directory results --save_file video.mp4 --num_benchmark_steps 1 \
-              --offload_model 0 \
-              --vae_dtype bfloat16 \
-              --allow_tf32 \
-              --compile
+              --num_repetitions 1 \
+              --num_inference_steps 40 \
+              --use_torch_compile
    {% endif %}
    {% if model.model == "Wan2.2" %}
-            cd Wan2.2
+            cd Wan
            mkdir results
 
-            torchrun --nproc_per_node=8 run.py \
-              --task i2v-A14B \
-              --size 720*1280 --frame_num 81 \
-              --ckpt_dir "${HF_HOME}/hub/models--Wan-AI--Wan2.2-I2V-A14B/snapshots/206a9ee1b7bfaaf8f7e4d81335650533490646a3/" \
-              --image "/app/Wan2.2/examples/i2v_input.JPG" \
-              --ulysses_size 8 --ring_size 1 \
+            torchrun --nproc_per_node=8 /app/Wan/run.py \
+              --task i2v \
+              --height 720 \
+              --width 1280 \
+              --model {{ model.model_repo }} \
+              --img_file_path /app/Wan/i2v_input.JPG \
+              --ulysses_degree 8 \
+              --seed 42 \
+              --num_frames 81 \
              --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
-              --benchmark_output_directory results --save_file video.mp4 --num_benchmark_steps 1 \
-              --offload_model 0 \
-              --vae_dtype bfloat16 \
-              --allow_tf32 \
-              --compile
+              --num_repetitions 1 \
+              --num_inference_steps 40 \
+              --use_torch_compile
    {% endif %}
 
    {% if model.model == "FLUX.1" %}
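The Wan rewrite above pairs with the earlier YAML change: both Wan blocks move off the upstream per-version checkouts (the Wan2.1/Wan2.2 directories, --task i2v-14B/i2v-A14B, --size 720*1280, and --ckpt_dir paths into the local Hugging Face cache) onto a single harness at /app/Wan/run.py that takes a Hugging Face repo id directly, so --model {{ model.model_repo }} now expands to, for example, Wan-AI/Wan2.1-I2V-14B-720P-Diffusers.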
@@ -358,7 +354,7 @@ Run inference
            mkdir results
 
            torchrun --nproc_per_node=8 /app/Flux/run.py \
-             --model black-forest-labs/FLUX.1-dev \
+             --model {{ model.model_repo }} \
              --seed 42 \
              --prompt "A small cat" \
              --height 1024 \
@@ -369,12 +365,28 @@ Run inference
              --no_use_resolution_binning \
              --ulysses_degree 8 \
              --use_torch_compile \
-              --num_repetitions 1 \
-              --benchmark_output_directory results
+              --num_repetitions 50
 
    {% endif %}
 
-   The generated video will be stored under the results directory. For the actual benchmark step runtimes, see {% if model.model == "Hunyuan Video" %}stdout.{% elif model.model in ["Wan2.1", "Wan2.2"] %}results/outputs/rank0_*.json{% elif model.model == "FLUX.1" %}results/timing.json{% endif %}
+   {% if model.model == "stable-diffusion-3.5-large" %}
+            cd StableDiffusion3.5
+            mkdir results
+
+            torchrun --nproc_per_node=8 /app/StableDiffusion3.5/run.py \
+              --model {{ model.model_repo }} \
+              --num_inference_steps 28 \
+              --prompt "A capybara holding a sign that reads Hello World" \
+              --use_torch_compile \
+              --pipefusion_parallel_degree 4 \
+              --use_cfg_parallel \
+              --num_repetitions 50 \
+              --dtype torch.float16 \
+              --output_path results
+
+   {% endif %}
+
+   The generated video will be stored under the results directory. For the actual benchmark step runtimes, see {% if model.model == "Hunyuan Video" %}stdout.{% elif model.model in ["Wan2.1", "Wan2.2"] %}results/outputs/rank0_*.json{% elif model.model == "FLUX.1" %}results/timing.json{% elif model.model == "stable-diffusion-3.5-large"%}benchmark_results.csv{% endif %}
 
    {% if model.model == "FLUX.1" %}You may also use ``run_usp.py`` which implements USP without modifying the default diffusers pipeline. {% endif %}
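With four model groups now flowing through data.docker into every template on this page, a small standalone check of the YAML can catch a missing js_tag or model_repo before a Sphinx build does. A possible sanity check, assuming the layout from the first hunk (revision is optional; only Hunyuan Video sets it):

    import yaml

    REQUIRED = {"model", "model_repo", "url", "github", "mad_tag", "js_tag"}

    with open("xdit-inference-models.yaml") as f:
        docker = yaml.safe_load(f)["docker"]

    for group in docker["supported_models"]:
        # Each group needs the js_tag the selector cells key on.
        assert "js_tag" in group, f"{group['group']}: missing js_tag"
        for model in group["models"]:
            missing = REQUIRED - model.keys()
            assert not missing, f"{model['model']}: missing {missing}"
    print("xdit-inference-models.yaml: schema OK")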