From 1515fb3779d943a65dd509e6820b61a296fda6f8 Mon Sep 17 00:00:00 2001 From: peterjunpark Date: Mon, 27 Oct 2025 16:22:28 -0400 Subject: [PATCH] Revert "Add xdit diffusion docs (#5576)" (#5580) This reverts commit 4132a2609c49ba2ef735539d816dcaa87b1ccae7. --- .wordlist.txt | 6 - docs/conf.py | 1 - .../inference/xdit-inference-models.yaml | 38 --- docs/how-to/rocm-for-ai/inference/index.rst | 4 +- .../inference/xdit-video-diffusion.rst | 322 ------------------ docs/sphinx/_toc.yml.in | 2 - 6 files changed, 1 insertion(+), 372 deletions(-) delete mode 100644 docs/data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml delete mode 100644 docs/how-to/rocm-for-ai/inference/xdit-video-diffusion.rst diff --git a/.wordlist.txt b/.wordlist.txt index 68185fbe9..aed9dc1cc 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -220,7 +220,6 @@ href Hyperparameters HybridEngine Huggingface -Hunyuan IB ICD ICT @@ -532,7 +531,6 @@ UAC UC UCC UCX -ud UE UIF UMC @@ -844,7 +842,6 @@ pallas parallelization parallelizing param -params parameterization passthrough pe @@ -891,7 +888,6 @@ querySelectorAll queueing qwen radeon -rc rccl rdc rdma @@ -1056,8 +1052,6 @@ writebacks wrreq wzo xargs -xdit -xDiT xGMI xPacked xz diff --git a/docs/conf.py b/docs/conf.py index 85c6863ba..5a6298e04 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -175,7 +175,6 @@ article_pages = [ {"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.10.0-20250812", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/sglang-history", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference", "os": ["linux"]}, - {"file": "how-to/rocm-for-ai/inference/xdit-video-diffusion", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]}, {"file": "how-to/rocm-for-ai/inference-optimization/index", "os": ["linux"]}, diff --git 
a/docs/data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml b/docs/data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml deleted file mode 100644 index 60f52aae7..000000000 --- a/docs/data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml +++ /dev/null @@ -1,38 +0,0 @@ -xdit_video_diffusion: - docker: - pull_tag: amdsiloai/pytorch-xdit:v25.9 - docker_hub_url: https://hub.docker.com/r/amdsiloai/pytorch-xdit - ROCm: 7.0.0rc - components: - TheRock: 7afbe45 - rccl: 9b04b2a - composable_kernel: b7a806f - rocm-libraries: f104555 - rocm-systems: 25922d0 - torch: 2.10.0a0+git3caf6da - torchvision: 0.22.0a0+966da7e - triton: 3.5.0+gitea06d636 - - model_groups: - - group: Hunyuan Video - tag: hunyuan - models: - - model: Hunyuan Video - model_name: hunyuanvideo - model_repo: tencent/HunyuanVideo - revision: refs/pr/18 - url: https://huggingface.co/tencent/HunyuanVideo - github: https://github.com/Tencent-Hunyuan/HunyuanVideo - - group: Wan-AI - tag: wan - models: - - model: Wan2.1 - model_name: wan2.1_i2v_14b_720p - model_repo: Wan-AI/Wan2.1-I2V-14B-720P - url: https://huggingface.co/Wan-AI/Wan2.1-I2V-14B-720P - github: https://github.com/Wan-Video/Wan2.1 - - model: Wan2.2 - model_name: wan2.2-i2v-a14b - model_repo: Wan-AI/Wan2.2-I2V-A14B - url: https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B - github: https://github.com/Wan-Video/Wan2.2 \ No newline at end of file diff --git a/docs/how-to/rocm-for-ai/inference/index.rst b/docs/how-to/rocm-for-ai/inference/index.rst index 4f66fd82f..6eb705141 100644 --- a/docs/how-to/rocm-for-ai/inference/index.rst +++ b/docs/how-to/rocm-for-ai/inference/index.rst @@ -26,6 +26,4 @@ training, fine-tuning, and inference. 
It leverages popular machine learning fram - :doc:`SGLang inference performance testing ` -- :doc:`xDiT video inference ` - -- :doc:`Deploying your model ` \ No newline at end of file +- :doc:`Deploying your model ` diff --git a/docs/how-to/rocm-for-ai/inference/xdit-video-diffusion.rst b/docs/how-to/rocm-for-ai/inference/xdit-video-diffusion.rst deleted file mode 100644 index af98cc187..000000000 --- a/docs/how-to/rocm-for-ai/inference/xdit-video-diffusion.rst +++ /dev/null @@ -1,322 +0,0 @@ -.. meta:: - :description: Learn to validate diffusion model video generation on MI300X, MI350X and MI355X accelerators using - prebuilt and optimized docker images. - :keywords: xDiT, diffusion, video, video generation, validate, benchmark - -******************** -xDiT video inference -******************** - -.. _xdit-video-diffusion: - -.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml - - {% set docker = data.xdit_video_diffusion.docker %} - {% set model_groups = data.xdit_video_diffusion.model_groups%} - - The `amdsiloai/pytorch-xdit Docker <{{ docker.docker_hub_url }}>`_ image offers a prebuilt, optimized environment based on `xDiT `_ for - benchmarking diffusion model video generation on - AMD Instinct™ MI355X, MI350X (gfx950), and MI300X GPUs. - The image runs ROCm `{{docker.ROCm}}` based on `TheRock `_ - and includes the following components: - - .. tab-set:: - - .. tab-item:: {{ docker.pull_tag }} - - .. list-table:: - :header-rows: 1 - - * - Software component - - Version - - {% for component_name, component_version in docker.components.items() %} - * - {{ component_name }} - - {{ component_version }} - {% endfor %} - -Follow this guide to pull the required image, spin up a container, download the model, and run a benchmark. - -What's new -========== - -- Initial release -- ROCm: 7.0.0rc -- Added support for AMD Instinct™ MI355X, MI350X (gfx950), and MI300X (gfx942) GPUs. 
-- Added support for Wan 2.1, Wan 2.2 and Hunyuan Video models with MIOpen optimizations. - -.. _xdit-video-diffusion-supported-models: - -Supported models -================ - -The following models are supported for inference performance benchmarking. -Some instructions, commands, and recommendations in this documentation might -vary by model -- select one to get started. - -.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml - - {% set docker = data.xdit_video_diffusion.docker %} - {% set model_groups = data.xdit_video_diffusion.model_groups%} - - .. raw:: html - -
-
-
Model
-
- {% for model_group in model_groups %} -
{{ model_group.group }}
- {% endfor %} -
-
- -
-
Variant
-
- {% for model_group in model_groups %} - {% set models = model_group.models %} - {% for model in models %} - {% if models|length % 3 == 0 %} -
{{ model.model }}
- {% else %} -
{{ model.model }}
- {% endif %} - {% endfor %} - {% endfor %} -
-
-
- - {% for model_group in model_groups %} - {% for model in model_group.models %} - - .. container:: model-doc {{model.model_name}} - - .. note:: - - To learn more about your specific model see the `{{ model.model }} model card on Hugging Face <{{ model.url }}>`_ - or visit the `GitHub page <{{ model.github }}>`__. Note that some models require access authorization before use via an - external license agreement through a third party. - - {% endfor %} - {% endfor %} - -System validation -================= - -Before running AI workloads, it's important to validate that your AMD hardware is configured -correctly and performing optimally. - -If you have already validated your system settings, including aspects like NUMA auto-balancing, you -can skip this step. Otherwise, complete the procedures in the :ref:`System validation and -optimization ` guide to properly configure your system settings -before starting. - -To test for optimal performance, consult the recommended :ref:`System health benchmarks -`. This suite of tests will help you verify and fine-tune your -system's configuration. - -Pull the Docker image -===================== - -.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml - - {% set docker = data.xdit_video_diffusion.docker %} - - For this tutorial, it's recommended to use the ``{{ docker.pull_tag }}`` Docker image. - Pull the image using the following command: - - .. code-block:: shell - - docker pull {{ docker.pull_tag }} - -Validate and benchmark -====================== - -Once the image has been downloaded you can follow these steps to -run benchmarks and generate a video. - -.. warning:: - - If your host/OS ROCm installation is below 6.4.2 (see with ``apt show rocm-libs``) you need to export - the ``HSA_NO_SCRATCH_RECLAIM=1`` environment variable inside the container, or the workload will crash. - If possible, ask your system administrator to upgrade ROCm. - -.. 
datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml - - {% for model_group in model_groups %} - {% for model in model_group.models %} - - .. container:: model-doc {{model.model_name}} - - The following commands are written for {{ model.model }}. - See :ref:`xdit-video-diffusion-supported-models` to switch to another available model. - - {% endfor %} - {% endfor %} - -Choose your setup method ------------------------- - -You can either use an existing Hugging Face cache or download the model fresh inside the container. - -.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml - - {% set docker = data.xdit_video_diffusion.docker %} - {% set model_groups = data.xdit_video_diffusion.model_groups%} - - {% for model_group in model_groups %} - {% for model in model_group.models %} - .. container:: model-doc {{model.model_name}} - - .. tab-set:: - - .. tab-item:: Option 1: Use existing Hugging Face cache - - If you already have models downloaded on your host system, you can mount your existing cache. - - 1. Set your Hugging Face cache location. - - .. code-block:: shell - - export HF_HOME=/your/hf_cache/location - - 2. Download the model (if not already cached). - - .. code-block:: shell - - huggingface-cli download {{ model.model_repo }} {% if model.revision %} --revision {{ model.revision }} {% endif %} - - 3. Launch the container with mounted cache. - - .. code-block:: shell - - docker run \ - -it --rm \ - --cap-add=SYS_PTRACE \ - --security-opt seccomp=unconfined \ - --user root \ - --device=/dev/kfd \ - --device=/dev/dri \ - --group-add video \ - --ipc=host \ - --network host \ - --privileged \ - --shm-size 128G \ - --name pytorch-xdit \ - -e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ - -e HF_HOME=/app/huggingface_models \ - -v $HF_HOME:/app/huggingface_models \ - {{ docker.pull_tag }} - - .. 
tab-item:: Option 2: Download inside container - - If you prefer to keep the container self-contained or don't have an existing cache. - - 1. Launch the container - - .. code-block:: shell - - docker run \ - -it --rm \ - --cap-add=SYS_PTRACE \ - --security-opt seccomp=unconfined \ - --user root \ - --device=/dev/kfd \ - --device=/dev/dri \ - --group-add video \ - --ipc=host \ - --network host \ - --privileged \ - --shm-size 128G \ - --name pytorch-xdit \ - -e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ - {{ docker.pull_tag }} - - 2. Inside the container, set the Hugging Face cache location and download the model. - - .. code-block:: shell - - export HF_HOME=/your/hf_cache/location - huggingface-cli download {{ model.model_repo }} {% if model.revision %} --revision {{ model.revision }} {% endif %} - - .. warning:: - - Models will be downloaded to the container's filesystem and will be lost when the container is removed unless you persist the data with a volume. - {% endfor %} - {% endfor %} - -Run inference -============= - -.. datatemplate:yaml:: /data/how-to/rocm-for-ai/inference/xdit-inference-models.yaml - - {% set model_groups = data.xdit_video_diffusion.model_groups%} - {% for model_group in model_groups %} - {% for model in model_group.models %} - - .. container:: model-doc {{ model.model_name }} - - To run the benchmarks for {{ model.model }}, use the following command: - - .. code-block:: shell - {% if model.model == "Hunyuan Video" %} - cd /app/Hunyuanvideo - mkdir results - - torchrun --nproc_per_node=8 run.py \ - --model tencent/HunyuanVideo \ - --prompt "In the large cage, two puppies were wagging their tails at each other." 
\ - --height 720 --width 1280 --num_frames 129 \ - --num_inference_steps 50 --warmup_steps 1 --n_repeats 1 \ - --ulysses_degree 8 \ - --enable_tiling --enable_slicing \ - --use_torch_compile \ - --bench_output results - {% endif %} - {% if model.model == "Wan2.1" %} - cd Wan2.1 - mkdir results - - torchrun --nproc_per_node=8 run.py \ - --task i2v-14B \ - --size 720*1280 --frame_num 81 \ - --ckpt_dir "${HF_HOME}/hub/models--Wan-AI--Wan2.1-I2V-14B-720P/snapshots/8823af45fcc58a8aa999a54b04be9abc7d2aac98/" \ - --image "/app/Wan2.1/examples/i2v_input.JPG" \ - --ulysses_size 8 --ring_size 1 \ - --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \ - --benchmark_output_directory results --save_file video.mp4 --num_benchmark_steps 1 \ - --offload_model 0 \ - --vae_dtype bfloat16 - {% endif %} - {% if model.model == "Wan2.2" %} - cd Wan2.2 - mkdir results - - torchrun --nproc_per_node=8 run.py \ - --task i2v-A14B \ - --size 720*1280 --frame_num 81 \ - --ckpt_dir "${HF_HOME}/hub/models--Wan-AI--Wan2.2-I2V-A14B/snapshots/206a9ee1b7bfaaf8f7e4d81335650533490646a3/" \ - --image "/app/Wan2.2/examples/i2v_input.JPG" \ - --ulysses_size 8 --ring_size 1 \ - --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. 
The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \ - --benchmark_output_directory results --save_file video.mp4 --num_benchmark_steps 1 \ - --offload_model 0 \ - --vae_dtype bfloat16 - {% endif %} - - {% if model.model in ["Wan2.1", "Wan2.2"] %} - For additional performance improvements, consider adding the ``--compile`` flag to the above command. Note that this can significantly increase startup time on the first call. - {% endif %} - - The generated video will be stored under the results directory. For the actual benchmark step runtimes, see {% if model.model == "Hunyuan Video" %}stdout.{% elif model.model in ["Wan2.1", "Wan2.2"] %}results/outputs/rank0_*.json{% endif %} - - {% endfor %} - {% endfor %} - -Known limitations -================= - -- OOB tuning: Currently only Instinct MI300X has been tuned for in the gfx942 - series. Other gfx942 GPUs might not perform optimally out-of-the-box. \ No newline at end of file diff --git a/docs/sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in index 253f4416f..a0a5084ff 100644 --- a/docs/sphinx/_toc.yml.in +++ b/docs/sphinx/_toc.yml.in @@ -117,8 +117,6 @@ subtrees: title: SGLang inference performance testing - file: how-to/rocm-for-ai/inference/benchmark-docker/sglang-distributed.rst title: SGLang distributed inference with Mooncake - - file: how-to/rocm-for-ai/inference/xdit-video-diffusion.rst - title: xDiT video inference - file: how-to/rocm-for-ai/inference/deploy-your-model.rst title: Deploy your model