Commit 0d3f911 ("feedback")
1 parent: ec5594c

4 files changed, +67 / -29 lines

docs/source/en/api/pipelines/cogvideox.md
Lines changed: 9 additions & 2 deletions
```diff
@@ -15,7 +15,9 @@
 
 <div style="float: right;">
   <div class="flex flex-wrap space-x-1">
-    <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
+    <a href="https://huggingface.co/docs/diffusers/main/en/tutorials/using_peft_for_inference" target="_blank" rel="noopener">
+      <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
+    </a>
   </div>
 </div>
 
```

```diff
@@ -90,7 +92,7 @@ export_to_video(video, "output.mp4", fps=8)
 </hfoption>
 <hfoption id="inference speed">
 
-Compilation is slow the first time but subsequent calls to the pipeline are faster.
+[Compilation](../../optimization/fp16#torchcompile) is slow the first time but subsequent calls to the pipeline are faster.
 
 The average inference time with torch.compile on an 80GB A100 is 76.27 seconds compared to 96.89 seconds for an uncompiled model.
```
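For context, a minimal sketch of the compiled setup this hunk links to; the checkpoint id and generation settings below are illustrative assumptions, not part of this commit:

```py
import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video

# assumed checkpoint; any CogVideoX checkpoint should behave the same way
pipeline = CogVideoXPipeline.from_pretrained(
    "THUDM/CogVideoX-2b", torch_dtype=torch.float16
).to("cuda")

# compile the transformer; the first call pays the compilation cost,
# subsequent calls reuse the compiled graph and run faster
pipeline.transformer = torch.compile(pipeline.transformer)

video = pipeline(
    prompt="A panda playing a guitar in a bamboo forest",
    num_inference_steps=50,
).frames[0]
export_to_video(video, "output.mp4", fps=8)
```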

````diff
@@ -132,6 +134,9 @@ export_to_video(video, "output.mp4", fps=8)
 
 - CogVideoX supports LoRAs with [`~loaders.CogVideoXLoraLoaderMixin.load_lora_weights`].
 
+<details>
+<summary>Show example code</summary>
+
 ```py
 import torch
 from diffusers import CogVideoXPipeline
@@ -167,6 +172,8 @@ export_to_video(video, "output.mp4", fps=8)
 export_to_video(video, "output.mp4", fps=16)
 ```
 
+</details>
+
 - The text-to-video (T2V) checkpoints work best with a resolution of 1360x768 because that was the resolution it was pretrained on.
 
 - The image-to-video (I2V) checkpoints work with multiple resolutions. The width can vary from 768 to 1360, but the height must be 768. Both height and width must be divisible by 16.
````
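The LoRA example wrapped in `<details>` above is elided by the diff view; a minimal sketch of the flow it collapses, with a placeholder LoRA repository id:

```py
import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video

pipeline = CogVideoXPipeline.from_pretrained(
    "THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16
).to("cuda")

# "some-user/cogvideox-lora" is a placeholder, not a real checkpoint
pipeline.load_lora_weights("some-user/cogvideox-lora", adapter_name="custom")
pipeline.set_adapters(["custom"], [0.9])

video = pipeline(
    prompt="A panda playing a guitar in a bamboo forest",
    num_inference_steps=50,
).frames[0]
export_to_video(video, "output.mp4", fps=16)
```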

docs/source/en/api/pipelines/hunyuan_video.md
Lines changed: 30 additions & 23 deletions
```diff
@@ -14,7 +14,9 @@
 
 <div style="float: right;">
   <div class="flex flex-wrap space-x-1">
-    <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
+    <a href="https://huggingface.co/docs/diffusers/main/en/tutorials/using_peft_for_inference" target="_blank" rel="noopener">
+      <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
+    </a>
   </div>
 </div>
 
```

```diff
@@ -46,13 +48,13 @@ from diffusers.utils import export_to_video
 
 # quantize weights to int4 with bitsandbytes
 pipeline_quant_config = PipelineQuantizationConfig(
-  quant_backend="bitsandbytes_4bit",
-  quant_kwargs={
-    "load_in_4bit": True,
-    "bnb_4bit_quant_type": "nf4",
-    "bnb_4bit_compute_dtype": torch.bfloat16
-  },
-  components_to_quantize=["transformer"]
+    quant_backend="bitsandbytes_4bit",
+    quant_kwargs={
+        "load_in_4bit": True,
+        "bnb_4bit_quant_type": "nf4",
+        "bnb_4bit_compute_dtype": torch.bfloat16
+    },
+    components_to_quantize=["transformer"]
 )
 
 pipeline = HunyuanVideoPipeline.from_pretrained(
```
````diff
@@ -73,7 +75,7 @@ export_to_video(video, "output.mp4", fps=15)
 </hfoption>
 <hfoption id="inference speed">
 
-Compilation is slow the first time but subsequent calls to the pipeline are faster.
+[Compilation](../../optimization/fp16#torchcompile) is slow the first time but subsequent calls to the pipeline are faster.
 
 ```py
 import torch
````
```diff
@@ -83,13 +85,13 @@ from diffusers.utils import export_to_video
 
 # quantize weights to int4 with bitsandbytes
 pipeline_quant_config = PipelineQuantizationConfig(
-  quant_backend="bitsandbytes_4bit",
-  quant_kwargs={
-    "load_in_4bit": True,
-    "bnb_4bit_quant_type": "nf4",
-    "bnb_4bit_compute_dtype": torch.bfloat16
-  },
-  components_to_quantize=["transformer"]
+    quant_backend="bitsandbytes_4bit",
+    quant_kwargs={
+        "load_in_4bit": True,
+        "bnb_4bit_quant_type": "nf4",
+        "bnb_4bit_compute_dtype": torch.bfloat16
+    },
+    components_to_quantize=["transformer"]
 )
 
 pipeline = HunyuanVideoPipeline.from_pretrained(
```
````diff
@@ -120,6 +122,9 @@ export_to_video(video, "output.mp4", fps=15)
 
 - HunyuanVideo supports LoRAs with [`~loaders.HunyuanVideoLoraLoaderMixin.load_lora_weights`].
 
+<details>
+<summary>Show example code</summary>
+
 ```py
 import torch
 from diffusers import AutoModel, HunyuanVideoPipeline
````
```diff
@@ -128,13 +133,13 @@ export_to_video(video, "output.mp4", fps=15)
 
 # quantize weights to int4 with bitsandbytes
 pipeline_quant_config = PipelineQuantizationConfig(
-  quant_backend="bitsandbytes_4bit",
-  quant_kwargs={
-    "load_in_4bit": True,
-    "bnb_4bit_quant_type": "nf4",
-    "bnb_4bit_compute_dtype": torch.bfloat16
-  },
-  components_to_quantize=["transformer"]
+    quant_backend="bitsandbytes_4bit",
+    quant_kwargs={
+        "load_in_4bit": True,
+        "bnb_4bit_quant_type": "nf4",
+        "bnb_4bit_compute_dtype": torch.bfloat16
+    },
+    components_to_quantize=["transformer"]
 )
 
 pipeline = HunyuanVideoPipeline.from_pretrained(
```
````diff
@@ -159,6 +164,8 @@ export_to_video(video, "output.mp4", fps=15)
 export_to_video(video, "output.mp4", fps=15)
 ```
 
+</details>
+
 - Refer to the table below for recommended inference values.
 
 | parameter | recommended value |
````
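The collapsed LoRA example above is truncated by the diff view; a minimal sketch of the pattern it wraps, combining the pipeline-level quantization shown in this file with a placeholder LoRA repository id, assuming a recent diffusers release that supports `quantization_config` in `from_pretrained`:

```py
import torch
from diffusers import HunyuanVideoPipeline
from diffusers.quantizers import PipelineQuantizationConfig
from diffusers.utils import export_to_video

# same quantization settings as the hunks above
pipeline_quant_config = PipelineQuantizationConfig(
    quant_backend="bitsandbytes_4bit",
    quant_kwargs={
        "load_in_4bit": True,
        "bnb_4bit_quant_type": "nf4",
        "bnb_4bit_compute_dtype": torch.bfloat16,
    },
    components_to_quantize=["transformer"],
)

pipeline = HunyuanVideoPipeline.from_pretrained(
    "hunyuanvideo-community/HunyuanVideo",
    quantization_config=pipeline_quant_config,
    torch_dtype=torch.bfloat16,
).to("cuda")

# "some-user/hunyuanvideo-lora" is a placeholder, not a real checkpoint
pipeline.load_lora_weights("some-user/hunyuanvideo-lora")

video = pipeline(prompt="A cat walks on the grass").frames[0]
export_to_video(video, "output.mp4", fps=15)
```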

docs/source/en/api/pipelines/ltx_video.md
Lines changed: 14 additions & 2 deletions
```diff
@@ -14,7 +14,9 @@
 
 <div style="float: right;">
   <div class="flex flex-wrap space-x-1">
-    <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
+    <a href="https://huggingface.co/docs/diffusers/main/en/tutorials/using_peft_for_inference" target="_blank" rel="noopener">
+      <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
+    </a>
   </div>
 </div>
 
```

````diff
@@ -82,7 +84,7 @@ export_to_video(video, "output.mp4", fps=24)
 </hfoption>
 <hfoption id="inference speed">
 
-Compilation is slow the first time but subsequent calls to the pipeline are faster.
+[Compilation](../../optimization/fp16#torchcompile) is slow the first time but subsequent calls to the pipeline are faster.
 
 ```py
 import torch
````
````diff
@@ -124,6 +126,9 @@ export_to_video(video, "output.mp4", fps=24)
 
 - LTX-Video supports LoRAs with [`~loaders.LTXVideoLoraLoaderMixin.load_lora_weights`].
 
+<details>
+<summary>Show example code</summary>
+
 ```py
 import torch
 from diffusers import LTXConditionPipeline
````
````diff
@@ -153,8 +158,13 @@ export_to_video(video, "output.mp4", fps=24)
 export_to_video(video, "output.mp4", fps=26)
 ```
 
+</details>
+
 - LTX-Video supports loading from single files, such as [GGUF checkpoints](../../quantization/gguf), with [`loaders.FromOriginalModelMixin.from_single_file`] or [`loaders.FromSingleFileMixin.from_single_file`].
 
+<details>
+<summary>Show example code</summary>
+
 ```py
 import torch
 from diffusers.utils import export_to_video
````
````diff
@@ -172,6 +182,8 @@ export_to_video(video, "output.mp4", fps=24)
 )
 ```
 
+</details>
+
 ## LTXPipeline
 
 [[autodoc]] LTXPipeline
````
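The collapsed single-file example is likewise truncated above; a minimal sketch of GGUF loading, assuming the community `city96/LTX-Video-gguf` checkpoint and a diffusers release that ships `GGUFQuantizationConfig`:

```py
import torch
from diffusers import GGUFQuantizationConfig, LTXPipeline, LTXVideoTransformer3DModel
from diffusers.utils import export_to_video

# community GGUF checkpoint; substitute any LTX-Video GGUF file
transformer = LTXVideoTransformer3DModel.from_single_file(
    "https://huggingface.co/city96/LTX-Video-gguf/blob/main/ltx-video-2b-v0.9-Q8_0.gguf",
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
    torch_dtype=torch.bfloat16,
)
pipeline = LTXPipeline.from_pretrained(
    "Lightricks/LTX-Video", transformer=transformer, torch_dtype=torch.bfloat16
).to("cuda")

video = pipeline(prompt="A woman walks along a beach at sunset").frames[0]
export_to_video(video, "output.mp4", fps=24)
```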

docs/source/en/api/pipelines/wan.md
Lines changed: 14 additions & 2 deletions
```diff
@@ -14,7 +14,9 @@
 
 <div style="float: right;">
   <div class="flex flex-wrap space-x-1">
-    <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
+    <a href="https://huggingface.co/docs/diffusers/main/en/tutorials/using_peft_for_inference" target="_blank" rel="noopener">
+      <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
+    </a>
   </div>
 </div>
 
```

````diff
@@ -100,7 +102,7 @@ export_to_video(output, "output.mp4", fps=16)
 </hfoption>
 <hfoption id="inference speed">
 
-Compilation is slow the first time but subsequent calls to the pipeline are faster.
+[Compilation](../../optimization/fp16#torchcompile) is slow the first time but subsequent calls to the pipeline are faster.
 
 ```py
 # pip install ftfy
````
````diff
@@ -159,6 +161,9 @@ export_to_video(output, "output.mp4", fps=16)
 
 - Wan2.1 supports LoRAs with [`~loaders.WanLoraLoaderMixin.load_lora_weights`].
 
+<details>
+<summary>Show example code</summary>
+
 ```py
 # pip install ftfy
 import torch
````
````diff
@@ -199,8 +204,13 @@ export_to_video(output, "output.mp4", fps=16)
 export_to_video(output, "output.mp4", fps=16)
 ```
 
+</details>
+
 - [`WanTransformer3DModel`] and [`AutoencoderKLWan`] support loading from single files with [`~loaders.FromSingleFileMixin.from_single_file`].
 
+<details>
+<summary>Show example code</summary>
+
 ```py
 # pip install ftfy
 import torch
````
````diff
@@ -221,6 +231,8 @@ export_to_video(output, "output.mp4", fps=16)
 )
 ```
 
+</details>
+
 - Set the [`AutoencoderKLWan`] dtype to `torch.float32` for better decoding quality.
 
 - The number of frames per video (`num_frames`) should follow `4 * k + 1`, where `k` is an integer (for example, 81).
````
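A minimal sketch of the single-file pattern referenced above, also applying the `torch.float32` VAE tip from this hunk; the checkpoint paths are placeholders, and the pretrained repository id is an assumption:

```py
# pip install ftfy
import torch
from diffusers import AutoencoderKLWan, WanPipeline, WanTransformer3DModel
from diffusers.utils import export_to_video

# placeholder paths; point these at real single-file checkpoints
vae = AutoencoderKLWan.from_single_file(
    "path/to/wan_vae.safetensors",
    torch_dtype=torch.float32,  # float32 gives better decoding quality
)
transformer = WanTransformer3DModel.from_single_file(
    "path/to/wan_transformer.safetensors", torch_dtype=torch.bfloat16
)
pipeline = WanPipeline.from_pretrained(
    "Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
    vae=vae,
    transformer=transformer,
    torch_dtype=torch.bfloat16,
).to("cuda")

# num_frames should follow 4 * k + 1; 81 is a common choice
output = pipeline(prompt="A cat walks on the grass", num_frames=81).frames[0]
export_to_video(output, "output.mp4", fps=16)
```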
