From 0d778f6b216aa41a571a6bca9b875d6c8cecaea9 Mon Sep 17 00:00:00 2001
From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com>
Date: Thu, 11 Apr 2024 00:40:42 +0800
Subject: [PATCH 01/14] Create tgate.md
---
docs/source/en/optimization/tgate.md | 194 +++++++++++++++++++++++++++
1 file changed, 194 insertions(+)
create mode 100644 docs/source/en/optimization/tgate.md
diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md
new file mode 100644
index 000000000000..52d50e8589d6
--- /dev/null
+++ b/docs/source/en/optimization/tgate.md
@@ -0,0 +1,194 @@
+# TGATE
+
+[TGATE](https://github.com/HaozheLiu-ST/T-GATE/tree/main) accelerates inferences of [`PixArtAlphaPipeline`], [`StableDiffusionPipeline`], and [`StableDiffusionXLPipeline`] by skipping the calculation of cross-attention once it converges. More details can be found at [technical report](https://huggingface.co/papers/2404.02747).
+
+
+
+
+
+
+## 🚀 Major Features
+
+* Training-Free.
+* Easily Integrate into [Diffusers](https://github.com/huggingface/diffusers/tree/main).
+* Only a few lines of code are required.
+* Complementary to [DeepCache](https://github.com/horseee/DeepCache).
+* Friendly support [Stable Diffusion pipelines](https://huggingface.co/stabilityai), [PixArt](https://pixart-alpha.github.io/), and [Latent Consistency Models](https://latent-consistency-models.github.io/).
+* 10%-50% speed up for different models.
+
+## 📖 Quick Start
+
+### 🛠️ Installation
+
+Start by installing [TGATE](https://github.com/HaozheLiu-ST/T-GATE/tree/release-v.0.1.0):
+
+```
+pip install tgate
+```
+
+#### Requirements
+
+* pytorch>=2.0.0
+* diffusers>=0.27.2
+* transformers==4.37.2
+* DeepCache==0.1.1
+* accelerate
+
+### 🌟 Usage
+
+Accelerate `PixArtAlphaPipeline` with TGATE:
+
+```diff
+import torch
+from diffusers import PixArtAlphaPipeline
+
+pipe = PixArtAlphaPipeline.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", torch_dtype=torch.float16)
+
++ from tgate import TgatePixArtLoader
++ gate_step = 8
++ inference_step = 25
++ pipe = TgatePixArtLoader(
++ pipe,
++ gate_step=gate_step,
++ num_inference_steps=inference_step,
++ )
+pipe = pipe.to("cuda")
+
++ image = pipe.tgate(
++ "An alpaca made of colorful building blocks, cyberpunk.",
++ gate_step=gate_step,
++ num_inference_steps=inference_step,
++ ).images[0]
+```
+
+Accelerate `StableDiffusionXLPipeline` with TGATE:
+
+```diff
+import torch
+from diffusers import StableDiffusionXLPipeline
+from diffusers import DPMSolverMultistepScheduler
+
+pipe = StableDiffusionXLPipeline.from_pretrained(
+ "stabilityai/stable-diffusion-xl-base-1.0",
+ torch_dtype=torch.float16,
+ variant="fp16",
+ use_safetensors=True,
+)
+
++ from tgate import TgateSDXLLoader
++ gate_step = 10
++ inference_step = 25
++ pipe = TgateSDXLLoader(
++ pipe,
++ gate_step=gate_step,
++ num_inference_steps=inference_step,
++ )
+
+pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+pipe = pipe.to("cuda")
+
++ image = pipe.tgate(
++ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
++ gate_step=gate_step,
++ num_inference_steps=inference_step
++ ).images[0]
+```
+
+Accelerate `StableDiffusionXLPipeline` with [DeepCache](https://github.com/horseee/DeepCache) and TGATE:
+
+```diff
+import torch
+from diffusers import StableDiffusionXLPipeline
+from diffusers import DPMSolverMultistepScheduler
+
+pipe = StableDiffusionXLPipeline.from_pretrained(
+ "stabilityai/stable-diffusion-xl-base-1.0",
+ torch_dtype=torch.float16,
+ variant="fp16",
+ use_safetensors=True,
+)
+
++ from tgate import TgateSDXLDeepCacheLoader
++ gate_step = 10
++ inference_step = 25
++ pipe = TgateSDXLDeepCacheLoader(
++ pipe,
++ cache_interval=3,
++ cache_branch_id=0,
++ )
+
+pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+pipe = pipe.to("cuda")
+
++ image = pipe.tgate(
++ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
++ gate_step=gate_step,
++ num_inference_steps=inference_step
++ ).images[0]
+```
+
+Accelerate `latent-consistency/lcm-sdxl` with TGATE:
+
+```diff
+import torch
+from diffusers import StableDiffusionXLPipeline
+from diffusers import UNet2DConditionModel, LCMScheduler
+from diffusers import DPMSolverMultistepScheduler
+
+unet = UNet2DConditionModel.from_pretrained(
+ "latent-consistency/lcm-sdxl",
+ torch_dtype=torch.float16,
+ variant="fp16",
+)
+pipe = StableDiffusionXLPipeline.from_pretrained(
+ "stabilityai/stable-diffusion-xl-base-1.0",
+ unet=unet,
+ torch_dtype=torch.float16,
+ variant="fp16",
+)
+pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
+
++ from tgate import TgateSDXLLoader
++ gate_step = 1
++ inference_step = 4
++ pipe = TgateSDXLLoader(
++ pipe,
++ gate_step=gate_step,
++ num_inference_steps=inference_step,
++ lcm=True
++ )
+pipe = pipe.to("cuda")
+
++ image = pipe.tgate(
++ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
++ gate_step=gate_step,
++ num_inference_steps=inference_step
++ ).images[0]
+```
+
+TGATE also supports `StableDiffusionPipeline` and `PixArt-alpha/PixArt-LCM-XL-2-1024-MS`.
+More details can be found at [here](https://github.com/HaozheLiu-ST/T-GATE/tree/release-v.0.1.0/main.py).
+
+## 📄 Results
+| Model | MACs | Param | Latency | Zero-shot 10K-FID on MS-COCO |
+|-----------------------|----------|-----------|---------|---------------------------|
+| SD-1.5 | 16.938T | 859.520M | 7.032s | 23.927 |
+| SD-1.5 w/ TGATE | 9.875T | 815.557M | 4.313s | 20.789 |
+| SD-2.1 | 38.041T | 865.785M | 16.121s | 22.609 |
+| SD-2.1 w/ TGATE | 22.208T | 815.433 M | 9.878s | 19.940 |
+| SD-XL | 149.438T | 2.570B | 53.187s | 24.628 |
+| SD-XL w/ TGATE | 84.438T | 2.024B | 27.932s | 22.738 |
+| Pixart-Alpha | 107.031T | 611.350M | 61.502s | 38.669 |
+| Pixart-Alpha w/ TGATE | 65.318T | 462.585M | 37.867s | 35.825 |
+| DeepCache (SD-XL) | 57.888T | - | 19.931s | 23.755 |
+| DeepCache w/ TGATE | 43.868T | - | 14.666s | 23.999 |
+| LCM (SD-XL) | 11.955T | 2.570B | 3.805s | 25.044 |
+| LCM w/ TGATE | 11.171T | 2.024B | 3.533s | 25.028 |
+| LCM (Pixart-Alpha) | 8.563T | 611.350M | 4.733s | 36.086 |
+| LCM w/ TGATE | 7.623T | 462.585M | 4.543s | 37.048 |
+
+The latency is tested on a 1080ti commercial card.
+
+The MACs and Params are calculated by [calflops](https://github.com/MrYxJ/calculate-flops.pytorch).
+
+The FID is calculated by [PytorchFID](https://github.com/mseitzer/pytorch-fid).
From bf97d3558382a58df44fb09880ad0d0302e889f1 Mon Sep 17 00:00:00 2001
From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com>
Date: Sat, 13 Apr 2024 19:28:12 +0800
Subject: [PATCH 02/14] Update _toctree.yml
---
docs/source/en/_toctree.yml | 2 ++
1 file changed, 2 insertions(+)
diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index 83693485d0e2..8265f315bf6c 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -172,6 +172,8 @@
title: Token merging
- local: optimization/deepcache
title: DeepCache
+ - local: optimization/tgate
+ title: TGATE
title: General optimizations
- sections:
- local: using-diffusers/stable_diffusion_jax_how_to
From df5187b306b579812419c7ff3909ec84985661f5 Mon Sep 17 00:00:00 2001
From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com>
Date: Tue, 16 Apr 2024 10:50:36 +0800
Subject: [PATCH 03/14] Update docs/source/en/optimization/tgate.md
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
---
docs/source/en/optimization/tgate.md | 18 ++----------------
1 file changed, 2 insertions(+), 16 deletions(-)
diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md
index 52d50e8589d6..7727b677c91b 100644
--- a/docs/source/en/optimization/tgate.md
+++ b/docs/source/en/optimization/tgate.md
@@ -1,20 +1,6 @@
-# TGATE
+# T-GATE
-[TGATE](https://github.com/HaozheLiu-ST/T-GATE/tree/main) accelerates inferences of [`PixArtAlphaPipeline`], [`StableDiffusionPipeline`], and [`StableDiffusionXLPipeline`] by skipping the calculation of cross-attention once it converges. More details can be found at [technical report](https://huggingface.co/papers/2404.02747).
-
-
-
-
-
-
-## 🚀 Major Features
-
-* Training-Free.
-* Easily Integrate into [Diffusers](https://github.com/huggingface/diffusers/tree/main).
-* Only a few lines of code are required.
-* Complementary to [DeepCache](https://github.com/horseee/DeepCache).
-* Friendly support [Stable Diffusion pipelines](https://huggingface.co/stabilityai), [PixArt](https://pixart-alpha.github.io/), and [Latent Consistency Models](https://latent-consistency-models.github.io/).
-* 10%-50% speed up for different models.
+[T-GATE](https://github.com/HaozheLiu-ST/T-GATE/tree/main) accelerates inference for [Stable Diffusion](../api/pipelines/stable_diffusion/overview), [PixArt](../api/pipelines/pixart), and [Latent Consistency Model](../api/pipelines/latent_consistency_models.md) pipelines by skipping the cross-attention calculation once it converges. This method doesn't require any additional training and it can speed up inference by 10-50%. T-GATE is also compatible with other optimization methods like [DeepCache](./deepcache).
## 📖 Quick Start
From 0fda09c5b90686cc3513a5618d7101b949bc3d6f Mon Sep 17 00:00:00 2001
From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com>
Date: Tue, 16 Apr 2024 10:50:46 +0800
Subject: [PATCH 04/14] Update docs/source/en/optimization/tgate.md
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
---
docs/source/en/optimization/tgate.md | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md
index 7727b677c91b..5c05bb7169fb 100644
--- a/docs/source/en/optimization/tgate.md
+++ b/docs/source/en/optimization/tgate.md
@@ -2,11 +2,7 @@
[T-GATE](https://github.com/HaozheLiu-ST/T-GATE/tree/main) accelerates inference for [Stable Diffusion](../api/pipelines/stable_diffusion/overview), [PixArt](../api/pipelines/pixart), and [Latent Consistency Model](../api/pipelines/latent_consistency_models.md) pipelines by skipping the cross-attention calculation once it converges. This method doesn't require any additional training and it can speed up inference by 10-50%. T-GATE is also compatible with other optimization methods like [DeepCache](./deepcache).
-## 📖 Quick Start
-
-### 🛠️ Installation
-
-Start by installing [TGATE](https://github.com/HaozheLiu-ST/T-GATE/tree/release-v.0.1.0):
+Before you begin, make sure you install T-GATE.
```
pip install tgate
From 777a1ab07049daf389c9a542cf3a7da5ccb21867 Mon Sep 17 00:00:00 2001
From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com>
Date: Tue, 16 Apr 2024 10:51:00 +0800
Subject: [PATCH 05/14] Update docs/source/en/optimization/tgate.md
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
---
docs/source/en/optimization/tgate.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md
index 5c05bb7169fb..3784eeb007f9 100644
--- a/docs/source/en/optimization/tgate.md
+++ b/docs/source/en/optimization/tgate.md
@@ -4,7 +4,7 @@
Before you begin, make sure you install T-GATE.
-```
+```bash
pip install tgate
```
From 6a57cd3c9ccb08228033b0583daefbbf68eb28b5 Mon Sep 17 00:00:00 2001
From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com>
Date: Tue, 16 Apr 2024 10:51:32 +0800
Subject: [PATCH 06/14] Update docs/source/en/optimization/tgate.md
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
---
docs/source/en/optimization/tgate.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md
index 3784eeb007f9..616c522b8b05 100644
--- a/docs/source/en/optimization/tgate.md
+++ b/docs/source/en/optimization/tgate.md
@@ -6,7 +6,7 @@ Before you begin, make sure you install T-GATE.
```bash
pip install tgate
-```
+pip install -U torch diffusers transformers accelerate DeepCache
#### Requirements
From ef60e1c2dfb4c0bb5058db0c1c48b7ddcae0f4a9 Mon Sep 17 00:00:00 2001
From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com>
Date: Tue, 16 Apr 2024 10:51:39 +0800
Subject: [PATCH 07/14] Update docs/source/en/optimization/tgate.md
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
---
docs/source/en/optimization/tgate.md | 7 -------
1 file changed, 7 deletions(-)
diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md
index 616c522b8b05..be27ea326ed2 100644
--- a/docs/source/en/optimization/tgate.md
+++ b/docs/source/en/optimization/tgate.md
@@ -8,13 +8,6 @@ Before you begin, make sure you install T-GATE.
pip install tgate
pip install -U torch diffusers transformers accelerate DeepCache
-#### Requirements
-
-* pytorch>=2.0.0
-* diffusers>=0.27.2
-* transformers==4.37.2
-* DeepCache==0.1.1
-* accelerate
### 🌟 Usage
From c2fffb7174abd51694463fe499cd966ba9c6f5b5 Mon Sep 17 00:00:00 2001
From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com>
Date: Tue, 16 Apr 2024 10:52:07 +0800
Subject: [PATCH 08/14] Update docs/source/en/optimization/tgate.md
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
---
docs/source/en/optimization/tgate.md | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md
index be27ea326ed2..6d9980f16165 100644
--- a/docs/source/en/optimization/tgate.md
+++ b/docs/source/en/optimization/tgate.md
@@ -9,9 +9,17 @@ pip install tgate
pip install -U torch diffusers transformers accelerate DeepCache
-### 🌟 Usage
+To use T-GATE with a pipeline, you need to use its corresponding loader.
-Accelerate `PixArtAlphaPipeline` with TGATE:
+| Pipeline | T-GATE Loader |
+|---|---|
+| PixArt | TgatePixArtLoader |
+| Stable Diffusion XL | TgateSDXLLoader |
+| Stable Diffusion XL + DeepCache | TgateSDXLDeepCacheLoader |
+
+Next, create a `TgateLoader` with a pipeline, the gate step(`add brief description here`), and the number of inference steps. Then call the `tgate` method on the pipeline with a prompt, gate step, and the number of inference steps.
+
+Let's see how to enable this for several different pipelines.
```diff
import torch
From 3beda26928a2f33c78a8ef57633025b4ac4954cd Mon Sep 17 00:00:00 2001
From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com>
Date: Tue, 16 Apr 2024 10:54:52 +0800
Subject: [PATCH 09/14] Update docs/source/en/optimization/tgate.md
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
---
docs/source/en/optimization/tgate.md | 28 ++++++++++++----------------
1 file changed, 12 insertions(+), 16 deletions(-)
diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md
index 6d9980f16165..2717349c6836 100644
--- a/docs/source/en/optimization/tgate.md
+++ b/docs/source/en/optimization/tgate.md
@@ -21,28 +21,24 @@ Next, create a `TgateLoader` with a pipeline, the gate step(`add brief descripti
Let's see how to enable this for several different pipelines.
-```diff
+```py
import torch
from diffusers import PixArtAlphaPipeline
+from tgate import TgatePixArtLoader
pipe = PixArtAlphaPipeline.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", torch_dtype=torch.float16)
-
-+ from tgate import TgatePixArtLoader
-+ gate_step = 8
-+ inference_step = 25
-+ pipe = TgatePixArtLoader(
-+ pipe,
-+ gate_step=gate_step,
-+ num_inference_steps=inference_step,
-+ )
+pipe = TgatePixArtLoader(
+ pipe,
+ gate_step=8,
+ num_inference_steps=25,
+)
pipe = pipe.to("cuda")
-+ image = pipe.tgate(
-+ "An alpaca made of colorful building blocks, cyberpunk.",
-+ gate_step=gate_step,
-+ num_inference_steps=inference_step,
-+ ).images[0]
-```
+image = pipe.tgate(
+ "An alpaca made of colorful building blocks, cyberpunk.",
+ gate_step=8,
+ num_inference_steps=25,
+).images[0]
Accelerate `StableDiffusionXLPipeline` with TGATE:
From 3cfec749424d82c4eb7e0da05861ff3212e93569 Mon Sep 17 00:00:00 2001
From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com>
Date: Tue, 16 Apr 2024 10:55:11 +0800
Subject: [PATCH 10/14] Update docs/source/en/optimization/tgate.md
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
---
docs/source/en/optimization/tgate.md | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md
index 2717349c6836..7f055a3b8d72 100644
--- a/docs/source/en/optimization/tgate.md
+++ b/docs/source/en/optimization/tgate.md
@@ -145,8 +145,7 @@ pipe = pipe.to("cuda")
+ ).images[0]
```
-TGATE also supports `StableDiffusionPipeline` and `PixArt-alpha/PixArt-LCM-XL-2-1024-MS`.
-More details can be found at [here](https://github.com/HaozheLiu-ST/T-GATE/tree/release-v.0.1.0/main.py).
+T-GATE also supports [`StableDiffusionPipeline`] and [PixArt-alpha/PixArt-LCM-XL-2-1024-MS](https://hf.co/PixArt-alpha/PixArt-LCM-XL-2-1024-MS).
## 📄 Results
| Model | MACs | Param | Latency | Zero-shot 10K-FID on MS-COCO |
From 6e0c212db74f557033de239e21f520f54b54f667 Mon Sep 17 00:00:00 2001
From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com>
Date: Tue, 16 Apr 2024 10:55:24 +0800
Subject: [PATCH 11/14] Update docs/source/en/optimization/tgate.md
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
---
docs/source/en/optimization/tgate.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md
index 7f055a3b8d72..6155a9e565bc 100644
--- a/docs/source/en/optimization/tgate.md
+++ b/docs/source/en/optimization/tgate.md
@@ -147,7 +147,7 @@ pipe = pipe.to("cuda")
T-GATE also supports [`StableDiffusionPipeline`] and [PixArt-alpha/PixArt-LCM-XL-2-1024-MS](https://hf.co/PixArt-alpha/PixArt-LCM-XL-2-1024-MS).
-## 📄 Results
+## Benchmarks
| Model | MACs | Param | Latency | Zero-shot 10K-FID on MS-COCO |
|-----------------------|----------|-----------|---------|---------------------------|
| SD-1.5 | 16.938T | 859.520M | 7.032s | 23.927 |
From 7af2c44891aefe80e716eefb82ec0ff874780ab3 Mon Sep 17 00:00:00 2001
From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com>
Date: Tue, 16 Apr 2024 10:55:53 +0800
Subject: [PATCH 12/14] Update docs/source/en/optimization/tgate.md
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
---
docs/source/en/optimization/tgate.md | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md
index 6155a9e565bc..3efaf597a619 100644
--- a/docs/source/en/optimization/tgate.md
+++ b/docs/source/en/optimization/tgate.md
@@ -165,8 +165,4 @@ T-GATE also supports [`StableDiffusionPipeline`] and [PixArt-alpha/PixArt-LCM-XL
| LCM (Pixart-Alpha) | 8.563T | 611.350M | 4.733s | 36.086 |
| LCM w/ TGATE | 7.623T | 462.585M | 4.543s | 37.048 |
-The latency is tested on a 1080ti commercial card.
-
-The MACs and Params are calculated by [calflops](https://github.com/MrYxJ/calculate-flops.pytorch).
-
-The FID is calculated by [PytorchFID](https://github.com/mseitzer/pytorch-fid).
+The latency is tested on an NVIDIA GeForce GTX 1080 Ti, MACs and Params are calculated with [calflops](https://github.com/MrYxJ/calculate-flops.pytorch), and the FID is calculated with [PytorchFID](https://github.com/mseitzer/pytorch-fid).
From 60a24a2d1f33ef5894a16c7469f05dc7a4b39712 Mon Sep 17 00:00:00 2001
From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com>
Date: Tue, 16 Apr 2024 11:58:28 +0800
Subject: [PATCH 13/14] Update tgate.md
---
docs/source/en/optimization/tgate.md | 101 +++++++++++++++------------
1 file changed, 56 insertions(+), 45 deletions(-)
diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md
index 3efaf597a619..8ef119823ecf 100644
--- a/docs/source/en/optimization/tgate.md
+++ b/docs/source/en/optimization/tgate.md
@@ -7,6 +7,7 @@ Before you begin, make sure you install T-GATE.
```bash
pip install tgate
pip install -U torch diffusers transformers accelerate DeepCache
+```
To use T-GATE with a pipeline, you need to use its corresponding loader.
@@ -16,11 +17,18 @@ To use T-GATE with a pipeline, you need to use its corresponding loader.
| PixArt | TgatePixArtLoader |
| Stable Diffusion XL | TgateSDXLLoader |
| Stable Diffusion XL + DeepCache | TgateSDXLDeepCacheLoader |
+| Stable Diffusion | TgateSDLoader |
+| Stable Diffusion + DeepCache | TgateSDDeepCacheLoader |
-Next, create a `TgateLoader` with a pipeline, the gate step(`add brief description here`), and the number of inference steps. Then call the `tgate` method on the pipeline with a prompt, gate step, and the number of inference steps.
+Next, create a `TgateLoader` with a pipeline, the gate step (the timestep at which the cross-attention calculation stops), and the number of inference steps. Then call the `tgate` method on the pipeline with a prompt, the gate step, and the number of inference steps.
Let's see how to enable this for several different pipelines.
+
+
+
+Accelerate `PixArtAlphaPipeline` with T-GATE:
+
```py
import torch
from diffusers import PixArtAlphaPipeline
@@ -31,18 +39,20 @@ pipe = TgatePixArtLoader(
pipe,
gate_step=8,
num_inference_steps=25,
-)
-pipe = pipe.to("cuda")
+).to("cuda")
image = pipe.tgate(
"An alpaca made of colorful building blocks, cyberpunk.",
gate_step=8,
num_inference_steps=25,
).images[0]
+```
+
+
-Accelerate `StableDiffusionXLPipeline` with TGATE:
+Accelerate `StableDiffusionXLPipeline` with T-GATE:
-```diff
+```py
import torch
from diffusers import StableDiffusionXLPipeline
from diffusers import DPMSolverMultistepScheduler
@@ -53,29 +63,29 @@ pipe = StableDiffusionXLPipeline.from_pretrained(
variant="fp16",
use_safetensors=True,
)
-
-+ from tgate import TgateSDXLLoader
-+ gate_step = 10
-+ inference_step = 25
-+ pipe = TgateSDXLLoader(
-+ pipe,
-+ gate_step=gate_step,
-+ num_inference_steps=inference_step,
-+ )
-
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-pipe = pipe.to("cuda")
-+ image = pipe.tgate(
-+ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
-+ gate_step=gate_step,
-+ num_inference_steps=inference_step
-+ ).images[0]
+from tgate import TgateSDXLLoader
+gate_step = 10
+inference_step = 25
+pipe = TgateSDXLLoader(
+ pipe,
+ gate_step=gate_step,
+ num_inference_steps=inference_step,
+).to("cuda")
+
+image = pipe.tgate(
+ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
+ gate_step=gate_step,
+ num_inference_steps=inference_step
+).images[0]
```
+
+
-Accelerate `StableDiffusionXLPipeline` with [DeepCache](https://github.com/horseee/DeepCache) and TGATE:
+Accelerate `StableDiffusionXLPipeline` with [DeepCache](https://github.com/horseee/DeepCache) and T-GATE:
-```diff
+```py
import torch
from diffusers import StableDiffusionXLPipeline
from diffusers import DPMSolverMultistepScheduler
@@ -86,18 +96,16 @@ pipe = StableDiffusionXLPipeline.from_pretrained(
variant="fp16",
use_safetensors=True,
)
-
-+ from tgate import TgateSDXLDeepCacheLoader
-+ gate_step = 10
-+ inference_step = 25
-+ pipe = TgateSDXLDeepCacheLoader(
-+ pipe,
-+ cache_interval=3,
-+ cache_branch_id=0,
-+ )
-
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-pipe = pipe.to("cuda")
+
+from tgate import TgateSDXLDeepCacheLoader
+gate_step = 10
+inference_step = 25
+pipe = TgateSDXLDeepCacheLoader(
+ pipe,
+ cache_interval=3,
+ cache_branch_id=0,
+).to("cuda")
+ image = pipe.tgate(
+ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
@@ -105,10 +113,12 @@ pipe = pipe.to("cuda")
+ num_inference_steps=inference_step
+ ).images[0]
```
+
+
-Accelerate `latent-consistency/lcm-sdxl` with TGATE:
+Accelerate `latent-consistency/lcm-sdxl` with T-GATE:
-```diff
+```py
import torch
from diffusers import StableDiffusionXLPipeline
from diffusers import UNet2DConditionModel, LCMScheduler
@@ -135,8 +145,7 @@ pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
+ gate_step=gate_step,
+ num_inference_steps=inference_step,
+ lcm=True
-+ )
-pipe = pipe.to("cuda")
++ ).to("cuda")
+ image = pipe.tgate(
+ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
@@ -144,6 +153,8 @@ pipe = pipe.to("cuda")
+ num_inference_steps=inference_step
+ ).images[0]
```
+
+
T-GATE also supports [`StableDiffusionPipeline`] and [PixArt-alpha/PixArt-LCM-XL-2-1024-MS](https://hf.co/PixArt-alpha/PixArt-LCM-XL-2-1024-MS).
@@ -151,18 +162,18 @@ T-GATE also supports [`StableDiffusionPipeline`] and [PixArt-alpha/PixArt-LCM-XL
| Model | MACs | Param | Latency | Zero-shot 10K-FID on MS-COCO |
|-----------------------|----------|-----------|---------|---------------------------|
| SD-1.5 | 16.938T | 859.520M | 7.032s | 23.927 |
-| SD-1.5 w/ TGATE | 9.875T | 815.557M | 4.313s | 20.789 |
+| SD-1.5 w/ T-GATE | 9.875T | 815.557M | 4.313s | 20.789 |
| SD-2.1 | 38.041T | 865.785M | 16.121s | 22.609 |
-| SD-2.1 w/ TGATE | 22.208T | 815.433 M | 9.878s | 19.940 |
+| SD-2.1 w/ T-GATE | 22.208T | 815.433M | 9.878s | 19.940 |
| SD-XL | 149.438T | 2.570B | 53.187s | 24.628 |
-| SD-XL w/ TGATE | 84.438T | 2.024B | 27.932s | 22.738 |
+| SD-XL w/ T-GATE | 84.438T | 2.024B | 27.932s | 22.738 |
| Pixart-Alpha | 107.031T | 611.350M | 61.502s | 38.669 |
-| Pixart-Alpha w/ TGATE | 65.318T | 462.585M | 37.867s | 35.825 |
+| Pixart-Alpha w/ T-GATE | 65.318T | 462.585M | 37.867s | 35.825 |
| DeepCache (SD-XL) | 57.888T | - | 19.931s | 23.755 |
-| DeepCache w/ TGATE | 43.868T | - | 14.666s | 23.999 |
+| DeepCache w/ T-GATE | 43.868T | - | 14.666s | 23.999 |
| LCM (SD-XL) | 11.955T | 2.570B | 3.805s | 25.044 |
-| LCM w/ TGATE | 11.171T | 2.024B | 3.533s | 25.028 |
+| LCM w/ T-GATE | 11.171T | 2.024B | 3.533s | 25.028 |
| LCM (Pixart-Alpha) | 8.563T | 611.350M | 4.733s | 36.086 |
-| LCM w/ TGATE | 7.623T | 462.585M | 4.543s | 37.048 |
+| LCM w/ T-GATE | 7.623T | 462.585M | 4.543s | 37.048 |
The latency is tested on an NVIDIA GeForce GTX 1080 Ti, MACs and Params are calculated with [calflops](https://github.com/MrYxJ/calculate-flops.pytorch), and the FID is calculated with [PytorchFID](https://github.com/mseitzer/pytorch-fid).
From 2eaf934e31c6604760cc937f2380a29fc724402b Mon Sep 17 00:00:00 2001
From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com>
Date: Wed, 17 Apr 2024 02:19:14 +0800
Subject: [PATCH 14/14] Update tgate.md
---
docs/source/en/optimization/tgate.md | 40 ++++++++++++++--------------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md
index 8ef119823ecf..0b536a215fc0 100644
--- a/docs/source/en/optimization/tgate.md
+++ b/docs/source/en/optimization/tgate.md
@@ -107,11 +107,11 @@ pipe = TgateSDXLDeepCacheLoader(
cache_branch_id=0,
).to("cuda")
-+ image = pipe.tgate(
-+ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
-+ gate_step=gate_step,
-+ num_inference_steps=inference_step
-+ ).images[0]
+image = pipe.tgate(
+ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
+ gate_step=gate_step,
+ num_inference_steps=inference_step
+).images[0]
```
@@ -137,21 +137,21 @@ pipe = StableDiffusionXLPipeline.from_pretrained(
)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
-+ from tgate import TgateSDXLLoader
-+ gate_step = 1
-+ inference_step = 4
-+ pipe = TgateSDXLLoader(
-+ pipe,
-+ gate_step=gate_step,
-+ num_inference_steps=inference_step,
-+ lcm=True
-+ ).to("cuda")
-
-+ image = pipe.tgate(
-+ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
-+ gate_step=gate_step,
-+ num_inference_steps=inference_step
-+ ).images[0]
+from tgate import TgateSDXLLoader
+gate_step = 1
+inference_step = 4
+pipe = TgateSDXLLoader(
+ pipe,
+ gate_step=gate_step,
+ num_inference_steps=inference_step,
+ lcm=True
+).to("cuda")
+
+image = pipe.tgate(
+ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
+ gate_step=gate_step,
+ num_inference_steps=inference_step
+).images[0]
```