From 0d778f6b216aa41a571a6bca9b875d6c8cecaea9 Mon Sep 17 00:00:00 2001 From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com> Date: Thu, 11 Apr 2024 00:40:42 +0800 Subject: [PATCH 01/14] Create tgate.md --- docs/source/en/optimization/tgate.md | 194 +++++++++++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 docs/source/en/optimization/tgate.md diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md new file mode 100644 index 000000000000..52d50e8589d6 --- /dev/null +++ b/docs/source/en/optimization/tgate.md @@ -0,0 +1,194 @@ +# TGATE + +[TGATE](https://github.com/HaozheLiu-ST/T-GATE/tree/main) accelerates inferences of [`PixArtAlphaPipeline`], [`StableDiffusionPipeline`], and [`StableDiffusionXLPipeline`] by skipping the calculation of cross-attention once it converges. More details can be found at [technical report](https://huggingface.co/papers/2404.02747). + +![](https://github.com/HaozheLiu-ST/T-GATE/assets/53887227/bff43e0e-2372-4edc-9ba1-64dcbc649329) + + + + +## 🚀 Major Features + +* Training-Free. +* Easily Integrate into [Diffusers](https://github.com/huggingface/diffusers/tree/main). +* Only a few lines of code are required. +* Complementary to [DeepCache](https://github.com/horseee/DeepCache). +* Friendly support [Stable Diffusion pipelines](https://huggingface.co/stabilityai), [PixArt](https://pixart-alpha.github.io/), and [Latent Consistency Models](https://latent-consistency-models.github.io/). +* 10%-50% speed up for different models. 
+ +## 📖 Quick Start + +### 🛠️ Installation + +Start by installing [TGATE](https://github.com/HaozheLiu-ST/T-GATE/tree/release-v.0.1.0): + +``` +pip install tgate +``` + +#### Requirements + +* pytorch>=2.0.0 +* diffusers>=0.27.2 +* transformers==4.37.2 +* DeepCache==0.1.1 +* accelerate + +### 🌟 Usage + +Accelerate `PixArtAlphaPipeline` with TGATE: + +```diff +import torch +from diffusers import PixArtAlphaPipeline + +pipe = PixArtAlphaPipeline.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", torch_dtype=torch.float16) + ++ from tgate import TgatePixArtLoader ++ gate_step = 8 ++ inference_step = 25 ++ pipe = TgatePixArtLoader( ++ pipe, ++ gate_step=gate_step, ++ num_inference_steps=inference_step, ++ ) +pipe = pipe.to("cuda") + ++ image = pipe.tgate( ++ "An alpaca made of colorful building blocks, cyberpunk.", ++ gate_step=gate_step, ++ num_inference_steps=inference_step, ++ ).images[0] +``` + +Accelerate `StableDiffusionXLPipeline` with TGATE: + +```diff +import torch +from diffusers import StableDiffusionXLPipeline +from diffusers import DPMSolverMultistepScheduler + +pipe = StableDiffusionXLPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", + torch_dtype=torch.float16, + variant="fp16", + use_safetensors=True, +) + ++ from tgate import TgateSDXLLoader ++ gate_step = 10 ++ inference_step = 25 ++ pipe = TgateSDXLLoader( ++ pipe, ++ gate_step=gate_step, ++ num_inference_steps=inference_step, ++ ) + +pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) +pipe = pipe.to("cuda") + ++ image = pipe.tgate( ++ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.", ++ gate_step=gate_step, ++ num_inference_steps=inference_step ++ ).images[0] +``` + +Accelerate `StableDiffusionXLPipeline` with [DeepCache](https://github.com/horseee/DeepCache) and TGATE: + +```diff +import torch +from diffusers import StableDiffusionXLPipeline +from diffusers import DPMSolverMultistepScheduler + +pipe = 
StableDiffusionXLPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", + torch_dtype=torch.float16, + variant="fp16", + use_safetensors=True, +) + ++ from tgate import TgateSDXLDeepCacheLoader ++ gate_step = 10 ++ inference_step = 25 ++ pipe = TgateSDXLDeepCacheLoader( ++ pipe, ++ cache_interval=3, ++ cache_branch_id=0, ++ ) + +pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) +pipe = pipe.to("cuda") + ++ image = pipe.tgate( ++ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.", ++ gate_step=gate_step, ++ num_inference_steps=inference_step ++ ).images[0] +``` + +Accelerate `latent-consistency/lcm-sdxl` with TGATE: + +```diff +import torch +from diffusers import StableDiffusionXLPipeline +from diffusers import UNet2DConditionModel, LCMScheduler +from diffusers import DPMSolverMultistepScheduler + +unet = UNet2DConditionModel.from_pretrained( + "latent-consistency/lcm-sdxl", + torch_dtype=torch.float16, + variant="fp16", +) +pipe = StableDiffusionXLPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", + unet=unet, + torch_dtype=torch.float16, + variant="fp16", +) +pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) + ++ from tgate import TgateSDXLLoader ++ gate_step = 1 ++ inference_step = 4 ++ pipe = TgateSDXLLoader( ++ pipe, ++ gate_step=gate_step, ++ num_inference_steps=inference_step, ++ lcm=True ++ ) +pipe = pipe.to("cuda") + ++ image = pipe.tgate( ++ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.", ++ gate_step=gate_step, ++ num_inference_steps=inference_step ++ ).images[0] +``` + +TGATE also supports `StableDiffusionPipeline` and `PixArt-alpha/PixArt-LCM-XL-2-1024-MS`. +More details can be found at [here](https://github.com/HaozheLiu-ST/T-GATE/tree/release-v.0.1.0/main.py). 
+ +## 📄 Results +| Model | MACs | Param | Latency | Zero-shot 10K-FID on MS-COCO | +|-----------------------|----------|-----------|---------|---------------------------| +| SD-1.5 | 16.938T | 859.520M | 7.032s | 23.927 | +| SD-1.5 w/ TGATE | 9.875T | 815.557M | 4.313s | 20.789 | +| SD-2.1 | 38.041T | 865.785M | 16.121s | 22.609 | +| SD-2.1 w/ TGATE | 22.208T | 815.433 M | 9.878s | 19.940 | +| SD-XL | 149.438T | 2.570B | 53.187s | 24.628 | +| SD-XL w/ TGATE | 84.438T | 2.024B | 27.932s | 22.738 | +| Pixart-Alpha | 107.031T | 611.350M | 61.502s | 38.669 | +| Pixart-Alpha w/ TGATE | 65.318T | 462.585M | 37.867s | 35.825 | +| DeepCache (SD-XL) | 57.888T | - | 19.931s | 23.755 | +| DeepCache w/ TGATE | 43.868T | - | 14.666s | 23.999 | +| LCM (SD-XL) | 11.955T | 2.570B | 3.805s | 25.044 | +| LCM w/ TGATE | 11.171T | 2.024B | 3.533s | 25.028 | +| LCM (Pixart-Alpha) | 8.563T | 611.350M | 4.733s | 36.086 | +| LCM w/ TGATE | 7.623T | 462.585M | 4.543s | 37.048 | + +The latency is tested on a 1080ti commercial card. + +The MACs and Params are calculated by [calflops](https://github.com/MrYxJ/calculate-flops.pytorch). + +The FID is calculated by [PytorchFID](https://github.com/mseitzer/pytorch-fid). 
From bf97d3558382a58df44fb09880ad0d0302e889f1 Mon Sep 17 00:00:00 2001 From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com> Date: Sat, 13 Apr 2024 19:28:12 +0800 Subject: [PATCH 02/14] Update _toctree.yml --- docs/source/en/_toctree.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 83693485d0e2..8265f315bf6c 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -172,6 +172,8 @@ title: Token merging - local: optimization/deepcache title: DeepCache + - local: optimization/tgate + title: TGATE title: General optimizations - sections: - local: using-diffusers/stable_diffusion_jax_how_to From df5187b306b579812419c7ff3909ec84985661f5 Mon Sep 17 00:00:00 2001 From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com> Date: Tue, 16 Apr 2024 10:50:36 +0800 Subject: [PATCH 03/14] Update docs/source/en/optimization/tgate.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/optimization/tgate.md | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md index 52d50e8589d6..7727b677c91b 100644 --- a/docs/source/en/optimization/tgate.md +++ b/docs/source/en/optimization/tgate.md @@ -1,20 +1,6 @@ -# TGATE +# T-GATE -[TGATE](https://github.com/HaozheLiu-ST/T-GATE/tree/main) accelerates inferences of [`PixArtAlphaPipeline`], [`StableDiffusionPipeline`], and [`StableDiffusionXLPipeline`] by skipping the calculation of cross-attention once it converges. More details can be found at [technical report](https://huggingface.co/papers/2404.02747). - -![](https://github.com/HaozheLiu-ST/T-GATE/assets/53887227/bff43e0e-2372-4edc-9ba1-64dcbc649329) - - - - -## 🚀 Major Features - -* Training-Free. -* Easily Integrate into [Diffusers](https://github.com/huggingface/diffusers/tree/main). -* Only a few lines of code are required. 
-* Complementary to [DeepCache](https://github.com/horseee/DeepCache). -* Friendly support [Stable Diffusion pipelines](https://huggingface.co/stabilityai), [PixArt](https://pixart-alpha.github.io/), and [Latent Consistency Models](https://latent-consistency-models.github.io/). -* 10%-50% speed up for different models. +[T-GATE](https://github.com/HaozheLiu-ST/T-GATE/tree/main) accelerates inference for [Stable Diffusion](../api/pipelines/stable_diffusion/overview), [PixArt](../api/pipelines/pixart), and [Latent Consistency Model](../api/pipelines/latent_consistency_models.md) pipelines by skipping the cross-attention calculation once it converges. This method doesn't require any additional training and it can speed up inference by 10-50%. T-GATE is also compatible with other optimization methods like [DeepCache](./deepcache). ## 📖 Quick Start From 0fda09c5b90686cc3513a5618d7101b949bc3d6f Mon Sep 17 00:00:00 2001 From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com> Date: Tue, 16 Apr 2024 10:50:46 +0800 Subject: [PATCH 04/14] Update docs/source/en/optimization/tgate.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/optimization/tgate.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md index 7727b677c91b..5c05bb7169fb 100644 --- a/docs/source/en/optimization/tgate.md +++ b/docs/source/en/optimization/tgate.md @@ -2,11 +2,7 @@ [T-GATE](https://github.com/HaozheLiu-ST/T-GATE/tree/main) accelerates inference for [Stable Diffusion](../api/pipelines/stable_diffusion/overview), [PixArt](../api/pipelines/pixart), and [Latent Consistency Model](../api/pipelines/latent_consistency_models.md) pipelines by skipping the cross-attention calculation once it converges. This method doesn't require any additional training and it can speed up inference by 10-50%.
T-GATE is also compatible with other optimization methods like [DeepCache](./deepcache). -## 📖 Quick Start - -### 🛠️ Installation - -Start by installing [TGATE](https://github.com/HaozheLiu-ST/T-GATE/tree/release-v.0.1.0): +Before you begin, make sure you install T-GATE. ``` pip install tgate From 777a1ab07049daf389c9a542cf3a7da5ccb21867 Mon Sep 17 00:00:00 2001 From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com> Date: Tue, 16 Apr 2024 10:51:00 +0800 Subject: [PATCH 05/14] Update docs/source/en/optimization/tgate.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/optimization/tgate.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md index 5c05bb7169fb..3784eeb007f9 100644 --- a/docs/source/en/optimization/tgate.md +++ b/docs/source/en/optimization/tgate.md @@ -4,7 +4,7 @@ Before you begin, make sure you install T-GATE. -``` +```bash pip install tgate ``` From 6a57cd3c9ccb08228033b0583daefbbf68eb28b5 Mon Sep 17 00:00:00 2001 From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com> Date: Tue, 16 Apr 2024 10:51:32 +0800 Subject: [PATCH 06/14] Update docs/source/en/optimization/tgate.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/optimization/tgate.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md index 3784eeb007f9..616c522b8b05 100644 --- a/docs/source/en/optimization/tgate.md +++ b/docs/source/en/optimization/tgate.md @@ -6,7 +6,7 @@ Before you begin, make sure you install T-GATE. 
```bash pip install tgate -``` +pip install -U pytorch diffusers transformers accelerate DeepCache #### Requirements From ef60e1c2dfb4c0bb5058db0c1c48b7ddcae0f4a9 Mon Sep 17 00:00:00 2001 From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com> Date: Tue, 16 Apr 2024 10:51:39 +0800 Subject: [PATCH 07/14] Update docs/source/en/optimization/tgate.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/optimization/tgate.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md index 616c522b8b05..be27ea326ed2 100644 --- a/docs/source/en/optimization/tgate.md +++ b/docs/source/en/optimization/tgate.md @@ -8,13 +8,6 @@ Before you begin, make sure you install T-GATE. pip install tgate pip install -U pytorch diffusers transformers accelerate DeepCache -#### Requirements - -* pytorch>=2.0.0 -* diffusers>=0.27.2 -* transformers==4.37.2 -* DeepCache==0.1.1 -* accelerate ### 🌟 Usage From c2fffb7174abd51694463fe499cd966ba9c6f5b5 Mon Sep 17 00:00:00 2001 From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com> Date: Tue, 16 Apr 2024 10:52:07 +0800 Subject: [PATCH 08/14] Update docs/source/en/optimization/tgate.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/optimization/tgate.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md index be27ea326ed2..6d9980f16165 100644 --- a/docs/source/en/optimization/tgate.md +++ b/docs/source/en/optimization/tgate.md @@ -9,9 +9,17 @@ pip install tgate pip install -U pytorch diffusers transformers accelerate DeepCache -### 🌟 Usage +To use T-GATE with a pipeline, you need to use its corresponding loader. 
-Accelerate `PixArtAlphaPipeline` with TGATE: +| Pipeline | T-GATE Loader | +|---|---| +| PixArt | TgatePixArtLoader | +| Stable Diffusion XL | TgateSDXLLoader | +| Stable Diffusion XL + DeepCache | TgateSDXLDeepCacheLoader | + +Next, create a `TgateLoader` with a pipeline, the gate step(`add brief description here`), and the number of inference steps. Then call the `tgate` method on the pipeline with a prompt, gate step, and the number of inference steps. + +Let's see how to enable this for several different pipelines. ```diff import torch From 3beda26928a2f33c78a8ef57633025b4ac4954cd Mon Sep 17 00:00:00 2001 From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com> Date: Tue, 16 Apr 2024 10:54:52 +0800 Subject: [PATCH 09/14] Update docs/source/en/optimization/tgate.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/optimization/tgate.md | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md index 6d9980f16165..2717349c6836 100644 --- a/docs/source/en/optimization/tgate.md +++ b/docs/source/en/optimization/tgate.md @@ -21,28 +21,24 @@ Next, create a `TgateLoader` with a pipeline, the gate step(`add brief descripti Let's see how to enable this for several different pipelines. 
-```diff +```py import torch from diffusers import PixArtAlphaPipeline +from tgate import TgatePixArtLoader pipe = PixArtAlphaPipeline.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", torch_dtype=torch.float16) - -+ from tgate import TgatePixArtLoader -+ gate_step = 8 -+ inference_step = 25 -+ pipe = TgatePixArtLoader( -+ pipe, -+ gate_step=gate_step, -+ num_inference_steps=inference_step, -+ ) +pipe = TgatePixArtLoader( + pipe, + gate_step=8, + num_inference_steps=25, +) pipe = pipe.to("cuda") -+ image = pipe.tgate( -+ "An alpaca made of colorful building blocks, cyberpunk.", -+ gate_step=gate_step, -+ num_inference_steps=inference_step, -+ ).images[0] -``` +image = pipe.tgate( + "An alpaca made of colorful building blocks, cyberpunk.", + gate_step=gate_step, + num_inference_steps=inference_step, +).images[0] Accelerate `StableDiffusionXLPipeline` with TGATE: From 3cfec749424d82c4eb7e0da05861ff3212e93569 Mon Sep 17 00:00:00 2001 From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com> Date: Tue, 16 Apr 2024 10:55:11 +0800 Subject: [PATCH 10/14] Update docs/source/en/optimization/tgate.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/optimization/tgate.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md index 2717349c6836..7f055a3b8d72 100644 --- a/docs/source/en/optimization/tgate.md +++ b/docs/source/en/optimization/tgate.md @@ -145,8 +145,7 @@ pipe = pipe.to("cuda") + ).images[0] ``` -TGATE also supports `StableDiffusionPipeline` and `PixArt-alpha/PixArt-LCM-XL-2-1024-MS`. -More details can be found at [here](https://github.com/HaozheLiu-ST/T-GATE/tree/release-v.0.1.0/main.py). +T-GATE also supports [`StableDiffusionPipeline`] and [PixArt-alpha/PixArt-LCM-XL-2-1024-MS](https://hf.co/PixArt-alpha/PixArt-LCM-XL-2-1024-MS). 
## 📄 Results | Model | MACs | Param | Latency | Zero-shot 10K-FID on MS-COCO | From 6e0c212db74f557033de239e21f520f54b54f667 Mon Sep 17 00:00:00 2001 From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com> Date: Tue, 16 Apr 2024 10:55:24 +0800 Subject: [PATCH 11/14] Update docs/source/en/optimization/tgate.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/optimization/tgate.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md index 7f055a3b8d72..6155a9e565bc 100644 --- a/docs/source/en/optimization/tgate.md +++ b/docs/source/en/optimization/tgate.md @@ -147,7 +147,7 @@ pipe = pipe.to("cuda") T-GATE also supports [`StableDiffusionPipeline`] and [PixArt-alpha/PixArt-LCM-XL-2-1024-MS](https://hf.co/PixArt-alpha/PixArt-LCM-XL-2-1024-MS). -## 📄 Results +## Benchmarks | Model | MACs | Param | Latency | Zero-shot 10K-FID on MS-COCO | |-----------------------|----------|-----------|---------|---------------------------| | SD-1.5 | 16.938T | 859.520M | 7.032s | 23.927 | From 7af2c44891aefe80e716eefb82ec0ff874780ab3 Mon Sep 17 00:00:00 2001 From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com> Date: Tue, 16 Apr 2024 10:55:53 +0800 Subject: [PATCH 12/14] Update docs/source/en/optimization/tgate.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/optimization/tgate.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md index 6155a9e565bc..3efaf597a619 100644 --- a/docs/source/en/optimization/tgate.md +++ b/docs/source/en/optimization/tgate.md @@ -165,8 +165,4 @@ T-GATE also supports [`StableDiffusionPipeline`] and [PixArt-alpha/PixArt-LCM-XL | LCM (Pixart-Alpha) | 8.563T | 611.350M | 4.733s | 36.086 | | LCM w/ TGATE | 7.623T | 462.585M | 4.543s | 37.048 | -The latency is tested on a 
1080ti commercial card. - -The MACs and Params are calculated by [calflops](https://github.com/MrYxJ/calculate-flops.pytorch). - -The FID is calculated by [PytorchFID](https://github.com/mseitzer/pytorch-fid). +The latency is tested on an NVIDIA 1080TI, MACs and Params are calculated with [calflops](https://github.com/MrYxJ/calculate-flops.pytorch), and the FID is calculated with [PytorchFID](https://github.com/mseitzer/pytorch-fid). From 60a24a2d1f33ef5894a16c7469f05dc7a4b39712 Mon Sep 17 00:00:00 2001 From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com> Date: Tue, 16 Apr 2024 11:58:28 +0800 Subject: [PATCH 13/14] Update tgate.md --- docs/source/en/optimization/tgate.md | 101 +++++++++++++++------------ 1 file changed, 56 insertions(+), 45 deletions(-) diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md index 3efaf597a619..8ef119823ecf 100644 --- a/docs/source/en/optimization/tgate.md +++ b/docs/source/en/optimization/tgate.md @@ -7,6 +7,7 @@ Before you begin, make sure you install T-GATE. ```bash pip install tgate pip install -U pytorch diffusers transformers accelerate DeepCache +``` To use T-GATE with a pipeline, you need to use its corresponding loader. @@ -16,11 +17,18 @@ To use T-GATE with a pipeline, you need to use its corresponding loader. | PixArt | TgatePixArtLoader | | Stable Diffusion XL | TgateSDXLLoader | | Stable Diffusion XL + DeepCache | TgateSDXLDeepCacheLoader | +| Stable Diffusion | TgateSDLoader | +| Stable Diffusion + DeepCache | TgateSDDeepCacheLoader | -Next, create a `TgateLoader` with a pipeline, the gate step(`add brief description here`), and the number of inference steps. Then call the `tgate` method on the pipeline with a prompt, gate step, and the number of inference steps. +Next, create a `TgateLoader` with a pipeline, the gate step (the time step to stop calculating the cross attention), and the number of inference steps. 
Then call the `tgate` method on the pipeline with a prompt, gate step, and the number of inference steps. Let's see how to enable this for several different pipelines. + + + +Accelerate `PixArtAlphaPipeline` with T-GATE: + ```py import torch from diffusers import PixArtAlphaPipeline @@ -31,18 +39,20 @@ pipe = TgatePixArtLoader( pipe, gate_step=8, num_inference_steps=25, -) -pipe = pipe.to("cuda") +).to("cuda") image = pipe.tgate( "An alpaca made of colorful building blocks, cyberpunk.", gate_step=gate_step, num_inference_steps=inference_step, ).images[0] +``` + + -Accelerate `StableDiffusionXLPipeline` with TGATE: +Accelerate `StableDiffusionXLPipeline` with T-GATE: -```diff +```py import torch from diffusers import StableDiffusionXLPipeline from diffusers import DPMSolverMultistepScheduler @@ -53,29 +63,29 @@ pipe = StableDiffusionXLPipeline.from_pretrained( variant="fp16", use_safetensors=True, ) - -+ from tgate import TgateSDXLLoader -+ gate_step = 10 -+ inference_step = 25 -+ pipe = TgateSDXLLoader( -+ pipe, -+ gate_step=gate_step, -+ num_inference_steps=inference_step, -+ ) - pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) -pipe = pipe.to("cuda") -+ image = pipe.tgate( -+ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.", -+ gate_step=gate_step, -+ num_inference_steps=inference_step -+ ).images[0] +from tgate import TgateSDXLLoader +gate_step = 10 +inference_step = 25 +pipe = TgateSDXLLoader( + pipe, + gate_step=gate_step, + num_inference_steps=inference_step, +).to("cuda") + +image = pipe.tgate( + "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.", + gate_step=gate_step, + num_inference_steps=inference_step +).images[0] ``` + + -Accelerate `StableDiffusionXLPipeline` with [DeepCache](https://github.com/horseee/DeepCache) and TGATE: +Accelerate `StableDiffusionXLPipeline` with [DeepCache](https://github.com/horseee/DeepCache) and T-GATE: -```diff +```py import torch from diffusers 
import StableDiffusionXLPipeline from diffusers import DPMSolverMultistepScheduler @@ -86,18 +96,16 @@ pipe = StableDiffusionXLPipeline.from_pretrained( variant="fp16", use_safetensors=True, ) - -+ from tgate import TgateSDXLDeepCacheLoader -+ gate_step = 10 -+ inference_step = 25 -+ pipe = TgateSDXLDeepCacheLoader( -+ pipe, -+ cache_interval=3, -+ cache_branch_id=0, -+ ) - pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) -pipe = pipe.to("cuda") + +from tgate import TgateSDXLDeepCacheLoader +gate_step = 10 +inference_step = 25 +pipe = TgateSDXLDeepCacheLoader( + pipe, + cache_interval=3, + cache_branch_id=0, +).to("cuda") + image = pipe.tgate( + "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.", @@ -105,10 +113,12 @@ pipe = pipe.to("cuda") + num_inference_steps=inference_step + ).images[0] ``` + + -Accelerate `latent-consistency/lcm-sdxl` with TGATE: +Accelerate `latent-consistency/lcm-sdxl` with T-GATE: -```diff +```py import torch from diffusers import StableDiffusionXLPipeline from diffusers import UNet2DConditionModel, LCMScheduler @@ -135,8 +145,7 @@ pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) + gate_step=gate_step, + num_inference_steps=inference_step, + lcm=True -+ ) -pipe = pipe.to("cuda") ++ ).to("cuda") + image = pipe.tgate( + "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.", @@ -144,6 +153,8 @@ pipe = pipe.to("cuda") + num_inference_steps=inference_step + ).images[0] ``` + + T-GATE also supports [`StableDiffusionPipeline`] and [PixArt-alpha/PixArt-LCM-XL-2-1024-MS](https://hf.co/PixArt-alpha/PixArt-LCM-XL-2-1024-MS). 
@@ -151,18 +162,18 @@ T-GATE also supports [`StableDiffusionPipeline`] and [PixArt-alpha/PixArt-LCM-XL | Model | MACs | Param | Latency | Zero-shot 10K-FID on MS-COCO | |-----------------------|----------|-----------|---------|---------------------------| | SD-1.5 | 16.938T | 859.520M | 7.032s | 23.927 | -| SD-1.5 w/ TGATE | 9.875T | 815.557M | 4.313s | 20.789 | +| SD-1.5 w/ T-GATE | 9.875T | 815.557M | 4.313s | 20.789 | | SD-2.1 | 38.041T | 865.785M | 16.121s | 22.609 | -| SD-2.1 w/ TGATE | 22.208T | 815.433 M | 9.878s | 19.940 | +| SD-2.1 w/ T-GATE | 22.208T | 815.433 M | 9.878s | 19.940 | | SD-XL | 149.438T | 2.570B | 53.187s | 24.628 | -| SD-XL w/ TGATE | 84.438T | 2.024B | 27.932s | 22.738 | +| SD-XL w/ T-GATE | 84.438T | 2.024B | 27.932s | 22.738 | | Pixart-Alpha | 107.031T | 611.350M | 61.502s | 38.669 | -| Pixart-Alpha w/ TGATE | 65.318T | 462.585M | 37.867s | 35.825 | +| Pixart-Alpha w/ T-GATE | 65.318T | 462.585M | 37.867s | 35.825 | | DeepCache (SD-XL) | 57.888T | - | 19.931s | 23.755 | -| DeepCache w/ TGATE | 43.868T | - | 14.666s | 23.999 | +| DeepCache w/ T-GATE | 43.868T | - | 14.666s | 23.999 | | LCM (SD-XL) | 11.955T | 2.570B | 3.805s | 25.044 | -| LCM w/ TGATE | 11.171T | 2.024B | 3.533s | 25.028 | +| LCM w/ T-GATE | 11.171T | 2.024B | 3.533s | 25.028 | | LCM (Pixart-Alpha) | 8.563T | 611.350M | 4.733s | 36.086 | -| LCM w/ TGATE | 7.623T | 462.585M | 4.543s | 37.048 | +| LCM w/ T-GATE | 7.623T | 462.585M | 4.543s | 37.048 | The latency is tested on an NVIDIA 1080TI, MACs and Params are calculated with [calflops](https://github.com/MrYxJ/calculate-flops.pytorch), and the FID is calculated with [PytorchFID](https://github.com/mseitzer/pytorch-fid). 
From 2eaf934e31c6604760cc937f2380a29fc724402b Mon Sep 17 00:00:00 2001 From: Wentian <94900022+WentianZhang-ML@users.noreply.github.com> Date: Wed, 17 Apr 2024 02:19:14 +0800 Subject: [PATCH 14/14] Update tgate.md --- docs/source/en/optimization/tgate.md | 40 ++++++++++++++-------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md index 8ef119823ecf..0b536a215fc0 100644 --- a/docs/source/en/optimization/tgate.md +++ b/docs/source/en/optimization/tgate.md @@ -107,11 +107,11 @@ pipe = TgateSDXLDeepCacheLoader( cache_branch_id=0, ).to("cuda") -+ image = pipe.tgate( -+ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.", -+ gate_step=gate_step, -+ num_inference_steps=inference_step -+ ).images[0] +image = pipe.tgate( + "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.", + gate_step=gate_step, + num_inference_steps=inference_step +).images[0] ``` @@ -137,21 +137,21 @@ pipe = StableDiffusionXLPipeline.from_pretrained( ) pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) -+ from tgate import TgateSDXLLoader -+ gate_step = 1 -+ inference_step = 4 -+ pipe = TgateSDXLLoader( -+ pipe, -+ gate_step=gate_step, -+ num_inference_steps=inference_step, -+ lcm=True -+ ).to("cuda") - -+ image = pipe.tgate( -+ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.", -+ gate_step=gate_step, -+ num_inference_steps=inference_step -+ ).images[0] +from tgate import TgateSDXLLoader +gate_step = 1 +inference_step = 4 +pipe = TgateSDXLLoader( + pipe, + gate_step=gate_step, + num_inference_steps=inference_step, + lcm=True +).to("cuda") + +image = pipe.tgate( + "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.", + gate_step=gate_step, + num_inference_steps=inference_step +).images[0] ```