From 8c2b2cdc52909c59d9b5f9417b9dd527396594d8 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Tue, 1 Apr 2025 22:20:28 +0530
Subject: [PATCH 1/3] update

---
 src/diffusers/models/transformers/transformer_wan.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/diffusers/models/transformers/transformer_wan.py b/src/diffusers/models/transformers/transformer_wan.py
index 4eb4add37601..aa03e97093aa 100644
--- a/src/diffusers/models/transformers/transformer_wan.py
+++ b/src/diffusers/models/transformers/transformer_wan.py
@@ -24,6 +24,7 @@
 from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers
 from ..attention import FeedForward
 from ..attention_processor import Attention
+from ..cache_utils import CacheMixin
 from ..embeddings import PixArtAlphaTextProjection, TimestepEmbedding, Timesteps, get_1d_rotary_pos_embed
 from ..modeling_outputs import Transformer2DModelOutput
 from ..modeling_utils import ModelMixin
@@ -288,7 +289,7 @@ def forward(
         return hidden_states
 
 
-class WanTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin):
+class WanTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin):
     r"""
     A Transformer model for video-like data used in the Wan model.
 

From 5ac65c4513801954f633fd1361bfe56b98a333d6 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Tue, 1 Apr 2025 23:00:21 +0530
Subject: [PATCH 2/3] update

---
 src/diffusers/models/transformers/transformer_ltx.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/diffusers/models/transformers/transformer_ltx.py b/src/diffusers/models/transformers/transformer_ltx.py
index c1f2df587927..2ae2418098f6 100644
--- a/src/diffusers/models/transformers/transformer_ltx.py
+++ b/src/diffusers/models/transformers/transformer_ltx.py
@@ -26,6 +26,7 @@
 from ...utils.torch_utils import maybe_allow_in_graph
 from ..attention import FeedForward
 from ..attention_processor import Attention
+from ..cache_utils import CacheMixin
 from ..embeddings import PixArtAlphaTextProjection
 from ..modeling_outputs import Transformer2DModelOutput
 from ..modeling_utils import ModelMixin
@@ -298,7 +299,7 @@ def forward(
 
 
 @maybe_allow_in_graph
-class LTXVideoTransformer3DModel(ModelMixin, ConfigMixin, FromOriginalModelMixin, PeftAdapterMixin):
+class LTXVideoTransformer3DModel(ModelMixin, ConfigMixin, FromOriginalModelMixin, PeftAdapterMixin, CacheMixin):
     r"""
     A Transformer model for video-like data used in [LTX](https://huggingface.co/Lightricks/LTX-Video).
 

From ca5cfbd37e0f37987e9685926f7a2b0c2f93ac48 Mon Sep 17 00:00:00 2001
From: Dhruv Nair <dhruv.nair@gmail.com>
Date: Wed, 2 Apr 2025 20:33:17 +0200
Subject: [PATCH 3/3] update

---
 src/diffusers/pipelines/ltx/pipeline_ltx.py             | 7 +++++++
 src/diffusers/pipelines/ltx/pipeline_ltx_condition.py   | 7 +++++++
 src/diffusers/pipelines/ltx/pipeline_ltx_image2video.py | 7 +++++++
 3 files changed, 21 insertions(+)

diff --git a/src/diffusers/pipelines/ltx/pipeline_ltx.py b/src/diffusers/pipelines/ltx/pipeline_ltx.py
index f7b0811d1a22..6f3faed8ff72 100644
--- a/src/diffusers/pipelines/ltx/pipeline_ltx.py
+++ b/src/diffusers/pipelines/ltx/pipeline_ltx.py
@@ -489,6 +489,10 @@ def do_classifier_free_guidance(self):
     def num_timesteps(self):
         return self._num_timesteps
 
+    @property
+    def current_timestep(self):
+        return self._current_timestep
+
     @property
     def attention_kwargs(self):
         return self._attention_kwargs
@@ -622,6 +626,7 @@ def __call__(
         self._guidance_scale = guidance_scale
         self._attention_kwargs = attention_kwargs
         self._interrupt = False
+        self._current_timestep = None
 
         # 2. Define call parameters
         if prompt is not None and isinstance(prompt, str):
@@ -706,6 +711,8 @@ def __call__(
                 if self.interrupt:
                     continue
 
+                self._current_timestep = t
+
                 latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
                 latent_model_input = latent_model_input.to(prompt_embeds.dtype)
 
diff --git a/src/diffusers/pipelines/ltx/pipeline_ltx_condition.py b/src/diffusers/pipelines/ltx/pipeline_ltx_condition.py
index e7f3666cb2c7..ef1fd568397f 100644
--- a/src/diffusers/pipelines/ltx/pipeline_ltx_condition.py
+++ b/src/diffusers/pipelines/ltx/pipeline_ltx_condition.py
@@ -774,6 +774,10 @@ def do_classifier_free_guidance(self):
     def num_timesteps(self):
         return self._num_timesteps
 
+    @property
+    def current_timestep(self):
+        return self._current_timestep
+
     @property
     def attention_kwargs(self):
         return self._attention_kwargs
@@ -933,6 +937,7 @@ def __call__(
         self._guidance_scale = guidance_scale
         self._attention_kwargs = attention_kwargs
         self._interrupt = False
+        self._current_timestep = None
 
         # 2. Define call parameters
         if prompt is not None and isinstance(prompt, str):
@@ -1066,6 +1071,8 @@ def __call__(
                 if self.interrupt:
                     continue
 
+                self._current_timestep = t
+
                 if image_cond_noise_scale > 0:
                     # Add timestep-dependent noise to the hard-conditioning latents
                     # This helps with motion continuity, especially when conditioned on a single frame
diff --git a/src/diffusers/pipelines/ltx/pipeline_ltx_image2video.py b/src/diffusers/pipelines/ltx/pipeline_ltx_image2video.py
index 0f640dc33546..1ae67967c6f5 100644
--- a/src/diffusers/pipelines/ltx/pipeline_ltx_image2video.py
+++ b/src/diffusers/pipelines/ltx/pipeline_ltx_image2video.py
@@ -550,6 +550,10 @@ def do_classifier_free_guidance(self):
     def num_timesteps(self):
         return self._num_timesteps
 
+    @property
+    def current_timestep(self):
+        return self._current_timestep
+
     @property
     def attention_kwargs(self):
         return self._attention_kwargs
@@ -686,6 +690,7 @@ def __call__(
         self._guidance_scale = guidance_scale
         self._attention_kwargs = attention_kwargs
         self._interrupt = False
+        self._current_timestep = None
 
         # 2. Define call parameters
         if prompt is not None and isinstance(prompt, str):
@@ -778,6 +783,8 @@ def __call__(
                 if self.interrupt:
                     continue
 
+                self._current_timestep = t
+
                 latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
                 latent_model_input = latent_model_input.to(prompt_embeds.dtype)