From dc6f8f52fec51b17618b8966c57c27f8d6ee7612 Mon Sep 17 00:00:00 2001
From: Aryan
Date: Fri, 13 Jun 2025 01:27:13 +0200
Subject: [PATCH 1/4] update

---
 .../loaders/lora_conversion_utils.py | 60 ++++++++++++++-----
 1 file changed, 46 insertions(+), 14 deletions(-)

diff --git a/src/diffusers/loaders/lora_conversion_utils.py b/src/diffusers/loaders/lora_conversion_utils.py
index 5b12c3aca84d..a778246365c9 100644
--- a/src/diffusers/loaders/lora_conversion_utils.py
+++ b/src/diffusers/loaders/lora_conversion_utils.py
@@ -1605,10 +1605,17 @@ def _convert_non_diffusers_wan_lora_to_diffusers(state_dict):
     if diff_keys:
         for diff_k in diff_keys:
             param = original_state_dict[diff_k]
+            threshold = 1.6e-2
+            absdiff = param.abs().max() - param.abs().min()
             all_zero = torch.all(param == 0).item()
-            if all_zero:
-                logger.debug(f"Removed {diff_k} key from the state dict as it's all zeros.")
+            all_absdiff_lower_than_threshold = absdiff < threshold
+            if all_zero or all_absdiff_lower_than_threshold:
+                logger.debug(
+                    f"Removed {diff_k} key from the state dict as it's all zeros, or values lower than hardcoded threshold."
+                )
                 original_state_dict.pop(diff_k)
+            else:
+                print(diff_k, absdiff)
 
     # For the `diff_b` keys, we treat them as lora_bias.
     # https://huggingface.co/docs/peft/main/en/package_reference/lora#peft.LoraConfig.lora_bias
@@ -1655,12 +1662,16 @@ def _convert_non_diffusers_wan_lora_to_diffusers(state_dict):
         # FFN
         for o, c in zip(["ffn.0", "ffn.2"], ["net.0.proj", "net.2"]):
-            converted_state_dict[f"blocks.{i}.ffn.{c}.lora_A.weight"] = original_state_dict.pop(
-                f"blocks.{i}.{o}.{lora_down_key}.weight"
-            )
-            converted_state_dict[f"blocks.{i}.ffn.{c}.lora_B.weight"] = original_state_dict.pop(
-                f"blocks.{i}.{o}.{lora_up_key}.weight"
-            )
+            original_key = f"blocks.{i}.{o}.{lora_down_key}.weight"
+            converted_key = f"blocks.{i}.ffn.{c}.lora_A.weight"
+            if original_key in original_state_dict:
+                converted_state_dict[converted_key] = original_state_dict.pop(original_key)
+
+            original_key = f"blocks.{i}.{o}.{lora_up_key}.weight"
+            converted_key = f"blocks.{i}.ffn.{c}.lora_B.weight"
+            if original_key in original_state_dict:
+                converted_state_dict[converted_key] = original_state_dict.pop(original_key)
+
             if f"blocks.{i}.{o}.diff_b" in original_state_dict:
                 converted_state_dict[f"blocks.{i}.ffn.{c}.lora_B.bias"] = original_state_dict.pop(
                     f"blocks.{i}.{o}.diff_b"
                 )
@@ -1669,12 +1680,16 @@ def _convert_non_diffusers_wan_lora_to_diffusers(state_dict):
     # Remaining.
     if original_state_dict:
         if any("time_projection" in k for k in original_state_dict):
-            converted_state_dict["condition_embedder.time_proj.lora_A.weight"] = original_state_dict.pop(
-                f"time_projection.1.{lora_down_key}.weight"
-            )
-            converted_state_dict["condition_embedder.time_proj.lora_B.weight"] = original_state_dict.pop(
-                f"time_projection.1.{lora_up_key}.weight"
-            )
+            original_key = f"time_projection.1.{lora_down_key}.weight"
+            converted_key = "condition_embedder.time_proj.lora_A.weight"
+            if original_key in original_state_dict:
+                converted_state_dict[converted_key] = original_state_dict.pop(original_key)
+
+            original_key = f"time_projection.1.{lora_up_key}.weight"
+            converted_key = "condition_embedder.time_proj.lora_B.weight"
+            if original_key in original_state_dict:
+                converted_state_dict[converted_key] = original_state_dict.pop(original_key)
+
         if "time_projection.1.diff_b" in original_state_dict:
             converted_state_dict["condition_embedder.time_proj.lora_B.bias"] = original_state_dict.pop(
                 "time_projection.1.diff_b"
             )
@@ -1709,6 +1724,23 @@ def _convert_non_diffusers_wan_lora_to_diffusers(state_dict):
                         original_state_dict.pop(f"{text_time}.{b_n}.diff_b")
                     )
 
+        for img_ours, img_theirs in [
+            (
+                "ff.net.0.proj",
+                "img_emb.proj.1"
+            ),
+            ("ff.net.2", "img_emb.proj.3"),
+        ]:
+            original_key = f"{img_theirs}.{lora_down_key}.weight"
+            converted_key = f"condition_embedder.image_embedder.{img_ours}.lora_A.weight"
+            if original_key in original_state_dict:
+                converted_state_dict[converted_key] = original_state_dict.pop(original_key)
+
+            original_key = f"{img_theirs}.{lora_up_key}.weight"
+            converted_key = f"condition_embedder.image_embedder.{img_ours}.lora_B.weight"
+            if original_key in original_state_dict:
+                converted_state_dict[converted_key] = original_state_dict.pop(original_key)
+
     if len(original_state_dict) > 0:
         diff = all(".diff" in k for k in original_state_dict)
         if diff:

From d4b7f980023e45ebf92c01dc7ae841543244351b Mon Sep 17 00:00:00 2001
From: Aryan
Date: Fri, 13 Jun 2025 01:29:13 +0200
Subject: [PATCH 2/4] make style

---
 src/diffusers/loaders/lora_conversion_utils.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/diffusers/loaders/lora_conversion_utils.py b/src/diffusers/loaders/lora_conversion_utils.py
index a778246365c9..77837ae07b26 100644
--- a/src/diffusers/loaders/lora_conversion_utils.py
+++ b/src/diffusers/loaders/lora_conversion_utils.py
@@ -1725,10 +1725,7 @@ def _convert_non_diffusers_wan_lora_to_diffusers(state_dict):
                     )
 
         for img_ours, img_theirs in [
-            (
-                "ff.net.0.proj",
-                "img_emb.proj.1"
-            ),
+            ("ff.net.0.proj", "img_emb.proj.1"),
             ("ff.net.2", "img_emb.proj.3"),
         ]:
             original_key = f"{img_theirs}.{lora_down_key}.weight"

From 88136f579a0ddb973dda921730398ed498c5e167 Mon Sep 17 00:00:00 2001
From: Aryan
Date: Fri, 13 Jun 2025 05:01:43 +0530
Subject: [PATCH 3/4] Update src/diffusers/loaders/lora_conversion_utils.py

---
 src/diffusers/loaders/lora_conversion_utils.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/diffusers/loaders/lora_conversion_utils.py b/src/diffusers/loaders/lora_conversion_utils.py
index 77837ae07b26..de06b4197748 100644
--- a/src/diffusers/loaders/lora_conversion_utils.py
+++ b/src/diffusers/loaders/lora_conversion_utils.py
@@ -1614,8 +1614,6 @@ def _convert_non_diffusers_wan_lora_to_diffusers(state_dict):
                     f"Removed {diff_k} key from the state dict as it's all zeros, or values lower than hardcoded threshold."
                 )
                 original_state_dict.pop(diff_k)
-            else:
-                print(diff_k, absdiff)
 
     # For the `diff_b` keys, we treat them as lora_bias.
     # https://huggingface.co/docs/peft/main/en/package_reference/lora#peft.LoraConfig.lora_bias

From 1e66d3d5eae67c9918aef9655e356fe5d0bdd5f9 Mon Sep 17 00:00:00 2001
From: Aryan
Date: Fri, 13 Jun 2025 08:05:31 +0200
Subject: [PATCH 4/4] add note explaining threshold

---
 src/diffusers/loaders/lora_conversion_utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/diffusers/loaders/lora_conversion_utils.py b/src/diffusers/loaders/lora_conversion_utils.py
index de06b4197748..7bde2a00be97 100644
--- a/src/diffusers/loaders/lora_conversion_utils.py
+++ b/src/diffusers/loaders/lora_conversion_utils.py
@@ -1605,6 +1605,10 @@ def _convert_non_diffusers_wan_lora_to_diffusers(state_dict):
     if diff_keys:
         for diff_k in diff_keys:
             param = original_state_dict[diff_k]
+            # The magnitudes of the .diff-ending weights are very low (most are below 1e-4, some are upto 1e-3,
+            # and 2 of them are about 1.6e-2 [the case with AccVideo lora]). The low magnitudes mostly correspond
+            # to norm layers. Ignoring them is the best option at the moment until a better solution is found. It
+            # is okay to ignore because they do not affect the model output in a significant manner.
             threshold = 1.6e-2
             absdiff = param.abs().max() - param.abs().min()
             all_zero = torch.all(param == 0).item()
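For context, here is a minimal standalone sketch of the ".diff" key filtering that PATCH 1/4, 3/4, and 4/4 converge on. The helper name is hypothetical (not a diffusers API), and it assumes the relevant keys end in ".diff" and map to torch tensors; how diffusers actually collects diff_keys may be broader than shown here.

import torch


def drop_negligible_diff_keys(original_state_dict, threshold=1.6e-2):
    # Hypothetical helper mirroring the patched logic: drop ".diff" keys that are
    # all zeros or whose absolute spread stays below the hardcoded 1.6e-2 threshold
    # (mostly norm-layer deltas that barely affect the model output).
    diff_keys = [k for k in original_state_dict if k.endswith(".diff")]
    for diff_k in diff_keys:
        param = original_state_dict[diff_k]
        absdiff = param.abs().max() - param.abs().min()
        all_zero = torch.all(param == 0).item()
        if all_zero or absdiff < threshold:
            original_state_dict.pop(diff_k)
    return original_state_dict


# Example with made-up keys: the near-constant delta is dropped, the larger one is kept.
state = {"blocks.0.norm.diff": torch.full((4,), 1e-4), "blocks.0.attn.diff": torch.linspace(0.0, 0.5, 4)}
drop_negligible_diff_keys(state)
assert list(state) == ["blocks.0.attn.diff"]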