From ca6b5119b4fd8cb775cb042711901fd62f7c8332 Mon Sep 17 00:00:00 2001 From: co63oc Date: Fri, 25 Apr 2025 09:50:25 +0800 Subject: [PATCH 1/2] Fix typos in docs and comments --- docs/source/en/api/pipelines/animatediff.md | 2 +- docs/source/en/api/pipelines/ledits_pp.md | 2 +- docs/source/en/api/pipelines/wan.md | 4 ++-- docs/source/en/using-diffusers/inference_with_lcm.md | 4 ++-- docs/source/en/using-diffusers/pag.md | 6 +++--- examples/advanced_diffusion_training/README.md | 4 ++-- examples/advanced_diffusion_training/README_flux.md | 2 +- examples/amused/README.md | 2 +- examples/cogvideo/README.md | 2 +- .../cogvideo/train_cogvideox_image_to_video_lora.py | 2 +- examples/cogvideo/train_cogvideox_lora.py | 2 +- examples/community/README.md | 2 +- examples/community/dps_pipeline.py | 4 ++-- examples/community/hd_painter.py | 12 ++++++------ examples/community/img2img_inpainting.py | 2 +- examples/community/latent_consistency_img2img.py | 4 ++-- examples/community/magic_mix.py | 2 +- examples/community/mixture_tiling.py | 6 +++--- examples/community/pipeline_controlnet_xl_kolors.py | 2 +- .../pipeline_controlnet_xl_kolors_img2img.py | 2 +- .../pipeline_controlnet_xl_kolors_inpaint.py | 2 +- examples/community/pipeline_fabric.py | 2 +- .../pipeline_faithdiff_stable_diffusion_xl.py | 2 +- .../community/pipeline_stable_diffusion_boxdiff.py | 2 +- .../pipeline_stable_diffusion_xl_attentive_eraser.py | 4 ++-- ...ipeline_stable_diffusion_xl_controlnet_adapter.py | 2 +- ...stable_diffusion_xl_controlnet_adapter_inpaint.py | 2 +- .../community/regional_prompting_stable_diffusion.py | 6 +++--- examples/community/sde_drag.py | 2 +- examples/community/unclip_image_interpolation.py | 2 +- .../train_lcm_distill_lora_sd_wds.py | 2 +- .../train_lcm_distill_lora_sdxl.py | 2 +- .../train_lcm_distill_lora_sdxl_wds.py | 2 +- .../train_lcm_distill_sd_wds.py | 2 +- .../train_lcm_distill_sdxl_wds.py | 2 +- examples/controlnet/README_flux.md | 2 +- examples/dreambooth/README_flux.md | 4 ++-- examples/dreambooth/README_hidream.md | 2 +- examples/dreambooth/README_lumina2.md | 2 +- examples/dreambooth/README_sana.md | 2 +- examples/dreambooth/train_dreambooth_lora_flux.py | 2 +- examples/dreambooth/train_dreambooth_lora_hidream.py | 2 +- examples/dreambooth/train_dreambooth_lora_lumina2.py | 2 +- examples/dreambooth/train_dreambooth_lora_sana.py | 2 +- examples/dreambooth/train_dreambooth_lora_sd3.py | 4 ++-- examples/dreambooth/train_dreambooth_lora_sdxl.py | 2 +- examples/flux-control/train_control_lora_flux.py | 2 +- .../train_text_to_image_lora_decoder.py | 2 +- .../train_cm_ct_unconditional.py | 2 +- .../flux_lora_quantization/README.md | 2 +- .../research_projects/intel_opts/inference_bf16.py | 2 +- .../intel_opts/textual_inversion_dfq/README.md | 2 +- .../pixart/pipeline_pixart_alpha_controlnet.py | 2 +- .../pixart/train_pixart_controlnet_hf.py | 2 +- .../pytorch_xla/inference/flux/flux_inference.py | 4 ++-- .../research_projects/realfill/train_realfill.py | 2 +- .../dreambooth/train_dreambooth_lora_sdxl.py | 2 +- examples/textual_inversion/textual_inversion.py | 2 +- examples/textual_inversion/textual_inversion_sdxl.py | 2 +- scripts/convert_flux_to_diffusers.py | 2 +- scripts/convert_sana_to_diffusers.py | 2 +- scripts/convert_shap_e_to_diffusers.py | 2 +- scripts/convert_wuerstchen.py | 8 ++++---- src/diffusers/hooks/group_offloading.py | 2 +- src/diffusers/hooks/layerwise_casting.py | 2 +- src/diffusers/loaders/lora_conversion_utils.py | 10 +++++----- src/diffusers/loaders/lora_pipeline.py | 2 +- 
src/diffusers/loaders/peft.py | 8 ++++---- src/diffusers/loaders/single_file.py | 2 +- src/diffusers/loaders/single_file_utils.py | 4 ++-- src/diffusers/loaders/transformer_sd3.py | 2 +- src/diffusers/models/controlnets/controlnet_xs.py | 10 +++++----- src/diffusers/models/embeddings.py | 4 ++-- .../models/transformers/latte_transformer_3d.py | 4 ++-- .../models/transformers/lumina_nextdit2d.py | 2 +- src/diffusers/models/unets/unet_i2vgen_xl.py | 2 +- src/diffusers/pipelines/amused/pipeline_amused.py | 2 +- .../pipelines/audioldm2/pipeline_audioldm2.py | 6 +++--- .../blip_diffusion/pipeline_blip_diffusion.py | 2 +- .../controlnet/pipeline_controlnet_blip_diffusion.py | 2 +- .../controlnet_xs/pipeline_controlnet_xs.py | 2 +- .../controlnet_xs/pipeline_controlnet_xs_sd_xl.py | 2 +- .../dance_diffusion/pipeline_dance_diffusion.py | 2 +- .../pipeline_stable_diffusion_model_editing.py | 2 +- .../versatile_diffusion/modeling_text_unet.py | 2 +- src/diffusers/pipelines/free_noise_utils.py | 2 +- .../pipelines/i2vgen_xl/pipeline_i2vgen_xl.py | 2 +- .../kandinsky/pipeline_kandinsky_inpaint.py | 2 +- .../kandinsky2_2/pipeline_kandinsky2_2_inpainting.py | 2 +- src/diffusers/pipelines/kolors/text_encoder.py | 2 +- .../ledits_pp/pipeline_leditspp_stable_diffusion.py | 2 +- .../pipeline_leditspp_stable_diffusion_xl.py | 2 +- .../pipelines/marigold/marigold_image_processing.py | 4 ++-- src/diffusers/pipelines/omnigen/pipeline_omnigen.py | 2 +- .../pag/pipeline_pag_controlnet_sd_xl_img2img.py | 2 +- src/diffusers/pipelines/shap_e/renderer.py | 2 +- .../stable_cascade/pipeline_stable_cascade.py | 4 ++-- .../stable_cascade/pipeline_stable_cascade_prior.py | 6 +++--- .../pipeline_stable_diffusion_attend_and_excite.py | 2 +- .../pipeline_stable_diffusion_k_diffusion.py | 2 +- .../t2i_adapter/pipeline_stable_diffusion_adapter.py | 2 +- .../pipeline_stable_diffusion_xl_adapter.py | 2 +- .../pipelines/unidiffuser/modeling_text_decoder.py | 2 +- .../pipelines/unidiffuser/pipeline_unidiffuser.py | 2 +- src/diffusers/quantizers/base.py | 2 +- src/diffusers/quantizers/bitsandbytes/utils.py | 4 ++-- src/diffusers/quantizers/gguf/gguf_quantizer.py | 4 ++-- .../quantizers/torchao/torchao_quantizer.py | 2 +- .../schedulers/scheduling_dpmsolver_singlestep.py | 2 +- src/diffusers/utils/export_utils.py | 2 +- src/diffusers/utils/peft_utils.py | 8 ++++---- src/diffusers/utils/state_dict_utils.py | 4 ++-- src/diffusers/utils/torch_utils.py | 2 +- src/diffusers/video_processor.py | 2 +- .../pipelines/wuerstchen/test_wuerstchen_decoder.py | 2 +- 115 files changed, 164 insertions(+), 164 deletions(-) diff --git a/docs/source/en/api/pipelines/animatediff.md b/docs/source/en/api/pipelines/animatediff.md index ed5ced7dbbc7..b4d347dc6ade 100644 --- a/docs/source/en/api/pipelines/animatediff.md +++ b/docs/source/en/api/pipelines/animatediff.md @@ -966,7 +966,7 @@ pipe.to("cuda") prompt = { 0: "A caterpillar on a leaf, high quality, photorealistic", 40: "A caterpillar transforming into a cocoon, on a leaf, near flowers, photorealistic", - 80: "A cocoon on a leaf, flowers in the backgrond, photorealistic", + 80: "A cocoon on a leaf, flowers in the background, photorealistic", 120: "A cocoon maturing and a butterfly being born, flowers and leaves visible in the background, photorealistic", 160: "A beautiful butterfly, vibrant colors, sitting on a leaf, flowers in the background, photorealistic", 200: "A beautiful butterfly, flying away in a forest, photorealistic", diff --git a/docs/source/en/api/pipelines/ledits_pp.md 
b/docs/source/en/api/pipelines/ledits_pp.md index 0dc4b536ab42..7c08971aa8d9 100644 --- a/docs/source/en/api/pipelines/ledits_pp.md +++ b/docs/source/en/api/pipelines/ledits_pp.md @@ -29,7 +29,7 @@ You can find additional information about LEDITS++ on the [project page](https:/ -Due to some backward compatability issues with the current diffusers implementation of [`~schedulers.DPMSolverMultistepScheduler`] this implementation of LEdits++ can no longer guarantee perfect inversion. +Due to some backward compatibility issues with the current diffusers implementation of [`~schedulers.DPMSolverMultistepScheduler`] this implementation of LEdits++ can no longer guarantee perfect inversion. This issue is unlikely to have any noticeable effects on applied use-cases. However, we provide an alternative implementation that guarantees perfect inversion in a dedicated [GitHub repo](https://github.com/ml-research/ledits_pp). diff --git a/docs/source/en/api/pipelines/wan.md b/docs/source/en/api/pipelines/wan.md index dbf3b973d79c..09503125f5c5 100644 --- a/docs/source/en/api/pipelines/wan.md +++ b/docs/source/en/api/pipelines/wan.md @@ -285,7 +285,7 @@ pipe = WanImageToVideoPipeline.from_pretrained( image_encoder=image_encoder, torch_dtype=torch.bfloat16 ) -# Since we've offloaded the larger models alrady, we can move the rest of the model components to GPU +# Since we've offloaded the larger models already, we can move the rest of the model components to GPU pipe.to("cuda") image = load_image( @@ -368,7 +368,7 @@ pipe = WanImageToVideoPipeline.from_pretrained( image_encoder=image_encoder, torch_dtype=torch.bfloat16 ) -# Since we've offloaded the larger models alrady, we can move the rest of the model components to GPU +# Since we've offloaded the larger models already, we can move the rest of the model components to GPU pipe.to("cuda") image = load_image( diff --git a/docs/source/en/using-diffusers/inference_with_lcm.md b/docs/source/en/using-diffusers/inference_with_lcm.md index 02b0a9bda312..dba4aeb4f6c6 100644 --- a/docs/source/en/using-diffusers/inference_with_lcm.md +++ b/docs/source/en/using-diffusers/inference_with_lcm.md @@ -485,7 +485,7 @@ image = image[:, :, None] image = np.concatenate([image, image, image], axis=2) canny_image = Image.fromarray(image).resize((1024, 1216)) -adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, varient="fp16").to("cuda") +adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, variant="fp16").to("cuda") unet = UNet2DConditionModel.from_pretrained( "latent-consistency/lcm-sdxl", @@ -551,7 +551,7 @@ image = image[:, :, None] image = np.concatenate([image, image, image], axis=2) canny_image = Image.fromarray(image).resize((1024, 1024)) -adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, varient="fp16").to("cuda") +adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, variant="fp16").to("cuda") pipe = StableDiffusionXLAdapterPipeline.from_pretrained( "stabilityai/stable-diffusion-xl-base-1.0", diff --git a/docs/source/en/using-diffusers/pag.md b/docs/source/en/using-diffusers/pag.md index 26961d959c49..1af690f86ac6 100644 --- a/docs/source/en/using-diffusers/pag.md +++ b/docs/source/en/using-diffusers/pag.md @@ -154,11 +154,11 @@ pipeline = AutoPipelineForInpainting.from_pretrained( pipeline.enable_model_cpu_offload() ``` -You can enable PAG on an exisiting 
inpainting pipeline like this +You can enable PAG on an existing inpainting pipeline like this ```py -pipeline_inpaint = AutoPipelineForInpaiting.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16) -pipeline = AutoPipelineForInpaiting.from_pipe(pipeline_inpaint, enable_pag=True) +pipeline_inpaint = AutoPipelineForInpainting.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16) +pipeline = AutoPipelineForInpainting.from_pipe(pipeline_inpaint, enable_pag=True) ``` This still works when your pipeline has a different task: diff --git a/examples/advanced_diffusion_training/README.md b/examples/advanced_diffusion_training/README.md index 504ae1471f44..f30f8c83a13d 100644 --- a/examples/advanced_diffusion_training/README.md +++ b/examples/advanced_diffusion_training/README.md @@ -125,7 +125,7 @@ Now we'll simply specify the name of the dataset and caption column (in this cas ``` You can also load a dataset straight from by specifying it's name in `dataset_name`. -Look [here](https://huggingface.co/blog/sdxl_lora_advanced_script#custom-captioning) for more info on creating/loadin your own caption dataset. +Look [here](https://huggingface.co/blog/sdxl_lora_advanced_script#custom-captioning) for more info on creating/loading your own caption dataset. - **optimizer**: for this example, we'll use [prodigy](https://huggingface.co/blog/sdxl_lora_advanced_script#adaptive-optimizers) - an adaptive optimizer - **pivotal tuning** @@ -404,7 +404,7 @@ The advanced script now supports custom choice of U-net blocks to train during D > In light of this, we're introducing a new feature to the advanced script to allow for configurable U-net learned blocks. **Usage** -Configure LoRA learned U-net blocks adding a `lora_unet_blocks` flag, with a comma seperated string specifying the targeted blocks. +Configure LoRA learned U-net blocks adding a `lora_unet_blocks` flag, with a comma separated string specifying the targeted blocks. e.g: ```bash --lora_unet_blocks="unet.up_blocks.0.attentions.0,unet.up_blocks.0.attentions.1" diff --git a/examples/advanced_diffusion_training/README_flux.md b/examples/advanced_diffusion_training/README_flux.md index f2a571d5eae4..ded6f11314e4 100644 --- a/examples/advanced_diffusion_training/README_flux.md +++ b/examples/advanced_diffusion_training/README_flux.md @@ -141,7 +141,7 @@ Now we'll simply specify the name of the dataset and caption column (in this cas ``` You can also load a dataset straight from by specifying it's name in `dataset_name`. -Look [here](https://huggingface.co/blog/sdxl_lora_advanced_script#custom-captioning) for more info on creating/loadin your own caption dataset. +Look [here](https://huggingface.co/blog/sdxl_lora_advanced_script#custom-captioning) for more info on creating/loading your own caption dataset. - **optimizer**: for this example, we'll use [prodigy](https://huggingface.co/blog/sdxl_lora_advanced_script#adaptive-optimizers) - an adaptive optimizer - **pivotal tuning** diff --git a/examples/amused/README.md b/examples/amused/README.md index 1230bd866757..9d5ae17ef54f 100644 --- a/examples/amused/README.md +++ b/examples/amused/README.md @@ -1,6 +1,6 @@ ## Amused training -Amused can be finetuned on simple datasets relatively cheaply and quickly. Using 8bit optimizers, lora, and gradient accumulation, amused can be finetuned with as little as 5.5 GB. Here are a set of examples for finetuning amused on some relatively simple datasets. 
These training recipies are aggressively oriented towards minimal resources and fast verification -- i.e. the batch sizes are quite low and the learning rates are quite high. For optimal quality, you will probably want to increase the batch sizes and decrease learning rates. +Amused can be finetuned on simple datasets relatively cheaply and quickly. Using 8bit optimizers, lora, and gradient accumulation, amused can be finetuned with as little as 5.5 GB. Here are a set of examples for finetuning amused on some relatively simple datasets. These training recipes are aggressively oriented towards minimal resources and fast verification -- i.e. the batch sizes are quite low and the learning rates are quite high. For optimal quality, you will probably want to increase the batch sizes and decrease learning rates. All training examples use fp16 mixed precision and gradient checkpointing. We don't show 8 bit adam + lora as its about the same memory use as just using lora (bitsandbytes uses full precision optimizer states for weights below a minimum size). diff --git a/examples/cogvideo/README.md b/examples/cogvideo/README.md index 02887faeaa74..6cb0a51e9fc9 100644 --- a/examples/cogvideo/README.md +++ b/examples/cogvideo/README.md @@ -201,7 +201,7 @@ Note that setting the `` is not necessary. From some limited experimen > - The original repository uses a `lora_alpha` of `1`. We found this not suitable in many runs, possibly due to difference in modeling backends and training settings. Our recommendation is to set to the `lora_alpha` to either `rank` or `rank // 2`. > - If you're training on data whose captions generate bad results with the original model, a `rank` of 64 and above is good and also the recommendation by the team behind CogVideoX. If the generations are already moderately good on your training captions, a `rank` of 16/32 should work. We found that setting the rank too low, say `4`, is not ideal and doesn't produce promising results. > - The authors of CogVideoX recommend 4000 training steps and 100 training videos overall to achieve the best result. While that might yield the best results, we found from our limited experimentation that 2000 steps and 25 videos could also be sufficient. -> - When using the Prodigy opitimizer for training, one can follow the recommendations from [this](https://huggingface.co/blog/sdxl_lora_advanced_script) blog. Prodigy tends to overfit quickly. From my very limited testing, I found a learning rate of `0.5` to be suitable in addition to `--prodigy_use_bias_correction`, `prodigy_safeguard_warmup` and `--prodigy_decouple`. +> - When using the Prodigy optimizer for training, one can follow the recommendations from [this](https://huggingface.co/blog/sdxl_lora_advanced_script) blog. Prodigy tends to overfit quickly. From my very limited testing, I found a learning rate of `0.5` to be suitable in addition to `--prodigy_use_bias_correction`, `prodigy_safeguard_warmup` and `--prodigy_decouple`. > - The recommended learning rate by the CogVideoX authors and from our experimentation with Adam/AdamW is between `1e-3` and `1e-4` for a dataset of 25+ videos. > > Note that our testing is not exhaustive due to limited time for exploration. Our recommendation would be to play around with the different knobs and dials to find the best settings for your data. 
diff --git a/examples/cogvideo/train_cogvideox_image_to_video_lora.py b/examples/cogvideo/train_cogvideox_image_to_video_lora.py index af69d45974fa..642aecabf74f 100644 --- a/examples/cogvideo/train_cogvideox_image_to_video_lora.py +++ b/examples/cogvideo/train_cogvideox_image_to_video_lora.py @@ -879,7 +879,7 @@ def prepare_rotary_positional_embeddings( def get_optimizer(args, params_to_optimize, use_deepspeed: bool = False): - # Use DeepSpeed optimzer + # Use DeepSpeed optimizer if use_deepspeed: from accelerate.utils import DummyOptim diff --git a/examples/cogvideo/train_cogvideox_lora.py b/examples/cogvideo/train_cogvideox_lora.py index 71f9bcc61b50..e737ce76241f 100644 --- a/examples/cogvideo/train_cogvideox_lora.py +++ b/examples/cogvideo/train_cogvideox_lora.py @@ -901,7 +901,7 @@ def prepare_rotary_positional_embeddings( def get_optimizer(args, params_to_optimize, use_deepspeed: bool = False): - # Use DeepSpeed optimzer + # Use DeepSpeed optimizer if use_deepspeed: from accelerate.utils import DummyOptim diff --git a/examples/community/README.md b/examples/community/README.md index 3b1218dc2727..3117070b204e 100644 --- a/examples/community/README.md +++ b/examples/community/README.md @@ -4865,7 +4865,7 @@ python -m pip install intel_extension_for_pytorch ``` python -m pip install intel_extension_for_pytorch== -f https://developer.intel.com/ipex-whl-stable-cpu ``` -2. After pipeline initialization, `prepare_for_ipex()` should be called to enable IPEX accelaration. Supported inference datatypes are Float32 and BFloat16. +2. After pipeline initialization, `prepare_for_ipex()` should be called to enable IPEX acceleration. Supported inference datatypes are Float32 and BFloat16. ```python pipe = AnimateDiffPipelineIpex.from_pretrained(base, motion_adapter=adapter, torch_dtype=dtype).to(device) diff --git a/examples/community/dps_pipeline.py b/examples/community/dps_pipeline.py index 7b349f6693e7..5442bcc65175 100755 --- a/examples/community/dps_pipeline.py +++ b/examples/community/dps_pipeline.py @@ -336,13 +336,13 @@ def contributions(self, in_length, out_length, scale, kernel, kernel_width, anti expanded_kernel_width = np.ceil(kernel_width) + 2 # Determine a set of field_of_view for each each output position, these are the pixels in the input image - # that the pixel in the output image 'sees'. We get a matrix whos horizontal dim is the output pixels (big) and the + # that the pixel in the output image 'sees'. We get a matrix whose horizontal dim is the output pixels (big) and the # vertical dim is the pixels it 'sees' (kernel_size + 2) field_of_view = np.squeeze( np.int16(np.expand_dims(left_boundary, axis=1) + np.arange(expanded_kernel_width) - 1) ) - # Assign weight to each pixel in the field of view. A matrix whos horizontal dim is the output pixels and the + # Assign weight to each pixel in the field of view. 
A matrix whose horizontal dim is the output pixels and the # vertical dim is a list of weights matching to the pixel in the field of view (that are specified in # 'field_of_view') weights = fixed_kernel(1.0 * np.expand_dims(match_coordinates, axis=1) - field_of_view - 1) diff --git a/examples/community/hd_painter.py b/examples/community/hd_painter.py index 9711b40b117e..20bb43a76f32 100644 --- a/examples/community/hd_painter.py +++ b/examples/community/hd_painter.py @@ -201,16 +201,16 @@ def __call__( # ================================================== # # We use a hack by running the code from the BasicTransformerBlock that is between Self and Cross attentions here # The other option would've been modifying the BasicTransformerBlock and adding this functionality here. - # I assumed that changing the BasicTransformerBlock would have been a bigger deal and decided to use this hack isntead. + # I assumed that changing the BasicTransformerBlock would have been a bigger deal and decided to use this hack instead. - # The SelfAttention block recieves the normalized latents from the BasicTransformerBlock, + # The SelfAttention block receives the normalized latents from the BasicTransformerBlock, # But the residual of the output is the non-normalized version. # Therefore we unnormalize the input hidden state here unnormalized_input_hidden_states = ( input_hidden_states + self.transformer_block.norm1.bias ) * self.transformer_block.norm1.weight - # TODO: return if neccessary + # TODO: return if necessary # if self.use_ada_layer_norm_zero: # attn_output = gate_msa.unsqueeze(1) * attn_output # elif self.use_ada_layer_norm_single: @@ -220,7 +220,7 @@ def __call__( if transformer_hidden_states.ndim == 4: transformer_hidden_states = transformer_hidden_states.squeeze(1) - # TODO: return if neccessary + # TODO: return if necessary # 2.5 GLIGEN Control # if gligen_kwargs is not None: # transformer_hidden_states = self.fuser(transformer_hidden_states, gligen_kwargs["objs"]) @@ -266,7 +266,7 @@ def __call__( ) = cross_attention_input_hidden_states.chunk(2) # Same split for the encoder_hidden_states i.e. the tokens - # Since the SelfAttention processors don't get the encoder states as input, we inject them into the processor in the begining. + # Since the SelfAttention processors don't get the encoder states as input, we inject them into the processor in the beginning. _encoder_hidden_states_unconditional, encoder_hidden_states_conditional = self.encoder_hidden_states.chunk( 2 ) @@ -896,7 +896,7 @@ def __call__( class GaussianSmoothing(nn.Module): """ Apply gaussian smoothing on a - 1d, 2d or 3d tensor. Filtering is performed seperately for each channel + 1d, 2d or 3d tensor. Filtering is performed separately for each channel in the input using a depthwise convolution. Args: diff --git a/examples/community/img2img_inpainting.py b/examples/community/img2img_inpainting.py index 001e4cc5b2cf..c6de02789759 100644 --- a/examples/community/img2img_inpainting.py +++ b/examples/community/img2img_inpainting.py @@ -161,7 +161,7 @@ def __call__( `Image`, or tensor representing an image batch which will be inpainted, *i.e.* parts of the image will be masked out with `mask_image` and repainted according to `prompt`. inner_image (`torch.Tensor` or `PIL.Image.Image`): - `Image`, or tensor representing an image batch which will be overlayed onto `image`. Non-transparent + `Image`, or tensor representing an image batch which will be overlaid onto `image`. 
Non-transparent regions of `inner_image` must fit inside white pixels in `mask_image`. Expects four channels, with the last channel representing the alpha channel, which will be used to blend `inner_image` with `image`. If not provided, it will be forcibly cast to RGBA. diff --git a/examples/community/latent_consistency_img2img.py b/examples/community/latent_consistency_img2img.py index 6c532c7f76c1..01abf861b827 100644 --- a/examples/community/latent_consistency_img2img.py +++ b/examples/community/latent_consistency_img2img.py @@ -647,7 +647,7 @@ def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: return sample def set_timesteps( - self, stength, num_inference_steps: int, lcm_origin_steps: int, device: Union[str, torch.device] = None + self, strength, num_inference_steps: int, lcm_origin_steps: int, device: Union[str, torch.device] = None ): """ Sets the discrete timesteps used for the diffusion chain (to be run before inference). @@ -668,7 +668,7 @@ def set_timesteps( # LCM Timesteps Setting: # Linear Spacing c = self.config.num_train_timesteps // lcm_origin_steps lcm_origin_timesteps = ( - np.asarray(list(range(1, int(lcm_origin_steps * stength) + 1))) * c - 1 + np.asarray(list(range(1, int(lcm_origin_steps * strength) + 1))) * c - 1 ) # LCM Training Steps Schedule skipping_step = len(lcm_origin_timesteps) // num_inference_steps timesteps = lcm_origin_timesteps[::-skipping_step][:num_inference_steps] # LCM Inference Steps Schedule diff --git a/examples/community/magic_mix.py b/examples/community/magic_mix.py index d3d118f84bfc..a29d0cfa0988 100644 --- a/examples/community/magic_mix.py +++ b/examples/community/magic_mix.py @@ -129,7 +129,7 @@ def __call__( input = ( (mix_factor * latents) + (1 - mix_factor) * orig_latents - ) # interpolating between layout noise and conditionally generated noise to preserve layout sematics + ) # interpolating between layout noise and conditionally generated noise to preserve layout semantics input = torch.cat([input] * 2) else: # content generation phase diff --git a/examples/community/mixture_tiling.py b/examples/community/mixture_tiling.py index 867bce0d9eb8..3feed5c88def 100644 --- a/examples/community/mixture_tiling.py +++ b/examples/community/mixture_tiling.py @@ -196,9 +196,9 @@ def __call__( guidance_scale_tiles: specific weights for classifier-free guidance in each tile. guidance_scale_tiles: specific weights for classifier-free guidance in each tile. If None, the value provided in guidance_scale will be used. seed_tiles: specific seeds for the initialization latents in each tile. These will override the latents generated for the whole canvas using the standard seed parameter. - seed_tiles_mode: either "full" "exclusive". If "full", all the latents affected by the tile be overriden. If "exclusive", only the latents that are affected exclusively by this tile (and no other tiles) will be overriden. - seed_reroll_regions: a list of tuples in the form (start row, end row, start column, end column, seed) defining regions in pixel space for which the latents will be overriden using the given seed. Takes priority over seed_tiles. - cpu_vae: the decoder from latent space to pixel space can require too mucho GPU RAM for large images. If you find out of memory errors at the end of the generation process, try setting this parameter to True to run the decoder in CPU. Slower, but should run without memory issues. + seed_tiles_mode: either "full" "exclusive". If "full", all the latents affected by the tile be overridden. 
If "exclusive", only the latents that are affected exclusively by this tile (and no other tiles) will be overridden. + seed_reroll_regions: a list of tuples in the form (start row, end row, start column, end column, seed) defining regions in pixel space for which the latents will be overridden using the given seed. Takes priority over seed_tiles. + cpu_vae: the decoder from latent space to pixel space can require too much GPU RAM for large images. If you find out of memory errors at the end of the generation process, try setting this parameter to True to run the decoder in CPU. Slower, but should run without memory issues. Examples: diff --git a/examples/community/pipeline_controlnet_xl_kolors.py b/examples/community/pipeline_controlnet_xl_kolors.py index b805c9a04a07..5b0576fbcdcf 100644 --- a/examples/community/pipeline_controlnet_xl_kolors.py +++ b/examples/community/pipeline_controlnet_xl_kolors.py @@ -1258,7 +1258,7 @@ def _cn_patch_forward(*args, **kwargs): ) if guess_mode and self.do_classifier_free_guidance: - # Infered ControlNet only for the conditional batch. + # Inferred ControlNet only for the conditional batch. # To apply the output of ControlNet to both the unconditional and conditional batches, # add 0 to the unconditional batch to keep it unchanged. down_block_res_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_block_res_samples] diff --git a/examples/community/pipeline_controlnet_xl_kolors_img2img.py b/examples/community/pipeline_controlnet_xl_kolors_img2img.py index 5cfb98d9694f..44c866e8261f 100644 --- a/examples/community/pipeline_controlnet_xl_kolors_img2img.py +++ b/examples/community/pipeline_controlnet_xl_kolors_img2img.py @@ -1462,7 +1462,7 @@ def _cn_patch_forward(*args, **kwargs): ) if guess_mode and self.do_classifier_free_guidance: - # Infered ControlNet only for the conditional batch. + # Inferred ControlNet only for the conditional batch. # To apply the output of ControlNet to both the unconditional and conditional batches, # add 0 to the unconditional batch to keep it unchanged. down_block_res_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_block_res_samples] diff --git a/examples/community/pipeline_controlnet_xl_kolors_inpaint.py b/examples/community/pipeline_controlnet_xl_kolors_inpaint.py index 68d1153d0dea..09d4b0241e47 100644 --- a/examples/community/pipeline_controlnet_xl_kolors_inpaint.py +++ b/examples/community/pipeline_controlnet_xl_kolors_inpaint.py @@ -1782,7 +1782,7 @@ def _cn_patch_forward(*args, **kwargs): ) if guess_mode and self.do_classifier_free_guidance: - # Infered ControlNet only for the conditional batch. + # Inferred ControlNet only for the conditional batch. # To apply the output of ControlNet to both the unconditional and conditional batches, # add 0 to the unconditional batch to keep it unchanged. down_block_res_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_block_res_samples] diff --git a/examples/community/pipeline_fabric.py b/examples/community/pipeline_fabric.py index 30847f875bda..2eddbd06ceae 100644 --- a/examples/community/pipeline_fabric.py +++ b/examples/community/pipeline_fabric.py @@ -559,7 +559,7 @@ def __call__( End point for providing feedback (between 0 and 1). min_weight (`float`, *optional*, defaults to `.05`): Minimum weight for feedback. - max_weight (`float`, *optional*, defults tp `1.0`): + max_weight (`float`, *optional*, defaults tp `1.0`): Maximum weight for feedback. neg_scale (`float`, *optional*, defaults to `.5`): Scale factor for negative feedback. 
diff --git a/examples/community/pipeline_faithdiff_stable_diffusion_xl.py b/examples/community/pipeline_faithdiff_stable_diffusion_xl.py index d1d3d80b4a60..749f0322d03d 100644 --- a/examples/community/pipeline_faithdiff_stable_diffusion_xl.py +++ b/examples/community/pipeline_faithdiff_stable_diffusion_xl.py @@ -118,7 +118,7 @@ >>> # Here we need use pipeline internal unet model >>> pipe.unet = pipe.unet_model.from_pretrained(model_id, subfolder="unet", variant="fp16", use_safetensors=True) >>> - >>> # Load aditional layers to the model + >>> # Load additional layers to the model >>> pipe.unet.load_additional_layers(weight_path="proc_data/faithdiff/FaithDiff.bin", dtype=dtype) >>> >>> # Enable vae tiling diff --git a/examples/community/pipeline_stable_diffusion_boxdiff.py b/examples/community/pipeline_stable_diffusion_boxdiff.py index bd58a65ce787..7c7f7e8a1814 100644 --- a/examples/community/pipeline_stable_diffusion_boxdiff.py +++ b/examples/community/pipeline_stable_diffusion_boxdiff.py @@ -72,7 +72,7 @@ class GaussianSmoothing(nn.Module): """ Copied from official repo: https://github.com/showlab/BoxDiff/blob/master/utils/gaussian_smoothing.py Apply gaussian smoothing on a - 1d, 2d or 3d tensor. Filtering is performed seperately for each channel + 1d, 2d or 3d tensor. Filtering is performed separately for each channel in the input using a depthwise convolution. Arguments: channels (int, sequence): Number of channels of the input tensors. Output will diff --git a/examples/community/pipeline_stable_diffusion_xl_attentive_eraser.py b/examples/community/pipeline_stable_diffusion_xl_attentive_eraser.py index 8459553f4e47..73f52736f4a1 100644 --- a/examples/community/pipeline_stable_diffusion_xl_attentive_eraser.py +++ b/examples/community/pipeline_stable_diffusion_xl_attentive_eraser.py @@ -1509,7 +1509,7 @@ def invert( add_time_ids = add_time_ids.repeat(batch_size, 1).to(DEVICE) - # interative sampling + # iterative sampling self.scheduler.set_timesteps(num_inference_steps) latents_list = [latents] pred_x0_list = [] @@ -1548,7 +1548,7 @@ def opt( x: torch.FloatTensor, ): """ - predict the sampe the next step in the denoise process. + predict the sample the next step in the denoise process. """ ref_noise = model_output[:1, :, :, :].expand(model_output.shape) alpha_prod_t = self.scheduler.alphas_cumprod[timestep] diff --git a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py index e55be92962f2..de5887c6de22 100644 --- a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py +++ b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py @@ -132,7 +132,7 @@ def _preprocess_adapter_image(image, height, width): image = torch.cat(image, dim=0) else: raise ValueError( - f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but recive: {image[0].ndim}" + f"Invalid image tensor! 
Expecting image tensor with 3 or 4 dimension, but receive: {image[0].ndim}" ) return image diff --git a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py index 791e05ebaf9b..c5f8ec3dfa07 100644 --- a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py +++ b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py @@ -150,7 +150,7 @@ def _preprocess_adapter_image(image, height, width): image = torch.cat(image, dim=0) else: raise ValueError( - f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but recive: {image[0].ndim}" + f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but receive: {image[0].ndim}" ) return image diff --git a/examples/community/regional_prompting_stable_diffusion.py b/examples/community/regional_prompting_stable_diffusion.py index 9f09b4bd2bba..25923a65031a 100644 --- a/examples/community/regional_prompting_stable_diffusion.py +++ b/examples/community/regional_prompting_stable_diffusion.py @@ -220,7 +220,7 @@ def getcompelembs(prps): revers = True def pcallback(s_self, step: int, timestep: int, latents: torch.Tensor, selfs=None): - if "PRO" in mode: # in Prompt mode, make masks from sum of attension maps + if "PRO" in mode: # in Prompt mode, make masks from sum of attention maps self.step = step if len(self.attnmaps_sizes) > 3: @@ -552,9 +552,9 @@ def get_attn_maps(self, attn): def reset_attnmaps(self): # init parameters in every batch self.step = 0 - self.attnmaps = {} # maked from attention maps + self.attnmaps = {} # made from attention maps self.attnmaps_sizes = [] # height,width set of u-net blocks - self.attnmasks = {} # maked from attnmaps for regions + self.attnmasks = {} # made from attnmaps for regions self.maskready = False self.history = {} diff --git a/examples/community/sde_drag.py b/examples/community/sde_drag.py index 902eaa99f417..3ded8c247c3a 100644 --- a/examples/community/sde_drag.py +++ b/examples/community/sde_drag.py @@ -97,7 +97,7 @@ def __call__( steps (`int`, *optional*, defaults to 200): The number of sampling iterations. step_size (`int`, *optional*, defaults to 2): - The drag diatance of each drag step. + The drag distance of each drag step. image_scale (`float`, *optional*, defaults to 0.3): To avoid duplicating the content, use image_scale to perturbs the source. adapt_radius (`int`, *optional*, defaults to 5): diff --git a/examples/community/unclip_image_interpolation.py b/examples/community/unclip_image_interpolation.py index 210bd61ecd1d..413c103cefc1 100644 --- a/examples/community/unclip_image_interpolation.py +++ b/examples/community/unclip_image_interpolation.py @@ -284,7 +284,7 @@ def __call__( ) else: raise AssertionError( - f"Expected 'image' or 'image_embeddings' to be not None with types List[PIL.Image] or torch.Tensor respectively. Received {type(image)} and {type(image_embeddings)} repsectively" + f"Expected 'image' or 'image_embeddings' to be not None with types List[PIL.Image] or torch.Tensor respectively. 
Received {type(image)} and {type(image_embeddings)} respectively" ) original_image_embeddings = self._encode_image( diff --git a/examples/consistency_distillation/train_lcm_distill_lora_sd_wds.py b/examples/consistency_distillation/train_lcm_distill_lora_sd_wds.py index 3414640f55cf..b254799756e9 100644 --- a/examples/consistency_distillation/train_lcm_distill_lora_sd_wds.py +++ b/examples/consistency_distillation/train_lcm_distill_lora_sd_wds.py @@ -1012,7 +1012,7 @@ def main(args): unet = get_peft_model(unet, lora_config) # 9. Handle mixed precision and device placement - # For mixed precision training we cast all non-trainable weigths to half-precision + # For mixed precision training we cast all non-trainable weights to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/consistency_distillation/train_lcm_distill_lora_sdxl.py b/examples/consistency_distillation/train_lcm_distill_lora_sdxl.py index cb8c425bcbec..a332b30b2805 100644 --- a/examples/consistency_distillation/train_lcm_distill_lora_sdxl.py +++ b/examples/consistency_distillation/train_lcm_distill_lora_sdxl.py @@ -829,7 +829,7 @@ def main(args): ) # 8. Handle mixed precision and device placement - # For mixed precision training we cast all non-trainable weigths to half-precision + # For mixed precision training we cast all non-trainable weights to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/consistency_distillation/train_lcm_distill_lora_sdxl_wds.py b/examples/consistency_distillation/train_lcm_distill_lora_sdxl_wds.py index d636c145ffd9..52d480610070 100644 --- a/examples/consistency_distillation/train_lcm_distill_lora_sdxl_wds.py +++ b/examples/consistency_distillation/train_lcm_distill_lora_sdxl_wds.py @@ -1026,7 +1026,7 @@ def main(args): unet = get_peft_model(unet, lora_config) # 9. Handle mixed precision and device placement - # For mixed precision training we cast all non-trainable weigths to half-precision + # For mixed precision training we cast all non-trainable weights to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/consistency_distillation/train_lcm_distill_sd_wds.py b/examples/consistency_distillation/train_lcm_distill_sd_wds.py index 50a3d4ebd190..3be506352fd6 100644 --- a/examples/consistency_distillation/train_lcm_distill_sd_wds.py +++ b/examples/consistency_distillation/train_lcm_distill_sd_wds.py @@ -962,7 +962,7 @@ def main(args): ) # 9. Handle mixed precision and device placement - # For mixed precision training we cast all non-trainable weigths to half-precision + # For mixed precision training we cast all non-trainable weights to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. 
weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/consistency_distillation/train_lcm_distill_sdxl_wds.py b/examples/consistency_distillation/train_lcm_distill_sdxl_wds.py index a719db9a895d..5a28201bf7b3 100644 --- a/examples/consistency_distillation/train_lcm_distill_sdxl_wds.py +++ b/examples/consistency_distillation/train_lcm_distill_sdxl_wds.py @@ -1021,7 +1021,7 @@ def main(args): ) # 9. Handle mixed precision and device placement - # For mixed precision training we cast all non-trainable weigths to half-precision + # For mixed precision training we cast all non-trainable weights to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/controlnet/README_flux.md b/examples/controlnet/README_flux.md index aa5fa251409e..fcac6df1106f 100644 --- a/examples/controlnet/README_flux.md +++ b/examples/controlnet/README_flux.md @@ -411,7 +411,7 @@ export CAPTION_COLUMN='caption_column' export CACHE_DIR="/data/train_csr/.cache/huggingface/" export OUTPUT_DIR='/data/train_csr/FLUX/MODEL_OUT/'$MODEL_TYPE -# The first step is to use Python to precompute all caches.Replace the first line below with this line. (I am not sure why using acclerate would cause problems.) +# The first step is to use Python to precompute all caches.Replace the first line below with this line. (I am not sure why using accelerate would cause problems.) CUDA_VISIBLE_DEVICES=0 python3 train_controlnet_flux.py \ diff --git a/examples/dreambooth/README_flux.md b/examples/dreambooth/README_flux.md index c0802246e1f2..3a6f7905e614 100644 --- a/examples/dreambooth/README_flux.md +++ b/examples/dreambooth/README_flux.md @@ -173,13 +173,13 @@ accelerate launch train_dreambooth_lora_flux.py \ ### Target Modules When LoRA was first adapted from language models to diffusion models, it was applied to the cross-attention layers in the Unet that relate the image representations with the prompts that describe them. More recently, SOTA text-to-image diffusion models replaced the Unet with a diffusion Transformer(DiT). With this change, we may also want to explore -applying LoRA training onto different types of layers and blocks. To allow more flexibility and control over the targeted modules we added `--lora_layers`- in which you can specify in a comma seperated string +applying LoRA training onto different types of layers and blocks. To allow more flexibility and control over the targeted modules we added `--lora_layers`- in which you can specify in a comma separated string the exact modules for LoRA training. Here are some examples of target modules you can provide: - for attention only layers: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0"` - to train the same modules as in the fal trainer: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.net.0.proj,ff.net.2,ff_context.net.0.proj,ff_context.net.2"` - to train the same modules as in ostris ai-toolkit / replicate trainer: `--lora_blocks="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.net.0.proj,ff.net.2,ff_context.net.0.proj,ff_context.net.2,norm1_context.linear, norm1.linear,norm.linear,proj_mlp,proj_out"` > [!NOTE] -> `--lora_layers` can also be used to specify which **blocks** to apply LoRA training to. 
To do so, simply add a block prefix to each layer in the comma seperated string: +> `--lora_layers` can also be used to specify which **blocks** to apply LoRA training to. To do so, simply add a block prefix to each layer in the comma separated string: > **single DiT blocks**: to target the ith single transformer block, add the prefix `single_transformer_blocks.i`, e.g. - `single_transformer_blocks.i.attn.to_k` > **MMDiT blocks**: to target the ith MMDiT block, add the prefix `transformer_blocks.i`, e.g. - `transformer_blocks.i.attn.to_k` > [!NOTE] diff --git a/examples/dreambooth/README_hidream.md b/examples/dreambooth/README_hidream.md index a0e8c1feca19..63b19a7f70cc 100644 --- a/examples/dreambooth/README_hidream.md +++ b/examples/dreambooth/README_hidream.md @@ -107,7 +107,7 @@ To better track our training experiments, we're using the following flags in the Additionally, we welcome you to explore the following CLI arguments: -* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only. +* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma separated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only. * `--rank`: The rank of the LoRA layers. The higher the rank, the more parameters are trained. The default is 16. We provide several options for optimizing memory optimization: diff --git a/examples/dreambooth/README_lumina2.md b/examples/dreambooth/README_lumina2.md index e466ec5a68e7..fe2907092c1d 100644 --- a/examples/dreambooth/README_lumina2.md +++ b/examples/dreambooth/README_lumina2.md @@ -113,7 +113,7 @@ To better track our training experiments, we're using the following flags in the Additionally, we welcome you to explore the following CLI arguments: -* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only. +* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma separated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only. * `--system_prompt`: A custom system prompt to provide additional personality to the model. * `--max_sequence_length`: Maximum sequence length to use for text embeddings. diff --git a/examples/dreambooth/README_sana.md b/examples/dreambooth/README_sana.md index d82529c64de8..6136bfcc16d7 100644 --- a/examples/dreambooth/README_sana.md +++ b/examples/dreambooth/README_sana.md @@ -113,7 +113,7 @@ To better track our training experiments, we're using the following flags in the Additionally, we welcome you to explore the following CLI arguments: -* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only. +* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma separated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only. * `--complex_human_instruction`: Instructions for complex human attention as shown in [here](https://github.com/NVlabs/Sana/blob/main/configs/sana_app_config/Sana_1600M_app.yaml#L55). * `--max_sequence_length`: Maximum sequence length to use for text embeddings. 
diff --git a/examples/dreambooth/train_dreambooth_lora_flux.py b/examples/dreambooth/train_dreambooth_lora_flux.py index 193c5affe600..5341c321c312 100644 --- a/examples/dreambooth/train_dreambooth_lora_flux.py +++ b/examples/dreambooth/train_dreambooth_lora_flux.py @@ -567,7 +567,7 @@ def parse_args(input_args=None): type=str, default=None, help=( - 'The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only' + 'The transformer modules to apply LoRA training on. Please specify the layers in a comma separated. E.g. - "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only' ), ) diff --git a/examples/dreambooth/train_dreambooth_lora_hidream.py b/examples/dreambooth/train_dreambooth_lora_hidream.py index fbf62999d637..39de32091408 100644 --- a/examples/dreambooth/train_dreambooth_lora_hidream.py +++ b/examples/dreambooth/train_dreambooth_lora_hidream.py @@ -596,7 +596,7 @@ def parse_args(input_args=None): type=str, default=None, help=( - 'The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only' + 'The transformer modules to apply LoRA training on. Please specify the layers in a comma separated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only' ), ) diff --git a/examples/dreambooth/train_dreambooth_lora_lumina2.py b/examples/dreambooth/train_dreambooth_lora_lumina2.py index e933a8033018..1e4db90d874e 100644 --- a/examples/dreambooth/train_dreambooth_lora_lumina2.py +++ b/examples/dreambooth/train_dreambooth_lora_lumina2.py @@ -514,7 +514,7 @@ def parse_args(input_args=None): type=str, default=None, help=( - 'The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only' + 'The transformer modules to apply LoRA training on. Please specify the layers in a comma separated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only' ), ) diff --git a/examples/dreambooth/train_dreambooth_lora_sana.py b/examples/dreambooth/train_dreambooth_lora_sana.py index 94effd7cba73..bef6e045949d 100644 --- a/examples/dreambooth/train_dreambooth_lora_sana.py +++ b/examples/dreambooth/train_dreambooth_lora_sana.py @@ -513,7 +513,7 @@ def parse_args(input_args=None): type=str, default=None, help=( - 'The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only' + 'The transformer modules to apply LoRA training on. Please specify the layers in a comma separated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only' ), ) diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index c693038bb54f..b1786260d1f2 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -576,7 +576,7 @@ def parse_args(input_args=None): type=str, default=None, help=( - "The transformer block layers to apply LoRA training on. Please specify the layers in a comma seperated string." + "The transformer block layers to apply LoRA training on. Please specify the layers in a comma separated string." 
"For examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md" ), ) @@ -585,7 +585,7 @@ def parse_args(input_args=None): type=str, default=None, help=( - "The transformer blocks to apply LoRA training on. Please specify the block numbers in a comma seperated manner." + "The transformer blocks to apply LoRA training on. Please specify the block numbers in a comma separated manner." 'E.g. - "--lora_blocks 12,30" will result in lora training of transformer blocks 12 and 30. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md' ), ) diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py index fd5019617033..90979ee8ff1c 100644 --- a/examples/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/dreambooth/train_dreambooth_lora_sdxl.py @@ -664,7 +664,7 @@ def parse_args(input_args=None): action="store_true", default=False, help=( - "Wether to train a DoRA as proposed in- DoRA: Weight-Decomposed Low-Rank Adaptation https://arxiv.org/abs/2402.09353. " + "Whether to train a DoRA as proposed in- DoRA: Weight-Decomposed Low-Rank Adaptation https://arxiv.org/abs/2402.09353. " "Note: to use DoRA you need to install peft from main, `pip install git+https://github.com/huggingface/peft.git`" ), ) diff --git a/examples/flux-control/train_control_lora_flux.py b/examples/flux-control/train_control_lora_flux.py index db27f06f8796..fe078f3e7580 100644 --- a/examples/flux-control/train_control_lora_flux.py +++ b/examples/flux-control/train_control_lora_flux.py @@ -329,7 +329,7 @@ def parse_args(input_args=None): type=str, default=None, help=( - 'The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only' + 'The transformer modules to apply LoRA training on. Please specify the layers in a comma separated. E.g. - "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only' ), ) parser.add_argument( diff --git a/examples/kandinsky2_2/text_to_image/train_text_to_image_lora_decoder.py b/examples/kandinsky2_2/text_to_image/train_text_to_image_lora_decoder.py index a60bd7d586ad..f96a4c4f985e 100644 --- a/examples/kandinsky2_2/text_to_image/train_text_to_image_lora_decoder.py +++ b/examples/kandinsky2_2/text_to_image/train_text_to_image_lora_decoder.py @@ -400,7 +400,7 @@ def main(): image_encoder.requires_grad_(False) - # For mixed precision training we cast all non-trainable weigths (vae, non-lora text_encoder and non-lora unet) to half-precision + # For mixed precision training we cast all non-trainable weights (vae, non-lora text_encoder and non-lora unet) to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. 
weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/research_projects/consistency_training/train_cm_ct_unconditional.py b/examples/research_projects/consistency_training/train_cm_ct_unconditional.py index 2bea064cdb72..c873356eb214 100644 --- a/examples/research_projects/consistency_training/train_cm_ct_unconditional.py +++ b/examples/research_projects/consistency_training/train_cm_ct_unconditional.py @@ -1147,7 +1147,7 @@ def recalculate_num_discretization_step_values(discretization_steps, skip_steps) tracker_config = dict(vars(args)) accelerator.init_trackers(args.tracker_project_name, config=tracker_config) - # Function for unwraping if torch.compile() was used in accelerate. + # Function for unwrapping if torch.compile() was used in accelerate. def unwrap_model(model): model = accelerator.unwrap_model(model) model = model._orig_mod if is_compiled_module(model) else model diff --git a/examples/research_projects/flux_lora_quantization/README.md b/examples/research_projects/flux_lora_quantization/README.md index 51005b640221..840d02fce71c 100644 --- a/examples/research_projects/flux_lora_quantization/README.md +++ b/examples/research_projects/flux_lora_quantization/README.md @@ -69,7 +69,7 @@ accelerate launch --config_file=accelerate.yaml \ --seed="0" ``` -We can direcly pass a quantized checkpoint path, too: +We can directly pass a quantized checkpoint path, too: ```diff + --quantized_model_path="hf-internal-testing/flux.1-dev-nf4-pkg" diff --git a/examples/research_projects/intel_opts/inference_bf16.py b/examples/research_projects/intel_opts/inference_bf16.py index 96ec709f433c..13f2731fb713 100644 --- a/examples/research_projects/intel_opts/inference_bf16.py +++ b/examples/research_projects/intel_opts/inference_bf16.py @@ -13,7 +13,7 @@ device = "cpu" -prompt = "a lovely in red dress and hat, in the snowly and brightly night, with many brighly buildings" +prompt = "a lovely in red dress and hat, in the snowly and brightly night, with many brightly buildings" model_id = "path-to-your-trained-model" pipe = StableDiffusionPipeline.from_pretrained(model_id) diff --git a/examples/research_projects/intel_opts/textual_inversion_dfq/README.md b/examples/research_projects/intel_opts/textual_inversion_dfq/README.md index 4a227cdb4d63..184a64ec7678 100644 --- a/examples/research_projects/intel_opts/textual_inversion_dfq/README.md +++ b/examples/research_projects/intel_opts/textual_inversion_dfq/README.md @@ -80,7 +80,7 @@ export INT8_MODEL_NAME="./int8_model" python text2images.py \ --pretrained_model_name_or_path=$INT8_MODEL_NAME \ - --caption "a lovely in red dress and hat, in the snowly and brightly night, with many brighly buildings." \ + --caption "a lovely in red dress and hat, in the snowly and brightly night, with many brightly buildings." 
\ --images_num 4 ``` diff --git a/examples/research_projects/pixart/pipeline_pixart_alpha_controlnet.py b/examples/research_projects/pixart/pipeline_pixart_alpha_controlnet.py index 4065a854c22d..5a555c45a130 100644 --- a/examples/research_projects/pixart/pipeline_pixart_alpha_controlnet.py +++ b/examples/research_projects/pixart/pipeline_pixart_alpha_controlnet.py @@ -664,7 +664,7 @@ def _clean_caption(self, caption): # & caption = re.sub(r"&", "", caption) - # ip adresses: + # ip addresses: caption = re.sub(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", " ", caption) # article ids: diff --git a/examples/research_projects/pixart/train_pixart_controlnet_hf.py b/examples/research_projects/pixart/train_pixart_controlnet_hf.py index 67ec30da0ece..98329c6cd43d 100644 --- a/examples/research_projects/pixart/train_pixart_controlnet_hf.py +++ b/examples/research_projects/pixart/train_pixart_controlnet_hf.py @@ -612,7 +612,7 @@ def main(): # See Section 3.1. of the paper. max_length = 120 - # For mixed precision training we cast all non-trainable weigths (vae, text_encoder) to half-precision + # For mixed precision training we cast all non-trainable weights (vae, text_encoder) to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/research_projects/pytorch_xla/inference/flux/flux_inference.py b/examples/research_projects/pytorch_xla/inference/flux/flux_inference.py index 9c98c9b5ff4f..35cb015a6cc7 100644 --- a/examples/research_projects/pytorch_xla/inference/flux/flux_inference.py +++ b/examples/research_projects/pytorch_xla/inference/flux/flux_inference.py @@ -120,11 +120,11 @@ def _main(index, args, text_pipe, ckpt_id): parser.add_argument("--schnell", action="store_true", help="run flux schnell instead of dev") parser.add_argument("--width", type=int, default=1024, help="width of the image to generate") parser.add_argument("--height", type=int, default=1024, help="height of the image to generate") - parser.add_argument("--guidance", type=float, default=3.5, help="gauidance strentgh for dev") + parser.add_argument("--guidance", type=float, default=3.5, help="guidance strength for dev") parser.add_argument("--seed", type=int, default=None, help="seed for inference") parser.add_argument("--profile", action="store_true", help="enable profiling") parser.add_argument("--profile-duration", type=int, default=10000, help="duration for profiling in msec.") - parser.add_argument("--itters", type=int, default=15, help="tiems to run inference and get avg time in sec.") + parser.add_argument("--itters", type=int, default=15, help="times to run inference and get avg time in sec.") args = parser.parse_args() if args.schnell: ckpt_id = "black-forest-labs/FLUX.1-schnell" diff --git a/examples/research_projects/realfill/train_realfill.py b/examples/research_projects/realfill/train_realfill.py index c7cc25df02b9..419636d1311e 100644 --- a/examples/research_projects/realfill/train_realfill.py +++ b/examples/research_projects/realfill/train_realfill.py @@ -759,7 +759,7 @@ def load_model_hook(models, input_dir): unet, text_encoder, optimizer, train_dataloader ) - # For mixed precision training we cast all non-trainable weigths (vae, non-lora text_encoder and non-lora unet) to half-precision + # For mixed precision training we cast all non-trainable weights (vae, non-lora text_encoder and non-lora unet) to half-precision # as these weights are only used for inference, 
keeping weights in full precision is not required. weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py b/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py index 01ef67a55da4..402265bde1c7 100644 --- a/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py @@ -661,7 +661,7 @@ def parse_args(input_args=None): action="store_true", default=False, help=( - "Wether to train a DoRA as proposed in- DoRA: Weight-Decomposed Low-Rank Adaptation https://arxiv.org/abs/2402.09353. " + "Whether to train a DoRA as proposed in- DoRA: Weight-Decomposed Low-Rank Adaptation https://arxiv.org/abs/2402.09353. " "Note: to use DoRA you need to install peft from main, `pip install git+https://github.com/huggingface/peft.git`" ), ) diff --git a/examples/textual_inversion/textual_inversion.py b/examples/textual_inversion/textual_inversion.py index 019b79601159..6dcc2ff7dce9 100644 --- a/examples/textual_inversion/textual_inversion.py +++ b/examples/textual_inversion/textual_inversion.py @@ -789,7 +789,7 @@ def main(): text_encoder, optimizer, train_dataloader, lr_scheduler ) - # For mixed precision training we cast all non-trainable weigths (vae, non-lora text_encoder and non-lora unet) to half-precision + # For mixed precision training we cast all non-trainable weights (vae, non-lora text_encoder and non-lora unet) to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/textual_inversion/textual_inversion_sdxl.py b/examples/textual_inversion/textual_inversion_sdxl.py index d142cccc926e..ecbc7a185bcd 100644 --- a/examples/textual_inversion/textual_inversion_sdxl.py +++ b/examples/textual_inversion/textual_inversion_sdxl.py @@ -814,7 +814,7 @@ def main(): text_encoder_1, text_encoder_2, optimizer, train_dataloader, lr_scheduler ) - # For mixed precision training we cast all non-trainable weigths (vae, non-lora text_encoder and non-lora unet) to half-precision + # For mixed precision training we cast all non-trainable weights (vae, non-lora text_encoder and non-lora unet) to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/scripts/convert_flux_to_diffusers.py b/scripts/convert_flux_to_diffusers.py index fccac70dd855..ec31d842d4db 100644 --- a/scripts/convert_flux_to_diffusers.py +++ b/scripts/convert_flux_to_diffusers.py @@ -220,7 +220,7 @@ def convert_flux_transformer_checkpoint_to_diffusers( f"double_blocks.{i}.txt_attn.proj.bias" ) - # single transfomer blocks + # single transformer blocks for i in range(num_single_layers): block_prefix = f"single_transformer_blocks.{i}." # norm.linear <- single_blocks.0.modulation.lin diff --git a/scripts/convert_sana_to_diffusers.py b/scripts/convert_sana_to_diffusers.py index 1c40072177c6..959a647e0a5e 100644 --- a/scripts/convert_sana_to_diffusers.py +++ b/scripts/convert_sana_to_diffusers.py @@ -394,7 +394,7 @@ def main(args): help="Scheduler type to use. 
Use 'scm' for Sana Sprint models.", ) parser.add_argument("--dump_path", default=None, type=str, required=True, help="Path to the output pipeline.") - parser.add_argument("--save_full_pipeline", action="store_true", help="save all the pipelien elemets in one.") + parser.add_argument("--save_full_pipeline", action="store_true", help="save all the pipeline elements in one.") parser.add_argument("--dtype", default="fp32", type=str, choices=["fp32", "fp16", "bf16"], help="Weight dtype.") args = parser.parse_args() diff --git a/scripts/convert_shap_e_to_diffusers.py b/scripts/convert_shap_e_to_diffusers.py index b903b4ee8a7f..ac6543667af9 100644 --- a/scripts/convert_shap_e_to_diffusers.py +++ b/scripts/convert_shap_e_to_diffusers.py @@ -984,7 +984,7 @@ def renderer(*, args, checkpoint_map_location): return renderer_model -# prior model will expect clip_mean and clip_std, whic are missing from the state_dict +# prior model will expect clip_mean and clip_std, which are missing from the state_dict PRIOR_EXPECTED_MISSING_KEYS = ["clip_mean", "clip_std"] diff --git a/scripts/convert_wuerstchen.py b/scripts/convert_wuerstchen.py index 23d45d3dd6ad..826b9b208181 100644 --- a/scripts/convert_wuerstchen.py +++ b/scripts/convert_wuerstchen.py @@ -55,8 +55,8 @@ state_dict[key.replace("attn.out_proj.bias", "to_out.0.bias")] = weights else: state_dict[key] = orig_state_dict[key] -deocder = WuerstchenDiffNeXt() -deocder.load_state_dict(state_dict) +decoder = WuerstchenDiffNeXt() +decoder.load_state_dict(state_dict) # Prior orig_state_dict = torch.load(os.path.join(model_path, "model_v3_stage_c.pt"), map_location=device)["ema_state_dict"] @@ -94,7 +94,7 @@ prior_pipeline.save_pretrained("warp-ai/wuerstchen-prior") decoder_pipeline = WuerstchenDecoderPipeline( - text_encoder=gen_text_encoder, tokenizer=gen_tokenizer, vqgan=vqmodel, decoder=deocder, scheduler=scheduler + text_encoder=gen_text_encoder, tokenizer=gen_tokenizer, vqgan=vqmodel, decoder=decoder, scheduler=scheduler ) decoder_pipeline.save_pretrained("warp-ai/wuerstchen") @@ -103,7 +103,7 @@ # Decoder text_encoder=gen_text_encoder, tokenizer=gen_tokenizer, - decoder=deocder, + decoder=decoder, scheduler=scheduler, vqgan=vqmodel, # Prior diff --git a/src/diffusers/hooks/group_offloading.py b/src/diffusers/hooks/group_offloading.py index d88114436dc1..1c558bc49546 100644 --- a/src/diffusers/hooks/group_offloading.py +++ b/src/diffusers/hooks/group_offloading.py @@ -232,7 +232,7 @@ def post_forward(self, module: torch.nn.Module, output): class LazyPrefetchGroupOffloadingHook(ModelHook): r""" - A hook, used in conjuction with GroupOffloadingHook, that applies lazy prefetching to groups of torch.nn.Module. + A hook, used in conjunction with GroupOffloadingHook, that applies lazy prefetching to groups of torch.nn.Module. This hook is used to determine the order in which the layers are executed during the forward pass. Once the layer invocation order is known, assignments of the next_group attribute for prefetching can be made, which allows prefetching groups in the correct order. diff --git a/src/diffusers/hooks/layerwise_casting.py b/src/diffusers/hooks/layerwise_casting.py index 6f2cfdc3485a..c0105ab93483 100644 --- a/src/diffusers/hooks/layerwise_casting.py +++ b/src/diffusers/hooks/layerwise_casting.py @@ -90,7 +90,7 @@ class PeftInputAutocastDisableHook(ModelHook): that the inputs are casted to the computation dtype correctly always. However, there are two goals we are hoping to achieve: 1. 
Making forward implementations independent of device/dtype casting operations as much as possible. - 2. Peforming inference without losing information from casting to different precisions. With the current + 2. Performing inference without losing information from casting to different precisions. With the current PEFT implementation (as linked in the reference above), and assuming running layerwise casting inference with storage_dtype=torch.float8_e4m3fn and compute_dtype=torch.bfloat16, inputs are cast to torch.float8_e4m3fn in the lora layer. We will then upcast back to torch.bfloat16 when we continue the diff --git a/src/diffusers/loaders/lora_conversion_utils.py b/src/diffusers/loaders/lora_conversion_utils.py index d0c9611735ce..a9e154af3cec 100644 --- a/src/diffusers/loaders/lora_conversion_utils.py +++ b/src/diffusers/loaders/lora_conversion_utils.py @@ -819,7 +819,7 @@ def _convert(original_key, diffusers_key, state_dict, new_state_dict): if zero_status_pe: logger.info( "The `position_embedding` LoRA params are all zeros which make them ineffective. " - "So, we will purge them out of the curret state dict to make loading possible." + "So, we will purge them out of the current state dict to make loading possible." ) else: @@ -835,7 +835,7 @@ def _convert(original_key, diffusers_key, state_dict, new_state_dict): if zero_status_t5: logger.info( "The `t5xxl` LoRA params are all zeros which make them ineffective. " - "So, we will purge them out of the curret state dict to make loading possible." + "So, we will purge them out of the current state dict to make loading possible." ) else: logger.info( @@ -850,7 +850,7 @@ def _convert(original_key, diffusers_key, state_dict, new_state_dict): if zero_status_diff_b: logger.info( "The `diff_b` LoRA params are all zeros which make them ineffective. " - "So, we will purge them out of the curret state dict to make loading possible." + "So, we will purge them out of the current state dict to make loading possible." ) else: logger.info( @@ -866,7 +866,7 @@ def _convert(original_key, diffusers_key, state_dict, new_state_dict): if zero_status_diff: logger.info( "The `diff` LoRA params are all zeros which make them ineffective. " - "So, we will purge them out of the curret state dict to make loading possible." + "So, we will purge them out of the current state dict to make loading possible." ) else: logger.info( @@ -1237,7 +1237,7 @@ def _convert_bfl_flux_control_lora_to_diffusers(original_state_dict): f"double_blocks.{i}.txt_attn.norm.key_norm.scale" ) - # single transfomer blocks + # single transformer blocks for i in range(num_single_layers): block_prefix = f"single_transformer_blocks.{i}." diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 50a99cee1d23..1a6768e70de4 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -2413,7 +2413,7 @@ def _maybe_expand_transformer_param_shape_or_error_( ) -> bool: """ Control LoRA expands the shape of the input layer from (3072, 64) to (3072, 128). This method handles that and - generalizes things a bit so that any parameter that needs expansion receives appropriate treatement. + generalizes things a bit so that any parameter that needs expansion receives appropriate treatment. 
""" state_dict = {} if lora_state_dict is not None: diff --git a/src/diffusers/loaders/peft.py b/src/diffusers/loaders/peft.py index 50450ab7d880..bbef5b1628cb 100644 --- a/src/diffusers/loaders/peft.py +++ b/src/diffusers/loaders/peft.py @@ -330,7 +330,7 @@ def map_state_dict_for_hotswap(sd): new_sd[k] = v return new_sd - # To handle scenarios where we cannot successfully set state dict. If it's unsucessful, + # To handle scenarios where we cannot successfully set state dict. If it's unsuccessful, # we should also delete the `peft_config` associated to the `adapter_name`. try: if hotswap: @@ -344,7 +344,7 @@ def map_state_dict_for_hotswap(sd): config=lora_config, ) except Exception as e: - logger.error(f"Hotswapping {adapter_name} was unsucessful with the following error: \n{e}") + logger.error(f"Hotswapping {adapter_name} was unsuccessful with the following error: \n{e}") raise # the hotswap function raises if there are incompatible keys, so if we reach this point we can set # it to None @@ -379,7 +379,7 @@ def map_state_dict_for_hotswap(sd): module.delete_adapter(adapter_name) self.peft_config.pop(adapter_name) - logger.error(f"Loading {adapter_name} was unsucessful with the following error: \n{e}") + logger.error(f"Loading {adapter_name} was unsuccessful with the following error: \n{e}") raise warn_msg = "" @@ -712,7 +712,7 @@ def _fuse_lora_apply(self, module, adapter_names=None): if self.lora_scale != 1.0: module.scale_layer(self.lora_scale) - # For BC with prevous PEFT versions, we need to check the signature + # For BC with previous PEFT versions, we need to check the signature # of the `merge` method to see if it supports the `adapter_names` argument. supported_merge_kwargs = list(inspect.signature(module.merge).parameters) if "adapter_names" in supported_merge_kwargs: diff --git a/src/diffusers/loaders/single_file.py b/src/diffusers/loaders/single_file.py index c2843fc7406a..c15f8287356c 100644 --- a/src/diffusers/loaders/single_file.py +++ b/src/diffusers/loaders/single_file.py @@ -453,7 +453,7 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs) -> Self: logger.warning( "Detected legacy `from_single_file` loading behavior. Attempting to create the pipeline based on inferred components.\n" "This may lead to errors if the model components are not correctly inferred. \n" - "To avoid this warning, please explicity pass the `config` argument to `from_single_file` with a path to a local diffusers model repo \n" + "To avoid this warning, please explicitly pass the `config` argument to `from_single_file` with a path to a local diffusers model repo \n" "e.g. `from_single_file(, config=) \n" "or run `from_single_file` with `local_files_only=False` first to update the local cache directory with " "the necessary config files.\n" diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index b55b1b55206e..3a2855df2d7d 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -2278,7 +2278,7 @@ def swap_scale_shift(weight): f"double_blocks.{i}.txt_attn.proj.bias" ) - # single transfomer blocks + # single transformer blocks for i in range(num_single_layers): block_prefix = f"single_transformer_blocks.{i}." 
# norm.linear <- single_blocks.0.modulation.lin @@ -2872,7 +2872,7 @@ def calculate_layers(keys, key_prefix): def convert_lumina2_to_diffusers(checkpoint, **kwargs): converted_state_dict = {} - # Original Lumina-Image-2 has an extra norm paramter that is unused + # Original Lumina-Image-2 has an extra norm parameter that is unused # We just remove it here checkpoint.pop("norm_final.weight", None) diff --git a/src/diffusers/loaders/transformer_sd3.py b/src/diffusers/loaders/transformer_sd3.py index ece17e6728fa..4715372f3d2e 100644 --- a/src/diffusers/loaders/transformer_sd3.py +++ b/src/diffusers/loaders/transformer_sd3.py @@ -123,7 +123,7 @@ def _convert_ip_adapter_image_proj_to_diffusers( key = key.replace(f"layers.{idx}.2.1", f"layers.{idx}.adaln_proj") updated_state_dict[key] = value - # Image projetion parameters + # Image projection parameters embed_dim = updated_state_dict["proj_in.weight"].shape[1] output_dim = updated_state_dict["proj_out.weight"].shape[0] hidden_dim = updated_state_dict["proj_in.weight"].shape[0] diff --git a/src/diffusers/models/controlnets/controlnet_xs.py b/src/diffusers/models/controlnets/controlnet_xs.py index 608be6b70277..9248f934bcda 100644 --- a/src/diffusers/models/controlnets/controlnet_xs.py +++ b/src/diffusers/models/controlnets/controlnet_xs.py @@ -734,17 +734,17 @@ def from_unet( unet (`UNet2DConditionModel`): The UNet model we want to control. controlnet (`ControlNetXSAdapter`): - The ConntrolNet-XS adapter with which the UNet will be fused. If none is given, a new ConntrolNet-XS + The ControlNet-XS adapter with which the UNet will be fused. If none is given, a new ControlNet-XS adapter will be created. size_ratio (float, *optional*, defaults to `None`): - Used to contruct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details. + Used to construct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details. ctrl_block_out_channels (`List[int]`, *optional*, defaults to `None`): - Used to contruct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details, + Used to construct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details, where this parameter is called `block_out_channels`. time_embedding_mix (`float`, *optional*, defaults to None): - Used to contruct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details. + Used to construct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details. ctrl_optional_kwargs (`Dict`, *optional*, defaults to `None`): - Passed to the `init` of the new controlent if no controlent was given. + Passed to the `init` of the new controlnet if no controlnet was given. """ if controlnet is None: controlnet = ControlNetXSAdapter.from_unet( diff --git a/src/diffusers/models/embeddings.py b/src/diffusers/models/embeddings.py index b1e14ca6a7fe..0e1144f60100 100644 --- a/src/diffusers/models/embeddings.py +++ b/src/diffusers/models/embeddings.py @@ -97,7 +97,7 @@ def get_3d_sincos_pos_embed( The spatial dimension of positional embeddings. If an integer is provided, the same size is applied to both spatial dimensions (height and width). temporal_size (`int`): - The temporal dimension of postional embeddings (number of frames). + The temporal dimension of positional embeddings (number of frames). spatial_interpolation_scale (`float`, defaults to 1.0): Scale factor for spatial grid interpolation. 
temporal_interpolation_scale (`float`, defaults to 1.0): @@ -169,7 +169,7 @@ def _get_3d_sincos_pos_embed_np( The spatial dimension of positional embeddings. If an integer is provided, the same size is applied to both spatial dimensions (height and width). temporal_size (`int`): - The temporal dimension of postional embeddings (number of frames). + The temporal dimension of positional embeddings (number of frames). spatial_interpolation_scale (`float`, defaults to 1.0): Scale factor for spatial grid interpolation. temporal_interpolation_scale (`float`, defaults to 1.0): diff --git a/src/diffusers/models/transformers/latte_transformer_3d.py b/src/diffusers/models/transformers/latte_transformer_3d.py index 27fb3f51a260..4f413ea6a5b3 100644 --- a/src/diffusers/models/transformers/latte_transformer_3d.py +++ b/src/diffusers/models/transformers/latte_transformer_3d.py @@ -30,7 +30,7 @@ class LatteTransformer3DModel(ModelMixin, ConfigMixin, CacheMixin): _supports_gradient_checkpointing = True """ - A 3D Transformer model for video-like data, paper: https://arxiv.org/abs/2401.03048, offical code: + A 3D Transformer model for video-like data, paper: https://arxiv.org/abs/2401.03048, official code: https://github.com/Vchitect/Latte Parameters: @@ -216,7 +216,7 @@ def forward( ) num_patches = height * width - hidden_states = self.pos_embed(hidden_states) # alrady add positional embeddings + hidden_states = self.pos_embed(hidden_states) # already add positional embeddings added_cond_kwargs = {"resolution": None, "aspect_ratio": None} timestep, embedded_timestep = self.adaln_single( diff --git a/src/diffusers/models/transformers/lumina_nextdit2d.py b/src/diffusers/models/transformers/lumina_nextdit2d.py index 320950866c4a..6cf19cb3c399 100644 --- a/src/diffusers/models/transformers/lumina_nextdit2d.py +++ b/src/diffusers/models/transformers/lumina_nextdit2d.py @@ -43,7 +43,7 @@ class LuminaNextDiTBlock(nn.Module): num_kv_heads (`int`): Number of attention heads in key and value features (if using GQA), or set to None for the same as query. multiple_of (`int`): The number of multiple of ffn layer. - ffn_dim_multiplier (`float`): The multipier factor of ffn layer dimension. + ffn_dim_multiplier (`float`): The multiplier factor of ffn layer dimension. norm_eps (`float`): The eps for norm layer. qk_norm (`bool`): normalization for query and key. cross_attention_dim (`int`): Cross attention embedding dimension of the input text prompt hidden_states. diff --git a/src/diffusers/models/unets/unet_i2vgen_xl.py b/src/diffusers/models/unets/unet_i2vgen_xl.py index c275e16744f4..58fa30e4979f 100644 --- a/src/diffusers/models/unets/unet_i2vgen_xl.py +++ b/src/diffusers/models/unets/unet_i2vgen_xl.py @@ -154,7 +154,7 @@ def __init__( # of that, we used `num_attention_heads` for arguments that actually denote attention head dimension. This # is why we ignore `num_attention_heads` and calculate it from `attention_head_dims` below. # This is still an incorrect way of calculating `num_attention_heads` but we need to stick to it - # without running proper depcrecation cycles for the {down,mid,up} blocks which are a + # without running proper deprecation cycles for the {down,mid,up} blocks which are a # part of the public API. 
num_attention_heads = attention_head_dim diff --git a/src/diffusers/pipelines/amused/pipeline_amused.py b/src/diffusers/pipelines/amused/pipeline_amused.py index 12f7dc7c59d4..f0948ede9b2a 100644 --- a/src/diffusers/pipelines/amused/pipeline_amused.py +++ b/src/diffusers/pipelines/amused/pipeline_amused.py @@ -131,7 +131,7 @@ def __call__( generation deterministic. latents (`torch.IntTensor`, *optional*): Pre-generated tokens representing latent vectors in `self.vqvae`, to be used as inputs for image - gneration. If not provided, the starting latents will be completely masked. + generation. If not provided, the starting latents will be completely masked. prompt_embeds (`torch.Tensor`, *optional*): Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not provided, text embeddings are generated from the `prompt` input argument. A single vector from the diff --git a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py index f80771381b50..87d78646a966 100644 --- a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py +++ b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py @@ -373,7 +373,7 @@ def encode_prompt( *e.g.* prompt weighting. If not provided, negative_prompt_embeds will be computed from `negative_prompt` input argument. generated_prompt_embeds (`torch.Tensor`, *optional*): - Pre-generated text embeddings from the GPT2 langauge model. Can be used to easily tweak text inputs, + Pre-generated text embeddings from the GPT2 language model. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input argument. negative_generated_prompt_embeds (`torch.Tensor`, *optional*): @@ -394,7 +394,7 @@ def encode_prompt( attention_mask (`torch.LongTensor`): Attention mask to be applied to the `prompt_embeds`. generated_prompt_embeds (`torch.Tensor`): - Text embeddings generated from the GPT2 langauge model. + Text embeddings generated from the GPT2 language model. Example: @@ -904,7 +904,7 @@ def __call__( Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument. generated_prompt_embeds (`torch.Tensor`, *optional*): - Pre-generated text embeddings from the GPT2 langauge model. Can be used to easily tweak text inputs, + Pre-generated text embeddings from the GPT2 language model. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input argument. 
negative_generated_prompt_embeds (`torch.Tensor`, *optional*): diff --git a/src/diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py b/src/diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py index cbd8bef67945..ee9615e828a7 100644 --- a/src/diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +++ b/src/diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py @@ -138,7 +138,7 @@ def __init__( def get_query_embeddings(self, input_image, src_subject): return self.qformer(image_input=input_image, text_input=src_subject, return_dict=False) - # from the original Blip Diffusion code, speciefies the target subject and augments the prompt by repeating it + # from the original Blip Diffusion code, specifies the target subject and augments the prompt by repeating it def _build_prompt(self, prompts, tgt_subjects, prompt_strength=1.0, prompt_reps=20): rv = [] for prompt, tgt_subject in zip(prompts, tgt_subjects): diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py index 88c387d48dd2..c73dd9824f4b 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py @@ -149,7 +149,7 @@ def __init__( def get_query_embeddings(self, input_image, src_subject): return self.qformer(image_input=input_image, text_input=src_subject, return_dict=False) - # from the original Blip Diffusion code, speciefies the target subject and augments the prompt by repeating it + # from the original Blip Diffusion code, specifies the target subject and augments the prompt by repeating it def _build_prompt(self, prompts, tgt_subjects, prompt_strength=1.0, prompt_reps=20): rv = [] for prompt, tgt_subject in zip(prompts, tgt_subjects): diff --git a/src/diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py b/src/diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py index 901ca25c576c..8792961e31f5 100644 --- a/src/diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +++ b/src/diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py @@ -739,7 +739,7 @@ def __call__( callback_on_step_end_tensor_inputs (`List`, *optional*): The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the - `._callback_tensor_inputs` attribute of your pipeine class. + `._callback_tensor_inputs` attribute of your pipeline class. Examples: Returns: diff --git a/src/diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py b/src/diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py index acf1f5489ec1..1d36038d3a45 100644 --- a/src/diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +++ b/src/diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py @@ -880,7 +880,7 @@ def __call__( callback_on_step_end_tensor_inputs (`List`, *optional*): The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the - `._callback_tensor_inputs` attribute of your pipeine class. + `._callback_tensor_inputs` attribute of your pipeline class. 
Examples: diff --git a/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py b/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py index 34b2a3945572..b33c3735c2fb 100644 --- a/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +++ b/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py @@ -97,7 +97,7 @@ def __call__( for i, audio in enumerate(audios): write(f"maestro_test_{i}.wav", pipe.unet.sample_rate, audio.transpose()) - # To dislay in google colab + # To display in google colab import IPython.display as ipd for audio in audios: diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py index 7225f2f234be..a6b876eb181d 100644 --- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py @@ -509,7 +509,7 @@ def edit_model( The destination prompt. Must contain all words from `source_prompt` with additional ones to specify the target edit. lamb (`float`, *optional*, defaults to 0.1): - The lambda parameter specifying the regularization intesity. Smaller values increase the editing power. + The lambda parameter specifying the regularization intensity. Smaller values increase the editing power. restart_params (`bool`, *optional*, defaults to True): Restart the model parameters to their pre-trained version before editing. This is done to avoid edit compounding. When it is `False`, edits accumulate. diff --git a/src/diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py b/src/diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py index bc276811ff4a..7dd8182dfecd 100644 --- a/src/diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +++ b/src/diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py @@ -1097,7 +1097,7 @@ def forward( cross_attention_kwargs (`dict`, *optional*): A kwargs dictionary that if specified is passed along to the [`AttnProcessor`]. added_cond_kwargs: (`dict`, *optional*): - A kwargs dictionary containin additional embeddings that if specified are added to the embeddings that + A kwargs dictionary containing additional embeddings that if specified are added to the embeddings that are passed along to the UNet blocks. down_block_additional_residuals (`tuple` of `torch.Tensor`, *optional*): additional residuals to be added to UNet long skip connections from down blocks to up blocks for diff --git a/src/diffusers/pipelines/free_noise_utils.py b/src/diffusers/pipelines/free_noise_utils.py index 8ea5eb7dd575..4a65008183b6 100644 --- a/src/diffusers/pipelines/free_noise_utils.py +++ b/src/diffusers/pipelines/free_noise_utils.py @@ -478,7 +478,7 @@ def enable_free_noise( Must be one of ["shuffle_context", "repeat_context", "random"]. - "shuffle_context" Shuffles a fixed batch of `context_length` latents to create a final latent of size - `num_frames`. This is usually the best setting for most generation scenarious. However, there + `num_frames`. This is usually the best setting for most generation scenarios. However, there might be visible repetition noticeable in the kinds of motion/animation generated. 
- "repeated_context" Repeats a fixed batch of `context_length` latents to create a final latent of size diff --git a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py index 58d65a190d5b..a00b16d000a1 100644 --- a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +++ b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py @@ -462,7 +462,7 @@ def prepare_image_latents( image_latents = image_latents.unsqueeze(2) # Append a position mask for each subsequent frame - # after the intial image latent frame + # after the initial image latent frame frame_position_mask = [] for frame_idx in range(num_frames - 1): scale = (frame_idx + 1) / (num_frames - 1) diff --git a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py index 769c834ec3cc..a838f5618f7c 100644 --- a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +++ b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py @@ -496,7 +496,7 @@ def __call__( "As of diffusers==0.19.0 this behavior has been inverted. Now white pixels are repainted and black pixels are preserved. " "This way, Kandinsky's masking behavior is aligned with Stable Diffusion. " "THIS means that you HAVE to invert the input mask to have the same behavior as before as explained in https://github.com/huggingface/diffusers/pull/4207. " - "This warning will be surpressed after the first inference call and will be removed in diffusers>0.23.0" + "This warning will be suppressed after the first inference call and will be removed in diffusers>0.23.0" ) self._warn_has_been_called = True diff --git a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py index 482093a4bb29..e99aa918ff25 100644 --- a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +++ b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py @@ -386,7 +386,7 @@ def __call__( "As of diffusers==0.19.0 this behavior has been inverted. Now white pixels are repainted and black pixels are preserved. " "This way, Kandinsky's masking behavior is aligned with Stable Diffusion. " "THIS means that you HAVE to invert the input mask to have the same behavior as before as explained in https://github.com/huggingface/diffusers/pull/4207. " - "This warning will be surpressed after the first inference call and will be removed in diffusers>0.23.0" + "This warning will be suppressed after the first inference call and will be removed in diffusers>0.23.0" ) self._warn_has_been_called = True diff --git a/src/diffusers/pipelines/kolors/text_encoder.py b/src/diffusers/pipelines/kolors/text_encoder.py index 757569c880c0..7fd1a2ec0eba 100644 --- a/src/diffusers/pipelines/kolors/text_encoder.py +++ b/src/diffusers/pipelines/kolors/text_encoder.py @@ -668,7 +668,7 @@ def forward(self, input_ids): # Embeddings. words_embeddings = self.word_embeddings(input_ids) embeddings = words_embeddings - # Data format change to avoid explicit tranposes : [b s h] --> [s b h]. + # Data format change to avoid explicit transposes : [b s h] --> [s b h]. embeddings = embeddings.transpose(0, 1).contiguous() # If the input flag for fp32 residual connection is set, convert for float. 
if self.fp32_residual_connection: diff --git a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py index bdac47c47ade..37fe35278c5d 100644 --- a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +++ b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py @@ -1458,7 +1458,7 @@ def compute_noise_ddim(scheduler, prev_latents, latents, timestep, noise_pred, e # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * noise_pred - # modifed so that updated xtm1 is returned as well (to avoid error accumulation) + # modified so that updated xtm1 is returned as well (to avoid error accumulation) mu_xt = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction if variance > 0.0: noise = (prev_latents - mu_xt) / (variance ** (0.5) * eta) diff --git a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py index cad7d8a66a08..a062b5ae6d91 100644 --- a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py @@ -1742,7 +1742,7 @@ def compute_noise_ddim(scheduler, prev_latents, latents, timestep, noise_pred, e # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * noise_pred - # modifed so that updated xtm1 is returned as well (to avoid error accumulation) + # modified so that updated xtm1 is returned as well (to avoid error accumulation) mu_xt = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction if variance > 0.0: noise = (prev_latents - mu_xt) / (variance ** (0.5) * eta) diff --git a/src/diffusers/pipelines/marigold/marigold_image_processing.py b/src/diffusers/pipelines/marigold/marigold_image_processing.py index 0723014ad37b..5130a876606a 100644 --- a/src/diffusers/pipelines/marigold/marigold_image_processing.py +++ b/src/diffusers/pipelines/marigold/marigold_image_processing.py @@ -426,7 +426,7 @@ def visualize_depth_one(img, idx=None): if isinstance(img, np.ndarray): img = torch.from_numpy(img) if not torch.is_floating_point(img): - raise ValueError(f"{prefix}: unexected dtype={img.dtype}.") + raise ValueError(f"{prefix}: unexpected dtype={img.dtype}.") else: raise ValueError(f"{prefix}: unexpected type={type(img)}.") if val_min != 0.0 or val_max != 1.0: @@ -464,7 +464,7 @@ def export_depth_to_16bit_png_one(img, idx=None): if torch.is_tensor(img): img = img.cpu().numpy() if not np.issubdtype(img.dtype, np.floating): - raise ValueError(f"{prefix}: unexected dtype={img.dtype}.") + raise ValueError(f"{prefix}: unexpected dtype={img.dtype}.") if val_min != 0.0 or val_max != 1.0: img = (img - val_min) / (val_max - val_min) img = (img * (2**16 - 1)).astype(np.uint16) diff --git a/src/diffusers/pipelines/omnigen/pipeline_omnigen.py b/src/diffusers/pipelines/omnigen/pipeline_omnigen.py index 5fe5be3b26d2..eb564b841ed8 100644 --- a/src/diffusers/pipelines/omnigen/pipeline_omnigen.py +++ b/src/diffusers/pipelines/omnigen/pipeline_omnigen.py @@ -176,7 +176,7 @@ def encode_input_images( get the continue embedding of input images by VAE Args: - input_pixel_values: normlized pixel of input images + input_pixel_values: 
normalized pixel of input images device: Returns: torch.Tensor """ diff --git a/src/diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py index b84f5d555914..71245a75e2eb 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py @@ -115,7 +115,7 @@ ... with torch.no_grad(), torch.autocast("cuda"): ... depth_map = depth_estimator(image).predicted_depth - ... depth_map = torch.nn.fuctional.interpolate( + ... depth_map = torch.nn.functional.interpolate( ... depth_map.unsqueeze(1), ... size=(1024, 1024), ... mode="bicubic", diff --git a/src/diffusers/pipelines/shap_e/renderer.py b/src/diffusers/pipelines/shap_e/renderer.py index dd25945590cd..00f873115f93 100644 --- a/src/diffusers/pipelines/shap_e/renderer.py +++ b/src/diffusers/pipelines/shap_e/renderer.py @@ -1038,7 +1038,7 @@ def decode_to_mesh( textures = _convert_srgb_to_linear(textures) textures = textures.float() - # 3.3 augument the mesh with texture data + # 3.3 augment the mesh with texture data assert len(textures.shape) == 3 and textures.shape[-1] == len(texture_channels), ( f"expected [meta_batch x inner_batch x texture_channels] field results, but got {textures.shape}" ) diff --git a/src/diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py b/src/diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py index 38f1c4314e4f..fce8efdd3cb9 100644 --- a/src/diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +++ b/src/diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py @@ -524,9 +524,9 @@ def __call__( latents = self.vqgan.config.scale_factor * latents images = self.vqgan.decode(latents).sample.clamp(0, 1) if output_type == "np": - images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesnt work + images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesn't work elif output_type == "pil": - images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesnt work + images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesn't work images = self.numpy_to_pil(images) else: images = latents diff --git a/src/diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py b/src/diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py index 241c454e103e..f08e38e7ce67 100644 --- a/src/diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +++ b/src/diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py @@ -626,11 +626,11 @@ def __call__( self.maybe_free_model_hooks() if output_type == "np": - latents = latents.cpu().float().numpy() # float() as bfloat16-> numpy doesnt work - prompt_embeds = prompt_embeds.cpu().float().numpy() # float() as bfloat16-> numpy doesnt work + latents = latents.cpu().float().numpy() # float() as bfloat16-> numpy doesn't work + prompt_embeds = prompt_embeds.cpu().float().numpy() # float() as bfloat16-> numpy doesn't work negative_prompt_embeds = ( negative_prompt_embeds.cpu().float().numpy() if negative_prompt_embeds is not None else None - ) # float() as bfloat16-> numpy doesnt work + ) # float() as bfloat16-> numpy doesn't work if not return_dict: return ( diff --git a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py 
b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py index 351b146fb423..2c972284a1bd 100644 --- a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +++ b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py @@ -1047,7 +1047,7 @@ def __call__( class GaussianSmoothing(torch.nn.Module): """ Arguments: - Apply gaussian smoothing on a 1d, 2d or 3d tensor. Filtering is performed seperately for each channel in the input + Apply gaussian smoothing on a 1d, 2d or 3d tensor. Filtering is performed separately for each channel in the input using a depthwise convolution. channels (int, sequence): Number of channels of the input tensors. Output will have this number of channels as well. diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py index 1f29f577f8e0..1ca1fd2ded78 100755 --- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py @@ -123,7 +123,7 @@ def __init__( super().__init__() logger.info( - f"{self.__class__} is an experimntal pipeline and is likely to change in the future. We recommend to use" + f"{self.__class__} is an experimental pipeline and is likely to change in the future. We recommend to use" " this pipeline for fast experimentation / iteration if needed, but advice to rely on existing pipelines" " as defined in https://huggingface.co/docs/diffusers/api/schedulers#implemented-schedulers for" " production settings." diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py index 6cd0e415e129..85b157d8ef9b 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py @@ -123,7 +123,7 @@ def _preprocess_adapter_image(image, height, width): image = torch.cat(image, dim=0) else: raise ValueError( - f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but recive: {image[0].ndim}" + f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but receive: {image[0].ndim}" ) return image diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py index 5eacb64d01e3..d5382517caf7 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py @@ -121,7 +121,7 @@ def _preprocess_adapter_image(image, height, width): image = torch.cat(image, dim=0) else: raise ValueError( - f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but recive: {image[0].ndim}" + f"Invalid image tensor! 
Expecting image tensor with 3 or 4 dimension, but receive: {image[0].ndim}" ) return image diff --git a/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py b/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py index 75e5d43678d5..29f99f3fc7fa 100644 --- a/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py +++ b/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py @@ -140,7 +140,7 @@ def forward( input_ids (`torch.Tensor` of shape `(N, max_seq_len)`): Text tokens to use for inference. prefix_embeds (`torch.Tensor` of shape `(N, prefix_length, 768)`): - Prefix embedding to preprend to the embedded tokens. + Prefix embedding to prepend to the embedded tokens. attention_mask (`torch.Tensor` of shape `(N, prefix_length + max_seq_len, 768)`, *optional*): Attention mask for the prefix embedding. labels (`torch.Tensor`, *optional*): diff --git a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py index 66d7404fb9a5..865dba75b720 100644 --- a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +++ b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py @@ -803,7 +803,7 @@ def _split(self, x, height, width): def _combine(self, img_vae, img_clip): r""" - Combines a latent iamge img_vae of shape (B, C, H, W) and a CLIP-embedded image img_clip of shape (B, 1, + Combines a latent image img_vae of shape (B, C, H, W) and a CLIP-embedded image img_clip of shape (B, 1, clip_img_dim) into a single tensor of shape (B, C * H * W + clip_img_dim). """ img_vae = torch.reshape(img_vae, (img_vae.shape[0], -1)) diff --git a/src/diffusers/quantizers/base.py b/src/diffusers/quantizers/base.py index fa9ba98e6d0d..ffa654c98c2b 100644 --- a/src/diffusers/quantizers/base.py +++ b/src/diffusers/quantizers/base.py @@ -199,7 +199,7 @@ def postprocess_model(self, model: "ModelMixin", **kwargs): def dequantize(self, model): """ - Potentially dequantize the model to retrive the original model, with some loss in accuracy / performance. Note + Potentially dequantize the model to retrieve the original model, with some loss in accuracy / performance. Note not all quantization schemes support this. """ model = self._dequantize(model) diff --git a/src/diffusers/quantizers/bitsandbytes/utils.py b/src/diffusers/quantizers/bitsandbytes/utils.py index 5476b93a4cc2..9943c1a51149 100644 --- a/src/diffusers/quantizers/bitsandbytes/utils.py +++ b/src/diffusers/quantizers/bitsandbytes/utils.py @@ -49,7 +49,7 @@ def _replace_with_bnb_linear( """ Private method that wraps the recursion for module replacement. - Returns the converted model and a boolean that indicates if the conversion has been successfull or not. + Returns the converted model and a boolean that indicates if the conversion has been successful or not. """ for name, module in model.named_children(): if current_key_name is None: @@ -223,7 +223,7 @@ def _dequantize_and_replace( performance drop compared to the original model before quantization - use it only for specific usecases such as QLoRA adapters merging. - Returns the converted model and a boolean that indicates if the conversion has been successfull or not. + Returns the converted model and a boolean that indicates if the conversion has been successful or not. 
""" quant_method = quantization_config.quantization_method() diff --git a/src/diffusers/quantizers/gguf/gguf_quantizer.py b/src/diffusers/quantizers/gguf/gguf_quantizer.py index 97f03b07a345..b3e10b1c3246 100644 --- a/src/diffusers/quantizers/gguf/gguf_quantizer.py +++ b/src/diffusers/quantizers/gguf/gguf_quantizer.py @@ -49,7 +49,7 @@ def __init__(self, quantization_config, **kwargs): def validate_environment(self, *args, **kwargs): if not is_accelerate_available() or is_accelerate_version("<", "0.26.0"): raise ImportError( - "Loading GGUF Parameters requires `accelerate` installed in your enviroment: `pip install 'accelerate>=0.26.0'`" + "Loading GGUF Parameters requires `accelerate` installed in your environment: `pip install 'accelerate>=0.26.0'`" ) if not is_gguf_available() or is_gguf_version("<", "0.10.0"): raise ImportError( @@ -82,7 +82,7 @@ def check_quantized_param_shape(self, param_name, current_param, loaded_param): inferred_shape = _quant_shape_from_byte_shape(loaded_param_shape, type_size, block_size) if inferred_shape != current_param_shape: raise ValueError( - f"{param_name} has an expected quantized shape of: {inferred_shape}, but receieved shape: {loaded_param_shape}" + f"{param_name} has an expected quantized shape of: {inferred_shape}, but received shape: {loaded_param_shape}" ) return True diff --git a/src/diffusers/quantizers/torchao/torchao_quantizer.py b/src/diffusers/quantizers/torchao/torchao_quantizer.py index f9fb217ed6bd..def7ee33e389 100644 --- a/src/diffusers/quantizers/torchao/torchao_quantizer.py +++ b/src/diffusers/quantizers/torchao/torchao_quantizer.py @@ -262,7 +262,7 @@ def create_quantized_param( **kwargs, ): r""" - Each nn.Linear layer that needs to be quantized is processsed here. First, we set the value the weight tensor, + Each nn.Linear layer that needs to be quantized is processed here. First, we set the value the weight tensor, then we move it to the target device. Finally, we quantize the module. """ module, tensor_name = get_module_from_name(model, param_name) diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py b/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py index daae50627d87..dd28af360704 100644 --- a/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py +++ b/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py @@ -218,7 +218,7 @@ def __init__( if algorithm_type not in ["dpmsolver++", "sde-dpmsolver++"] and final_sigmas_type == "zero": raise ValueError( - f"`final_sigmas_type` {final_sigmas_type} is not supported for `algorithm_type` {algorithm_type}. Please chooose `sigma_min` instead." + f"`final_sigmas_type` {final_sigmas_type} is not supported for `algorithm_type` {algorithm_type}. Please choose `sigma_min` instead." ) # setable values diff --git a/src/diffusers/utils/export_utils.py b/src/diffusers/utils/export_utils.py index 30d2c8bebd8e..07cf46928a44 100644 --- a/src/diffusers/utils/export_utils.py +++ b/src/diffusers/utils/export_utils.py @@ -155,7 +155,7 @@ def export_to_video( bitrate: Set a constant bitrate for the video encoding. Default is None causing `quality` parameter to be used instead. Better quality videos with smaller file sizes will result from using the `quality` variable bitrate parameter - rather than specifiying a fixed bitrate with this parameter. + rather than specifying a fixed bitrate with this parameter. macro_block_size: Size constraint for video. Width and height, must be divisible by this number. 
If not divisible by this number diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index d1269fbc5f20..7d0a6faa7afb 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -153,19 +153,19 @@ def get_peft_kwargs(rank_dict, network_alpha_dict, peft_state_dict, is_unet=True r = lora_alpha = list(rank_dict.values())[0] if len(set(rank_dict.values())) > 1: - # get the rank occuring the most number of times + # get the rank occurring the most number of times r = collections.Counter(rank_dict.values()).most_common()[0][0] - # for modules with rank different from the most occuring rank, add it to the `rank_pattern` + # for modules with rank different from the most occurring rank, add it to the `rank_pattern` rank_pattern = dict(filter(lambda x: x[1] != r, rank_dict.items())) rank_pattern = {k.split(".lora_B.")[0]: v for k, v in rank_pattern.items()} if network_alpha_dict is not None and len(network_alpha_dict) > 0: if len(set(network_alpha_dict.values())) > 1: - # get the alpha occuring the most number of times + # get the alpha occurring the most number of times lora_alpha = collections.Counter(network_alpha_dict.values()).most_common()[0][0] - # for modules with alpha different from the most occuring alpha, add it to the `alpha_pattern` + # for modules with alpha different from the most occurring alpha, add it to the `alpha_pattern` alpha_pattern = dict(filter(lambda x: x[1] != lora_alpha, network_alpha_dict.items())) if is_unet: alpha_pattern = { diff --git a/src/diffusers/utils/state_dict_utils.py b/src/diffusers/utils/state_dict_utils.py index 3682c5bfacd6..15a91040c48c 100644 --- a/src/diffusers/utils/state_dict_utils.py +++ b/src/diffusers/utils/state_dict_utils.py @@ -219,7 +219,7 @@ def convert_state_dict_to_diffusers(state_dict, original_type=None, **kwargs): kwargs (`dict`, *args*): Additional arguments to pass to the method. - - **adapter_name**: For example, in case of PEFT, some keys will be pre-pended + - **adapter_name**: For example, in case of PEFT, some keys will be prepended with the adapter name, therefore needs a special handling. By default PEFT also takes care of that in `get_peft_model_state_dict` method: https://github.com/huggingface/peft/blob/ba0477f2985b1ba311b83459d29895c809404e99/src/peft/utils/save_and_load.py#L92 @@ -290,7 +290,7 @@ def convert_state_dict_to_kohya(state_dict, original_type=None, **kwargs): kwargs (`dict`, *args*): Additional arguments to pass to the method. - - **adapter_name**: For example, in case of PEFT, some keys will be pre-pended + - **adapter_name**: For example, in case of PEFT, some keys will be prepended with the adapter name, therefore needs a special handling. By default PEFT also takes care of that in `get_peft_model_state_dict` method: https://github.com/huggingface/peft/blob/ba0477f2985b1ba311b83459d29895c809404e99/src/peft/utils/save_and_load.py#L92 diff --git a/src/diffusers/utils/torch_utils.py b/src/diffusers/utils/torch_utils.py index a5df07e4a3c2..19c076a4a6c7 100644 --- a/src/diffusers/utils/torch_utils.py +++ b/src/diffusers/utils/torch_utils.py @@ -61,7 +61,7 @@ def randn_tensor( logger.info( f"The passed generator was created on 'cpu' even though a tensor on {device} was expected." f" Tensors will be created on 'cpu' and then moved to {device}. Note that one can probably" - f" slighly speed up this function by passing a generator that was created on the {device} device." 
+ f" slightly speed up this function by passing a generator that was created on the {device} device." ) elif gen_device_type != device.type and gen_device_type == "cuda": raise ValueError(f"Cannot generate a {device} tensor from a generator of type {gen_device_type}.") diff --git a/src/diffusers/video_processor.py b/src/diffusers/video_processor.py index 2da782b463d4..5d0fdde8b483 100644 --- a/src/diffusers/video_processor.py +++ b/src/diffusers/video_processor.py @@ -67,7 +67,7 @@ def preprocess_video(self, video, height: Optional[int] = None, width: Optional[ # ensure the input is a list of videos: # - if it is a batch of videos (5d torch.Tensor or np.ndarray), it is converted to a list of videos (a list of 4d torch.Tensor or np.ndarray) - # - if it is a single video, it is convereted to a list of one video. + # - if it is a single video, it is converted to a list of one video. if isinstance(video, (np.ndarray, torch.Tensor)) and video.ndim == 5: video = list(video) elif isinstance(video, list) and is_valid_image(video[0]) or is_valid_image_imagelist(video): diff --git a/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py b/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py index 97d1a1cc3830..b566e894b8da 100644 --- a/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py +++ b/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py @@ -187,6 +187,6 @@ def test_attention_slicing_forward_pass(self): def test_float16_inference(self): super().test_float16_inference() - @unittest.skip("Test not supoorted.") + @unittest.skip("Test not supported.") def test_encode_prompt_works_in_isolation(self): super().test_encode_prompt_works_in_isolation() From 0e6faa58ea346abecd85c522f5b03c0460b98d03 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 1 May 2025 04:18:00 +0000 Subject: [PATCH 2/2] Apply style fixes --- .../pipeline_stable_diffusion_model_editing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py index a6b876eb181d..d0e3d208f9e7 100644 --- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py @@ -509,7 +509,8 @@ def edit_model( The destination prompt. Must contain all words from `source_prompt` with additional ones to specify the target edit. lamb (`float`, *optional*, defaults to 0.1): - The lambda parameter specifying the regularization intensity. Smaller values increase the editing power. + The lambda parameter specifying the regularization intensity. Smaller values increase the editing + power. restart_params (`bool`, *optional*, defaults to True): Restart the model parameters to their pre-trained version before editing. This is done to avoid edit compounding. When it is `False`, edits accumulate.