
Commit f024e00

Fix typos (#2715)
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
1 parent 2120b4e commit f024e00

12 files changed, +44 −44 lines changed

src/diffusers/models/attention.py

Lines changed: 1 addition & 1 deletion

@@ -69,7 +69,7 @@ def __init__(
         self.value = nn.Linear(channels, channels)
 
         self.rescale_output_factor = rescale_output_factor
-        self.proj_attn = nn.Linear(channels, channels, 1)
+        self.proj_attn = nn.Linear(channels, channels, bias=True)
 
         self._use_memory_efficient_attention_xformers = False
         self._attention_op = None
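
Side note on this change: the third positional argument of `nn.Linear` is `bias`, and `1` is truthy, so the old call already created a bias term; the keyword form only states it explicitly. A minimal sketch (sizes are illustrative):

import torch.nn as nn

channels = 64  # illustrative size
old_style = nn.Linear(channels, channels, 1)          # positional `1` lands on the `bias` argument
new_style = nn.Linear(channels, channels, bias=True)  # same layer, stated explicitly
assert old_style.bias is not None and new_style.bias is not None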

src/diffusers/models/controlnet.py

Lines changed: 8 additions & 8 deletions

@@ -344,7 +344,7 @@ def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, Atte
         `processor (`dict` of `AttentionProcessor` or `AttentionProcessor`):
             The instantiated processor class or a dictionary of processor classes that will be set as the processor
             of **all** `Attention` layers.
-            In case `processor` is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainablae attention processors.:
+            In case `processor` is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainable attention processors.:
 
         """
         count = len(self.attn_processors.keys())

@@ -379,34 +379,34 @@ def set_attention_slice(self, slice_size):
         Args:
             slice_size (`str` or `int` or `list(int)`, *optional*, defaults to `"auto"`):
                 When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
-                `"max"`, maxium amount of memory will be saved by running only one slice at a time. If a number is
+                `"max"`, maximum amount of memory will be saved by running only one slice at a time. If a number is
                 provided, uses as many slices as `attention_head_dim // slice_size`. In this case, `attention_head_dim`
                 must be a multiple of `slice_size`.
         """
         sliceable_head_dims = []
 
-        def fn_recursive_retrieve_slicable_dims(module: torch.nn.Module):
+        def fn_recursive_retrieve_sliceable_dims(module: torch.nn.Module):
             if hasattr(module, "set_attention_slice"):
                 sliceable_head_dims.append(module.sliceable_head_dim)
 
             for child in module.children():
-                fn_recursive_retrieve_slicable_dims(child)
+                fn_recursive_retrieve_sliceable_dims(child)
 
         # retrieve number of attention layers
         for module in self.children():
-            fn_recursive_retrieve_slicable_dims(module)
+            fn_recursive_retrieve_sliceable_dims(module)
 
-        num_slicable_layers = len(sliceable_head_dims)
+        num_sliceable_layers = len(sliceable_head_dims)
 
         if slice_size == "auto":
             # half the attention head size is usually a good trade-off between
             # speed and memory
             slice_size = [dim // 2 for dim in sliceable_head_dims]
         elif slice_size == "max":
             # make smallest slice possible
-            slice_size = num_slicable_layers * [1]
+            slice_size = num_sliceable_layers * [1]
 
-        slice_size = num_slicable_layers * [slice_size] if not isinstance(slice_size, list) else slice_size
+        slice_size = num_sliceable_layers * [slice_size] if not isinstance(slice_size, list) else slice_size
 
         if len(slice_size) != len(sliceable_head_dims):
             raise ValueError(
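
For reference, a standalone sketch of how the `slice_size` values described in that docstring resolve, using hypothetical per-layer head dims (the real method collects them by walking the module tree):

sliceable_head_dims = [8, 16]  # hypothetical head dims of two sliceable layers
num_sliceable_layers = len(sliceable_head_dims)

def resolve_slice_size(slice_size):
    if slice_size == "auto":
        # half of each head dim: attention runs in two steps per layer
        return [dim // 2 for dim in sliceable_head_dims]
    if slice_size == "max":
        # one slice at a time saves the most memory
        return num_sliceable_layers * [1]
    # an int is broadcast to every sliceable layer; a list is used as-is
    return num_sliceable_layers * [slice_size] if not isinstance(slice_size, list) else slice_size

print(resolve_slice_size("auto"))  # [4, 8]
print(resolve_slice_size("max"))   # [1, 1]
print(resolve_slice_size(4))       # [4, 4]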

src/diffusers/models/modeling_utils.py

Lines changed: 2 additions & 2 deletions

@@ -575,7 +575,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                 raise ValueError(
                     f"Cannot load {cls} from {pretrained_model_name_or_path} because the following keys are"
                     f" missing: \n {', '.join(missing_keys)}. \n Please make sure to pass"
-                    " `low_cpu_mem_usage=False` and `device_map=None` if you want to randomely initialize"
+                    " `low_cpu_mem_usage=False` and `device_map=None` if you want to randomly initialize"
                     " those weights or else make sure your checkpoint file is correct."
                 )

@@ -591,7 +591,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                     set_module_tensor_to_device(model, param_name, param_device, value=param)
             else:  # else let accelerate handle loading and dispatching.
                 # Load weights and dispatch according to the device_map
-                # by deafult the device_map is None and the weights are loaded on the CPU
+                # by default the device_map is None and the weights are loaded on the CPU
                 accelerate.load_checkpoint_and_dispatch(model, model_file, device_map, dtype=torch_dtype)
 
             loading_info = {
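
A hedged usage sketch of the load path referenced in the error message above; the checkpoint id is illustrative. Passing `low_cpu_mem_usage=False` and `device_map=None` keeps the plain CPU load, where missing keys can be randomly initialized instead of raising:

from diffusers import UNet2DConditionModel

unet = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5",  # illustrative checkpoint
    subfolder="unet",
    low_cpu_mem_usage=False,  # skip accelerate's fast initialization
    device_map=None,          # load everything on CPU, no dispatch
)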

src/diffusers/models/resnet.py

Lines changed: 1 addition & 1 deletion

@@ -418,7 +418,7 @@ class ResnetBlock2D(nn.Module):
         time_embedding_norm (`str`, *optional*, default to `"default"` ): Time scale shift config.
             By default, apply timestep embedding conditioning with a simple shift mechanism. Choose "scale_shift" or
             "ada_group" for a stronger conditioning with scale and shift.
-        kernal (`torch.FloatTensor`, optional, default to None): FIR filter, see
+        kernel (`torch.FloatTensor`, optional, default to None): FIR filter, see
             [`~models.resnet.FirUpsample2D`] and [`~models.resnet.FirDownsample2D`].
         output_scale_factor (`float`, *optional*, default to be `1.0`): the scale factor to use for the output.
         use_in_shortcut (`bool`, *optional*, default to `True`):

src/diffusers/models/transformer_2d.py

Lines changed: 3 additions & 3 deletions

@@ -105,7 +105,7 @@ def __init__(
         self.attention_head_dim = attention_head_dim
         inner_dim = num_attention_heads * attention_head_dim
 
-        # 1. Transformer2DModel can process both standard continous images of shape `(batch_size, num_channels, width, height)` as well as quantized image embeddings of shape `(batch_size, num_image_vectors)`
+        # 1. Transformer2DModel can process both standard continuous images of shape `(batch_size, num_channels, width, height)` as well as quantized image embeddings of shape `(batch_size, num_image_vectors)`
         # Define whether input is continuous or discrete depending on configuration
         self.is_input_continuous = (in_channels is not None) and (patch_size is None)
         self.is_input_vectorized = num_vector_embeds is not None

@@ -198,7 +198,7 @@ def __init__(
         # 4. Define output layers
         self.out_channels = in_channels if out_channels is None else out_channels
         if self.is_input_continuous:
-            # TODO: should use out_channels for continous projections
+            # TODO: should use out_channels for continuous projections
             if use_linear_projection:
                 self.proj_out = nn.Linear(inner_dim, in_channels)
             else:

@@ -223,7 +223,7 @@ def forward(
         """
         Args:
             hidden_states ( When discrete, `torch.LongTensor` of shape `(batch size, num latent pixels)`.
-                When continous, `torch.FloatTensor` of shape `(batch size, channel, height, width)`): Input
+                When continuous, `torch.FloatTensor` of shape `(batch size, channel, height, width)`): Input
                 hidden_states
             encoder_hidden_states ( `torch.LongTensor` of shape `(batch size, encoder_hidden_states dim)`, *optional*):
                 Conditional embeddings for cross attention layer. If not given, cross-attention defaults to
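
For reference, a sketch of the two input layouts named in these comments (sizes are illustrative):

import torch

continuous_sample = torch.randn(2, 4, 32, 32)         # (batch_size, num_channels, height, width)
vectorized_sample = torch.randint(0, 1024, (2, 256))  # (batch_size, num latent pixels) of codebook indices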

src/diffusers/models/unet_1d.py

Lines changed: 1 addition & 1 deletion

@@ -59,7 +59,7 @@ class UNet1DModel(ModelMixin, ConfigMixin):
             obj:`(32, 32, 64)`): Tuple of block output channels.
         mid_block_type (`str`, *optional*, defaults to "UNetMidBlock1D"): block type for middle of UNet.
         out_block_type (`str`, *optional*, defaults to `None`): optional output processing of UNet.
-        act_fn (`str`, *optional*, defaults to None): optional activitation function in UNet blocks.
+        act_fn (`str`, *optional*, defaults to None): optional activation function in UNet blocks.
         norm_num_groups (`int`, *optional*, defaults to 8): group norm member count in UNet blocks.
         layers_per_block (`int`, *optional*, defaults to 1): added number of layers in a UNet block.
         downsample_each_block (`int`, *optional*, defaults to False:

src/diffusers/models/unet_1d_blocks.py

Lines changed: 1 addition & 1 deletion

@@ -331,7 +331,7 @@ def __init__(self, in_channels, n_head=1, dropout_rate=0.0):
         self.key = nn.Linear(self.channels, self.channels)
         self.value = nn.Linear(self.channels, self.channels)
 
-        self.proj_attn = nn.Linear(self.channels, self.channels, 1)
+        self.proj_attn = nn.Linear(self.channels, self.channels, bias=True)
 
         self.dropout = nn.Dropout(dropout_rate, inplace=True)

src/diffusers/models/unet_2d_blocks.py

Lines changed: 1 addition & 1 deletion

@@ -2684,7 +2684,7 @@ def __init__(
                 dropout=dropout,
                 bias=attention_bias,
                 cross_attention_dim=None,
-                cross_attention_norm=None,
+                cross_attention_norm=False,
             )
 
         # 2. Cross-Attn

src/diffusers/models/unet_2d_condition.py

Lines changed: 10 additions & 10 deletions

@@ -197,7 +197,7 @@ def __init__(
             timestep_input_dim = block_out_channels[0]
         else:
             raise ValueError(
-                f"{time_embedding_type} does not exist. Pleaes make sure to use one of `fourier` or `positional`."
+                f"{time_embedding_type} does not exist. Please make sure to use one of `fourier` or `positional`."
             )
 
         self.time_embedding = TimestepEmbedding(

@@ -391,7 +391,7 @@ def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, Atte
         `processor (`dict` of `AttentionProcessor` or `AttentionProcessor`):
             The instantiated processor class or a dictionary of processor classes that will be set as the processor
             of **all** `Attention` layers.
-            In case `processor` is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainablae attention processors.:
+            In case `processor` is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainable attention processors.:
 
         """
         count = len(self.attn_processors.keys())

@@ -425,34 +425,34 @@ def set_attention_slice(self, slice_size):
         Args:
             slice_size (`str` or `int` or `list(int)`, *optional*, defaults to `"auto"`):
                 When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
-                `"max"`, maxium amount of memory will be saved by running only one slice at a time. If a number is
+                `"max"`, maximum amount of memory will be saved by running only one slice at a time. If a number is
                 provided, uses as many slices as `attention_head_dim // slice_size`. In this case, `attention_head_dim`
                 must be a multiple of `slice_size`.
         """
         sliceable_head_dims = []
 
-        def fn_recursive_retrieve_slicable_dims(module: torch.nn.Module):
+        def fn_recursive_retrieve_sliceable_dims(module: torch.nn.Module):
             if hasattr(module, "set_attention_slice"):
                 sliceable_head_dims.append(module.sliceable_head_dim)
 
             for child in module.children():
-                fn_recursive_retrieve_slicable_dims(child)
+                fn_recursive_retrieve_sliceable_dims(child)
 
         # retrieve number of attention layers
         for module in self.children():
-            fn_recursive_retrieve_slicable_dims(module)
+            fn_recursive_retrieve_sliceable_dims(module)
 
-        num_slicable_layers = len(sliceable_head_dims)
+        num_sliceable_layers = len(sliceable_head_dims)
 
         if slice_size == "auto":
             # half the attention head size is usually a good trade-off between
             # speed and memory
             slice_size = [dim // 2 for dim in sliceable_head_dims]
         elif slice_size == "max":
             # make smallest slice possible
-            slice_size = num_slicable_layers * [1]
+            slice_size = num_sliceable_layers * [1]
 
-        slice_size = num_slicable_layers * [slice_size] if not isinstance(slice_size, list) else slice_size
+        slice_size = num_sliceable_layers * [slice_size] if not isinstance(slice_size, list) else slice_size
 
         if len(slice_size) != len(sliceable_head_dims):
             raise ValueError(

@@ -515,7 +515,7 @@ def forward(
             returning a tuple, the first element is the sample tensor.
         """
         # By default samples have to be AT least a multiple of the overall upsampling factor.
-        # The overall upsampling factor is equal to 2 ** (# num of upsampling layears).
+        # The overall upsampling factor is equal to 2 ** (# num of upsampling layers).
         # However, the upsampling interpolation output size can be forced to fit any upsampling size
         # on the fly if necessary.
         default_overall_up_factor = 2**self.num_upsamplers
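
A small sketch of the comment fixed above, with an illustrative number of upsampling layers:

num_upsamplers = 3                             # illustrative; the real value depends on the UNet config
default_overall_up_factor = 2**num_upsamplers  # 8

sample_height, sample_width = 64, 96
# interpolation sizes only need to be forced when the input is not a multiple of this factor
forward_upsample_size = any(dim % default_overall_up_factor != 0 for dim in (sample_height, sample_width))
print(default_overall_up_factor, forward_upsample_size)  # 8 False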

src/diffusers/pipelines/pipeline_utils.py

Lines changed: 1 addition & 1 deletion

@@ -1351,7 +1351,7 @@ def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto
         Args:
             slice_size (`str` or `int`, *optional*, defaults to `"auto"`):
                 When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
-                `"max"`, maxium amount of memory will be saved by running only one slice at a time. If a number is
+                `"max"`, maximum amount of memory will be saved by running only one slice at a time. If a number is
                 provided, uses as many slices as `attention_head_dim // slice_size`. In this case, `attention_head_dim`
                 must be a multiple of `slice_size`.
         """

src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py

Lines changed: 10 additions & 10 deletions

@@ -287,7 +287,7 @@ def __init__(
             timestep_input_dim = block_out_channels[0]
         else:
             raise ValueError(
-                f"{time_embedding_type} does not exist. Pleaes make sure to use one of `fourier` or `positional`."
+                f"{time_embedding_type} does not exist. Please make sure to use one of `fourier` or `positional`."
             )
 
         self.time_embedding = TimestepEmbedding(

@@ -481,7 +481,7 @@ def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, Atte
         `processor (`dict` of `AttentionProcessor` or `AttentionProcessor`):
             The instantiated processor class or a dictionary of processor classes that will be set as the processor
             of **all** `Attention` layers.
-            In case `processor` is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainablae attention processors.:
+            In case `processor` is a dict, the key needs to define the path to the corresponding cross attention processor. This is strongly recommended when setting trainable attention processors.:
 
         """
         count = len(self.attn_processors.keys())

@@ -515,34 +515,34 @@ def set_attention_slice(self, slice_size):
         Args:
             slice_size (`str` or `int` or `list(int)`, *optional*, defaults to `"auto"`):
                 When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
-                `"max"`, maxium amount of memory will be saved by running only one slice at a time. If a number is
+                `"max"`, maximum amount of memory will be saved by running only one slice at a time. If a number is
                 provided, uses as many slices as `attention_head_dim // slice_size`. In this case, `attention_head_dim`
                 must be a multiple of `slice_size`.
         """
         sliceable_head_dims = []
 
-        def fn_recursive_retrieve_slicable_dims(module: torch.nn.Module):
+        def fn_recursive_retrieve_sliceable_dims(module: torch.nn.Module):
             if hasattr(module, "set_attention_slice"):
                 sliceable_head_dims.append(module.sliceable_head_dim)
 
             for child in module.children():
-                fn_recursive_retrieve_slicable_dims(child)
+                fn_recursive_retrieve_sliceable_dims(child)
 
         # retrieve number of attention layers
         for module in self.children():
-            fn_recursive_retrieve_slicable_dims(module)
+            fn_recursive_retrieve_sliceable_dims(module)
 
-        num_slicable_layers = len(sliceable_head_dims)
+        num_sliceable_layers = len(sliceable_head_dims)
 
         if slice_size == "auto":
             # half the attention head size is usually a good trade-off between
             # speed and memory
             slice_size = [dim // 2 for dim in sliceable_head_dims]
         elif slice_size == "max":
             # make smallest slice possible
-            slice_size = num_slicable_layers * [1]
+            slice_size = num_sliceable_layers * [1]
 
-        slice_size = num_slicable_layers * [slice_size] if not isinstance(slice_size, list) else slice_size
+        slice_size = num_sliceable_layers * [slice_size] if not isinstance(slice_size, list) else slice_size
 
         if len(slice_size) != len(sliceable_head_dims):
             raise ValueError(

@@ -605,7 +605,7 @@ def forward(
             returning a tuple, the first element is the sample tensor.
         """
         # By default samples have to be AT least a multiple of the overall upsampling factor.
-        # The overall upsampling factor is equal to 2 ** (# num of upsampling layears).
+        # The overall upsampling factor is equal to 2 ** (# num of upsampling layers).
         # However, the upsampling interpolation output size can be forced to fit any upsampling size
         # on the fly if necessary.
         default_overall_up_factor = 2**self.num_upsamplers

tests/models/test_models_unet_2d_condition.py

Lines changed: 5 additions & 5 deletions

@@ -223,23 +223,23 @@ def test_model_attention_slicing(self):
         output = model(**inputs_dict)
         assert output is not None
 
-    def test_model_slicable_head_dim(self):
+    def test_model_sliceable_head_dim(self):
         init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
 
         init_dict["attention_head_dim"] = (8, 16)
 
         model = self.model_class(**init_dict)
 
-        def check_slicable_dim_attr(module: torch.nn.Module):
+        def check_sliceable_dim_attr(module: torch.nn.Module):
             if hasattr(module, "set_attention_slice"):
                 assert isinstance(module.sliceable_head_dim, int)
 
             for child in module.children():
-                check_slicable_dim_attr(child)
+                check_sliceable_dim_attr(child)
 
         # retrieve number of attention layers
         for module in model.children():
-            check_slicable_dim_attr(module)
+            check_sliceable_dim_attr(module)
 
     def test_special_attn_proc(self):
         class AttnEasyProc(torch.nn.Module):

@@ -658,7 +658,7 @@ def test_set_attention_slice_list(self):
         torch.cuda.reset_max_memory_allocated()
         torch.cuda.reset_peak_memory_stats()
 
-        # there are 32 slicable layers
+        # there are 32 sliceable layers
         slice_list = 16 * [2, 3]
         unet = self.get_unet_model()
         unet.set_attention_slice(slice_list)
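
A hedged sketch of the per-layer list form this test exercises; the checkpoint is illustrative and is assumed to expose 32 sliceable layers:

from diffusers import UNet2DConditionModel

unet = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5",  # illustrative checkpoint
    subfolder="unet",
)
slice_list = 16 * [2, 3]  # one slice size per sliceable layer, alternating 2 and 3
unet.set_attention_slice(slice_list)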
