From 3c98b03346e25dc4266a124fdc77a30d2fb111db Mon Sep 17 00:00:00 2001
From: KH
Date: Wed, 6 Sep 2023 08:02:14 +0900
Subject: [PATCH] Fix a typo in scaled_dot_product_attention_tutorial.py

``CausaulSelfAttention`` to ``CausalSelfAttention``
---
 intermediate_source/scaled_dot_product_attention_tutorial.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/intermediate_source/scaled_dot_product_attention_tutorial.py b/intermediate_source/scaled_dot_product_attention_tutorial.py
index 669e516f2c2..2bfeb46b56c 100644
--- a/intermediate_source/scaled_dot_product_attention_tutorial.py
+++ b/intermediate_source/scaled_dot_product_attention_tutorial.py
@@ -317,7 +317,7 @@ def generate_rand_batch(
 # on the same set of functions for both modules.
 # The reason for this here is that ``torch.compile`` is very good at removing the
 # framework overhead associated with PyTorch. If your model is launching
-# large, efficient CUDA kernels, which in this case ``CausaulSelfAttention``
+# large, efficient CUDA kernels, which in this case ``CausalSelfAttention``
 # is, then the overhead of PyTorch can be hidden.
 #
 # In reality, your module does not normally consist of a singular
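
Note (not part of the patch): the tutorial comment touched above argues that ``torch.compile`` hides PyTorch framework overhead when a module, such as ``CausalSelfAttention``, spends its time in large, efficient CUDA kernels. The sketch below illustrates that usage pattern; ``TinyAttention`` is a hypothetical stand-in for the tutorial's module, not code from the tutorial itself.

    # Minimal sketch: compile an attention-style module so per-op framework
    # overhead is removed around the already-efficient SDPA CUDA kernels.
    import torch
    import torch.nn as nn

    class TinyAttention(nn.Module):  # hypothetical stand-in for CausalSelfAttention
        def __init__(self, embed_dim=512, num_heads=8):
            super().__init__()
            self.qkv = nn.Linear(embed_dim, 3 * embed_dim)
            self.proj = nn.Linear(embed_dim, embed_dim)
            self.num_heads = num_heads

        def forward(self, x):
            B, T, C = x.shape
            q, k, v = self.qkv(x).chunk(3, dim=-1)
            # reshape to (B, num_heads, T, head_dim) for scaled_dot_product_attention
            q, k, v = (t.view(B, T, self.num_heads, C // self.num_heads).transpose(1, 2)
                       for t in (q, k, v))
            y = torch.nn.functional.scaled_dot_product_attention(q, k, v, is_causal=True)
            y = y.transpose(1, 2).reshape(B, T, C)
            return self.proj(y)

    model = TinyAttention().cuda().half()
    compiled_model = torch.compile(model)  # removes framework overhead around the kernels
    x = torch.rand(8, 1024, 512, device="cuda", dtype=torch.float16)
    out = compiled_model(x)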