From 3c98b03346e25dc4266a124fdc77a30d2fb111db Mon Sep 17 00:00:00 2001
From: KH
Date: Wed, 6 Sep 2023 08:02:14 +0900
Subject: [PATCH] Fix a typo in scaled_dot_product_attention_tutorial.py

``CausaulSelfAttention`` to ``CausalSelfAttention``
---
 intermediate_source/scaled_dot_product_attention_tutorial.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/intermediate_source/scaled_dot_product_attention_tutorial.py b/intermediate_source/scaled_dot_product_attention_tutorial.py
index 669e516f2c2..2bfeb46b56c 100644
--- a/intermediate_source/scaled_dot_product_attention_tutorial.py
+++ b/intermediate_source/scaled_dot_product_attention_tutorial.py
@@ -317,7 +317,7 @@ def generate_rand_batch(
 # on the same set of functions for both modules.
 # The reason for this here is that ``torch.compile`` is very good at removing the
 # framework overhead associated with PyTorch. If your model is launching
-# large, efficient CUDA kernels, which in this case ``CausaulSelfAttention``
+# large, efficient CUDA kernels, which in this case ``CausalSelfAttention``
 # is, then the overhead of PyTorch can be hidden.
 #
 # In reality, your module does not normally consist of a singular
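
Note (not part of the patch): the tutorial comment touched above argues that ``torch.compile`` hides PyTorch framework overhead when a module, such as ``CausalSelfAttention``, spends its time in large, efficient CUDA kernels. The sketch below illustrates that usage pattern; ``TinyAttention`` is a hypothetical stand-in for the tutorial's module, not code from the tutorial itself.

    # Minimal sketch: compile an attention-style module so per-op framework
    # overhead is removed around the already-efficient SDPA CUDA kernels.
    import torch
    import torch.nn as nn

    class TinyAttention(nn.Module):  # hypothetical stand-in for CausalSelfAttention
        def __init__(self, embed_dim=512, num_heads=8):
            super().__init__()
            self.qkv = nn.Linear(embed_dim, 3 * embed_dim)
            self.proj = nn.Linear(embed_dim, embed_dim)
            self.num_heads = num_heads

        def forward(self, x):
            B, T, C = x.shape
            q, k, v = self.qkv(x).chunk(3, dim=-1)
            # reshape to (B, num_heads, T, head_dim) for scaled_dot_product_attention
            q, k, v = (t.view(B, T, self.num_heads, C // self.num_heads).transpose(1, 2)
                       for t in (q, k, v))
            y = torch.nn.functional.scaled_dot_product_attention(q, k, v, is_causal=True)
            y = y.transpose(1, 2).reshape(B, T, C)
            return self.proj(y)

    model = TinyAttention().cuda().half()
    compiled_model = torch.compile(model)  # removes framework overhead around the kernels
    x = torch.rand(8, 1024, 512, device="cuda", dtype=torch.float16)
    out = compiled_model(x)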