diff --git a/beginner_source/knowledge_distillation_tutorial.py b/beginner_source/knowledge_distillation_tutorial.py
index 4601352ff03..49ab9a134dc 100644
--- a/beginner_source/knowledge_distillation_tutorial.py
+++ b/beginner_source/knowledge_distillation_tutorial.py
@@ -352,7 +352,7 @@ def train_knowledge_distillation(teacher, student, train_loader, epochs, learnin
 # Cosine loss minimization run
 # ----------------------------
 # Feel free to play around with the temperature parameter that controls the softness of the softmax function and the loss coefficients.
-# In neural networks, it is easy to include to include additional loss functions to the main objectives to achieve goals like better generalization.
+# In neural networks, it is easy to include additional loss functions to the main objectives to achieve goals like better generalization.
 # Let's try including an objective for the student, but now let's focus on their hidden states rather than their output layers.
 # Our goal is to convey information from the teacher's representation to the student by including a naive loss function,
 # whose minimization implies that the flattened vectors that are subsequently passed to the classifiers have become more *similar* as the loss decreases.
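
For context, the hunk's comment describes pushing the teacher's and student's flattened hidden representations toward each other with an additional loss term. Below is a minimal, self-contained sketch of that idea using `nn.CosineEmbeddingLoss`; the tensor names (`teacher_hidden`, `student_hidden`) and shapes are illustrative placeholders, not the tutorial's actual variables.

```python
import torch
import torch.nn as nn

# Cosine loss between the two flattened hidden representations.
cosine_loss = nn.CosineEmbeddingLoss()

batch_size, hidden_dim = 32, 1024
teacher_hidden = torch.randn(batch_size, hidden_dim)  # flattened teacher features (placeholder)
student_hidden = torch.randn(batch_size, hidden_dim)  # flattened student features (placeholder)

# A target of +1 asks the loss to align each pair of vectors, so minimizing it
# makes the student's representation more *similar* to the teacher's.
target = torch.ones(batch_size)
hidden_rep_loss = cosine_loss(student_hidden, teacher_hidden, target)
print(hidden_rep_loss.item())
```

In practice this term would be weighted and added to the student's usual cross-entropy objective, which is what the "loss coefficients" mentioned in the hunk control.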