@@ -471,7 +471,7 @@ def trimRareWords(voc, pairs, MIN_COUNT):
# with mini-batches.
#
# Using mini-batches also means that we must be mindful of the variation
- # of sentence length in our batches. To accomodate sentences of different
+ # of sentence length in our batches. To accommodate sentences of different
# sizes in the same batch, we will make our batched input tensor of shape
# *(max_length, batch_size)*, where sentences shorter than the
# *max_length* are zero padded after an *EOS_token*.
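As a side note on the padding this hunk describes, the sketch below (made-up index lists and an assumed PAD_token of 0, not the tutorial's actual helpers) shows one way to transpose a batch of index sequences into a zero-padded *(max_length, batch_size)* tensor:

# Minimal sketch: zero-pad index sequences into a (max_length, batch_size) tensor.
# The sentence lists are placeholders; PAD_token = 0 is an assumed convention.
import itertools
import torch

PAD_token = 0  # assumed padding index

def zero_pad(index_batch, fillvalue=PAD_token):
    # zip_longest transposes the batch so rows become time steps,
    # filling shorter sequences with the pad value.
    return list(itertools.zip_longest(*index_batch, fillvalue=fillvalue))

# Example: three "sentences" already converted to word indexes (EOS appended).
batch = [[5, 8, 2], [4, 2], [7, 9, 11, 2]]
padded = zero_pad(batch)                    # length == max_length
input_tensor = torch.LongTensor(padded)     # shape: (max_length, batch_size)
print(input_tensor.shape)                   # torch.Size([4, 3])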
@@ -615,7 +615,7 @@ def batch2TrainData(voc, pair_batch):
# in normal sequential order, and one that is fed the input sequence in
# reverse order. The outputs of each network are summed at each time step.
# Using a bidirectional GRU will give us the advantage of encoding both
- # past and future context .
+ # past and future contexts .
#
# Bidirectional RNN:
#
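For reference, a minimal sketch of the summing described in this hunk, using a toy hidden size and random stand-in inputs rather than the tutorial's real embeddings:

# Hedged sketch: bidirectional GRU whose forward and backward outputs are summed.
import torch
import torch.nn as nn

hidden_size = 16
gru = nn.GRU(hidden_size, hidden_size, bidirectional=True)

seq_len, batch_size = 10, 3
embedded = torch.randn(seq_len, batch_size, hidden_size)   # stand-in for embedded input
outputs, hidden = gru(embedded)
# outputs: (seq_len, batch, 2 * hidden_size); sum the two directions per time step
outputs = outputs[:, :, :hidden_size] + outputs[:, :, hidden_size:]
print(outputs.shape)   # torch.Size([10, 3, 16])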
@@ -700,7 +700,7 @@ def forward(self, input_seq, input_lengths, hidden=None):
# states to generate the next word in the sequence. It continues
# generating words until it outputs an *EOS_token*, representing the end
# of the sentence. A common problem with a vanilla seq2seq decoder is that
- # if we rely soley on the context vector to encode the entire input
+ # if we rely solely on the context vector to encode the entire input
# sequence’s meaning, it is likely that we will have information loss.
# This is especially the case when dealing with long input sequences,
# greatly limiting the capability of our decoder.
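To make that limitation concrete, here is a rough sketch of attending over all encoder outputs with a plain dot-product score (random stand-in tensors, not the tutorial's attention module), which avoids squeezing the whole input into a single context vector:

# Rough sketch of dot-product attention over encoder outputs.
import torch
import torch.nn.functional as F

seq_len, batch_size, hidden_size = 10, 3, 16
encoder_outputs = torch.randn(seq_len, batch_size, hidden_size)   # stand-in values
decoder_hidden = torch.randn(1, batch_size, hidden_size)          # current decoder state

# Score each encoder time step against the decoder state, then normalize.
attn_energies = torch.sum(decoder_hidden * encoder_outputs, dim=2)   # (seq_len, batch)
attn_weights = F.softmax(attn_energies.t(), dim=1).unsqueeze(1)      # (batch, 1, seq_len)

# Weighted sum of encoder outputs -> one context vector per batch element.
context = attn_weights.bmm(encoder_outputs.transpose(0, 1))          # (batch, 1, hidden)
print(context.shape)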
@@ -950,7 +950,7 @@ def maskNLLLoss(inp, target, mask):
# sequence (or batch of sequences). We use the ``GRU`` layer like this in
# the ``encoder``. The reality is that under the hood, there is an
# iterative process looping over each time step calculating hidden states.
- # Alternatively, you ran run these modules one time-step at a time. In
+ # Alternatively, you can run these modules one time-step at a time. In
# this case, we manually loop over the sequences during the training
# process like we must do for the ``decoder`` model. As long as you
# maintain the correct conceptual model of these modules, implementing
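A small sketch of the equivalence this hunk mentions, using a toy GRU with assumed sizes rather than the tutorial's models: one call over the whole sequence matches an explicit per-time-step loop.

# Contrast a whole-sequence GRU call with an explicit per-step loop;
# both produce the same final hidden state for the same inputs.
import torch
import torch.nn as nn

torch.manual_seed(0)
gru = nn.GRU(8, 8)
seq = torch.randn(5, 2, 8)            # (seq_len, batch, input_size)

# One call over the full sequence (as in the encoder)
_, hidden_full = gru(seq)

# Manual loop, one time step at a time (as in the decoder's training loop)
hidden = None
for t in range(seq.size(0)):
    step_input = seq[t].unsqueeze(0)  # (1, batch, input_size)
    _, hidden = gru(step_input, hidden)

print(torch.allclose(hidden_full, hidden))   # True (up to floating-point noise)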
@@ -1115,7 +1115,7 @@ def trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer, deco
# softmax value. This decoding method is optimal on a single time-step
# level.
#
- # To facilite the greedy decoding operation, we define a
+ # To facilitate the greedy decoding operation, we define a
# ``GreedySearchDecoder`` class. When run, an object of this class takes
# an input sequence (``input_seq``) of shape *(input_seq length, 1)*, a
# scalar input length (``input_length``) tensor, and a ``max_length`` to
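As an illustration of the greedy choice itself, here is a schematic loop with a placeholder step function standing in for the trained decoder; the token indexes and sizes are assumptions, not the tutorial's ``GreedySearchDecoder``:

# Schematic greedy decoding: pick the argmax token at each step until EOS.
import torch

SOS_token, EOS_token = 1, 2   # assumed special-token indexes
vocab_size, max_length = 20, 10

def decoder_step(decoder_input, decoder_hidden):
    # Placeholder: returns random "scores" over the vocabulary.
    return torch.rand(1, vocab_size), decoder_hidden

decoder_input = torch.LongTensor([[SOS_token]])   # (1, 1)
decoder_hidden = torch.zeros(1, 1, 16)
all_tokens = []
for _ in range(max_length):
    decoder_output, decoder_hidden = decoder_step(decoder_input, decoder_hidden)
    # Greedy choice: highest-scoring token at this time step
    _, token = torch.max(decoder_output, dim=1)
    all_tokens.append(token.item())
    if token.item() == EOS_token:
        break
    decoder_input = token.unsqueeze(0)            # feed the choice back as next input
print(all_tokens)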