diff --git a/intermediate_source/reinforcement_q_learning.py b/intermediate_source/reinforcement_q_learning.py index 86e353e15e1..cac4842795b 100644 --- a/intermediate_source/reinforcement_q_learning.py +++ b/intermediate_source/reinforcement_q_learning.py @@ -344,7 +344,7 @@ def select_action(state): steps_done += 1 if sample > eps_threshold: with torch.no_grad(): - # t.max(1) will return largest value for column of each row. + # t.max(1) will return the largest column value of each row. # second column on max result is index of where max element was # found, so we pick action with the larger expected reward. return policy_net(state).max(1)[1].view(1, 1)