diff --git a/intermediate_source/reinforcement_q_learning.py b/intermediate_source/reinforcement_q_learning.py index ad38aed66fc..e13ff4faba6 100644 --- a/intermediate_source/reinforcement_q_learning.py +++ b/intermediate_source/reinforcement_q_learning.py @@ -426,7 +426,8 @@ def optimize_model(): expected_state_action_values = (next_state_values * GAMMA) + reward_batch # Compute Huber loss - loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1)) + criterion = nn.SmoothL1Loss() + loss = criterion(state_action_values, expected_state_action_values.unsqueeze(1)) # Optimize the model optimizer.zero_grad()