We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent f1fd16f · commit 8dfdda4 — Copy full SHA for 8dfdda4
intermediate_source/reinforcement_q_learning.py
@@ -426,7 +426,8 @@ def optimize_model():
426
expected_state_action_values = (next_state_values * GAMMA) + reward_batch
427
428
# Compute Huber loss
429
- loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))
+ criterion = nn.SmoothL1Loss()
430
+ loss = criterion(state_action_values, expected_state_action_values.unsqueeze(1))
431
432
# Optimize the model
433
optimizer.zero_grad()
0 commit comments