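Fix torchrl scripts: use ExplorationType.DETERMINISTIC instead of MEAN/MODE, UnsqueezeTransform `dim` instead of `unsqueeze_dim`, and TanhNormal `low`/`high` instead of `min`/`max`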

vmoens · vmoens · commit fe0e8243c540 · 2024-12-20T10:49:08.000Z
diff --git a/advanced_source/coding_ddpg.py b/advanced_source/coding_ddpg.py
@@ -893,7 +893,7 @@ def make_recorder(actor_model_explore, transform_state_dict, record_interval):
         record_frames=1000,
         policy_exploration=actor_model_explore,
         environment=environment,
-        exploration_type=ExplorationType.MEAN,
+        exploration_type=ExplorationType.DETERMINISTIC,
         record_interval=record_interval,
     )
     return recorder_obj
diff --git a/advanced_source/pendulum.py b/advanced_source/pendulum.py
@@ -604,7 +604,7 @@ def __init__(self, td_params=None, seed=None, device="cpu"):
     env,
     # ``Unsqueeze`` the observations that we will concatenate
     UnsqueezeTransform(
-        unsqueeze_dim=-1,
+        dim=-1,
         in_keys=["th", "thdot"],
         in_keys_inv=["th", "thdot"],
     ),
diff --git a/intermediate_source/dqn_with_rnn_tutorial.py b/intermediate_source/dqn_with_rnn_tutorial.py
@@ -433,7 +433,7 @@
     exploration_module.step(data.numel())
     updater.step()
 
-    with set_exploration_type(ExplorationType.MODE), torch.no_grad():
+    with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad():
         rollout = env.rollout(10000, stoch_policy)
         traj_lens.append(rollout.get(("next", "step_count")).max().item())
 
diff --git a/intermediate_source/reinforcement_ppo.py b/intermediate_source/reinforcement_ppo.py
@@ -419,8 +419,8 @@
     in_keys=["loc", "scale"],
     distribution_class=TanhNormal,
     distribution_kwargs={
-        "min": env.action_spec.space.low,
-        "max": env.action_spec.space.high,
+        "low": env.action_spec.space.low,
+        "high": env.action_spec.space.high,
     },
     return_log_prob=True,
     # we'll need the log-prob for the numerator of the importance weights
@@ -639,7 +639,7 @@
         # number of steps (1000, which is our ``env`` horizon).
         # The ``rollout`` method of the ``env`` can take a policy as argument:
         # it will then execute this policy at each step.
-        with set_exploration_type(ExplorationType.MEAN), torch.no_grad():
+        with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad():
             # execute a rollout with the trained policy
             eval_rollout = env.rollout(1000, policy_module)
             logs["eval reward"].append(eval_rollout["next", "reward"].mean().item())

Original file line number	Diff line number	Diff line change
`@@ -893,7 +893,7 @@ def make_recorder(actor_model_explore, transform_state_dict, record_interval):`
`893`	`893`	`record_frames=1000,`
`894`	`894`	`policy_exploration=actor_model_explore,`
`895`	`895`	`environment=environment,`
`896`		`- exploration_type=ExplorationType.MEAN,`
	`896`	`+ exploration_type=ExplorationType.DETERMINISTIC,`
`897`	`897`	`record_interval=record_interval,`
`898`	`898`	`)`
`899`	`899`	`return recorder_obj`