
Commit 39d3e6e

Fix TorchRL scripts for PyTorch 2.6 and TorchRL >= 0.6 (#3199)
Fixes #3195. Updates the TorchRL tutorial scripts for the PyTorch 2.6 release.
1 parent 1150bb5 commit 39d3e6e

File tree

5 files changed (+6 lines, -8 lines)

.jenkins/validate_tutorials_built.py

Lines changed: 0 additions & 2 deletions
@@ -53,8 +53,6 @@
     "intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release.
     "intermediate_source/torch_export_tutorial", # reenable after 2940 is fixed.
     "prototype_source/gpu_quantization_torchao_tutorial", # enable when 3194
-    "advanced_source/pendulum", # enable when 3195 is fixed
-    "intermediate_source/reinforcement_ppo" # enable when 3195 is fixed
 ]

 def tutorial_source_dirs() -> List[Path]:

advanced_source/coding_ddpg.py

Lines changed: 1 addition & 1 deletion
@@ -893,7 +893,7 @@ def make_recorder(actor_model_explore, transform_state_dict, record_interval):
         record_frames=1000,
         policy_exploration=actor_model_explore,
         environment=environment,
-        exploration_type=ExplorationType.MEAN,
+        exploration_type=ExplorationType.DETERMINISTIC,
         record_interval=record_interval,
     )
     return recorder_obj

advanced_source/pendulum.py

Lines changed: 1 addition & 1 deletion
@@ -604,7 +604,7 @@ def __init__(self, td_params=None, seed=None, device="cpu"):
     env,
     # ``Unsqueeze`` the observations that we will concatenate
     UnsqueezeTransform(
-        unsqueeze_dim=-1,
+        dim=-1,
         in_keys=["th", "thdot"],
         in_keys_inv=["th", "thdot"],
     ),
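For context: the keyword that names the target dimension of UnsqueezeTransform is now dim rather than unsqueeze_dim. A minimal standalone sketch of the renamed argument, assuming TorchRL >= 0.6; the tensordict contents are illustrative only:

import torch
from tensordict import TensorDict
from torchrl.envs.transforms import UnsqueezeTransform

# `dim` replaces the older `unsqueeze_dim` keyword.
t = UnsqueezeTransform(dim=-1, in_keys=["th", "thdot"], in_keys_inv=["th", "thdot"])

td = TensorDict({"th": torch.zeros(3), "thdot": torch.zeros(3)}, batch_size=[])
print(t(td)["th"].shape)  # torch.Size([3, 1])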

intermediate_source/dqn_with_rnn_tutorial.py

Lines changed: 1 addition & 1 deletion
@@ -433,7 +433,7 @@
     exploration_module.step(data.numel())
     updater.step()

-    with set_exploration_type(ExplorationType.MODE), torch.no_grad():
+    with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad():
         rollout = env.rollout(10000, stoch_policy)
         traj_lens.append(rollout.get(("next", "step_count")).max().item())

intermediate_source/reinforcement_ppo.py

Lines changed: 3 additions & 3 deletions
@@ -419,8 +419,8 @@
     in_keys=["loc", "scale"],
     distribution_class=TanhNormal,
     distribution_kwargs={
-        "min": env.action_spec.space.low,
-        "max": env.action_spec.space.high,
+        "low": env.action_spec.space.low,
+        "high": env.action_spec.space.high,
     },
     return_log_prob=True,
     # we'll need the log-prob for the numerator of the importance weights
@@ -639,7 +639,7 @@
 # number of steps (1000, which is our ``env`` horizon).
 # The ``rollout`` method of the ``env`` can take a policy as argument:
 # it will then execute this policy at each step.
-with set_exploration_type(ExplorationType.MEAN), torch.no_grad():
+with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad():
     # execute a rollout with the trained policy
     eval_rollout = env.rollout(1000, policy_module)
     logs["eval reward"].append(eval_rollout["next", "reward"].mean().item())
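For context: the first hunk tracks the renamed bounds of TanhNormal, whose keyword arguments are now low and high instead of min and max. A minimal standalone sketch of the renamed arguments, assuming TorchRL >= 0.6; the tensor values are illustrative and unrelated to the tutorial's action spec:

import torch
from torchrl.modules import TanhNormal

# `low` / `high` replace the older `min` / `max` bounds keywords.
dist = TanhNormal(
    loc=torch.zeros(3),
    scale=torch.ones(3),
    low=-torch.ones(3),
    high=torch.ones(3),
)
print(dist.sample().shape)  # samples are squashed into [low, high]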
