pytorch · svekars · Dec 20, 2024 · Dec 20, 2024 · Dec 20, 2024
diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py
@@ -53,8 +53,6 @@
     "intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release.
     "intermediate_source/torch_export_tutorial", # reenable after 2940 is fixed.
     "prototype_source/gpu_quantization_torchao_tutorial", # enable when 3194
-    "advanced_source/pendulum", # enable when 3195 is fixed
-    "intermediate_source/reinforcement_ppo" # enable when 3195 is fixed 
 ]
 
 def tutorial_source_dirs() -> List[Path]:

diff --git a/advanced_source/coding_ddpg.py b/advanced_source/coding_ddpg.py
@@ -893,7 +893,7 @@ def make_recorder(actor_model_explore, transform_state_dict, record_interval):
         record_frames=1000,
         policy_exploration=actor_model_explore,
         environment=environment,
-        exploration_type=ExplorationType.MEAN,
+        exploration_type=ExplorationType.DETERMINISTIC,
         record_interval=record_interval,
     )
     return recorder_obj

diff --git a/advanced_source/pendulum.py b/advanced_source/pendulum.py
@@ -604,7 +604,7 @@ def __init__(self, td_params=None, seed=None, device="cpu"):
     env,
     # ``Unsqueeze`` the observations that we will concatenate
     UnsqueezeTransform(
-        unsqueeze_dim=-1,
+        dim=-1,
         in_keys=["th", "thdot"],
         in_keys_inv=["th", "thdot"],
     ),

diff --git a/intermediate_source/dqn_with_rnn_tutorial.py b/intermediate_source/dqn_with_rnn_tutorial.py
@@ -433,7 +433,7 @@
     exploration_module.step(data.numel())
     updater.step()
 
-    with set_exploration_type(ExplorationType.MODE), torch.no_grad():
+    with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad():
         rollout = env.rollout(10000, stoch_policy)
         traj_lens.append(rollout.get(("next", "step_count")).max().item())
 

diff --git a/intermediate_source/reinforcement_ppo.py b/intermediate_source/reinforcement_ppo.py
@@ -419,8 +419,8 @@
     in_keys=["loc", "scale"],
     distribution_class=TanhNormal,
     distribution_kwargs={
-        "min": env.action_spec.space.low,
-        "max": env.action_spec.space.high,
+        "low": env.action_spec.space.low,
+        "high": env.action_spec.space.high,
     },
     return_log_prob=True,
     # we'll need the log-prob for the numerator of the importance weights
@@ -639,7 +639,7 @@
         # number of steps (1000, which is our ``env`` horizon).
         # The ``rollout`` method of the ``env`` can take a policy as argument:
         # it will then execute this policy at each step.
-        with set_exploration_type(ExplorationType.MEAN), torch.no_grad():
+        with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad():
             # execute a rollout with the trained policy
             eval_rollout = env.rollout(1000, policy_module)
             logs["eval reward"].append(eval_rollout["next", "reward"].mean().item())