diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py
index 0c42754c70a..fb7bf2f8c95 100644
--- a/.jenkins/validate_tutorials_built.py
+++ b/.jenkins/validate_tutorials_built.py
@@ -53,8 +53,6 @@
     "intermediate_source/tensorboard_profiler_tutorial",  # reenable after 2.0 release.
     "intermediate_source/torch_export_tutorial",  # reenable after 2940 is fixed.
     "prototype_source/gpu_quantization_torchao_tutorial",  # enable when 3194
-    "advanced_source/pendulum",  # enable when 3195 is fixed
-    "intermediate_source/reinforcement_ppo"  # enable when 3195 is fixed
 ]
 
 def tutorial_source_dirs() -> List[Path]:
diff --git a/advanced_source/coding_ddpg.py b/advanced_source/coding_ddpg.py
index c634932971b..3ee4ddc39bd 100644
--- a/advanced_source/coding_ddpg.py
+++ b/advanced_source/coding_ddpg.py
@@ -893,7 +893,7 @@ def make_recorder(actor_model_explore, transform_state_dict, record_interval):
         record_frames=1000,
         policy_exploration=actor_model_explore,
         environment=environment,
-        exploration_type=ExplorationType.MEAN,
+        exploration_type=ExplorationType.DETERMINISTIC,
         record_interval=record_interval,
     )
     return recorder_obj
diff --git a/advanced_source/pendulum.py b/advanced_source/pendulum.py
index 38524cfff40..b30ef42c5c4 100644
--- a/advanced_source/pendulum.py
+++ b/advanced_source/pendulum.py
@@ -604,7 +604,7 @@ def __init__(self, td_params=None, seed=None, device="cpu"):
     env,
     # ``Unsqueeze`` the observations that we will concatenate
     UnsqueezeTransform(
-        unsqueeze_dim=-1,
+        dim=-1,
         in_keys=["th", "thdot"],
         in_keys_inv=["th", "thdot"],
     ),
diff --git a/intermediate_source/dqn_with_rnn_tutorial.py b/intermediate_source/dqn_with_rnn_tutorial.py
index 6ea09559392..bcc484f0a00 100644
--- a/intermediate_source/dqn_with_rnn_tutorial.py
+++ b/intermediate_source/dqn_with_rnn_tutorial.py
@@ -433,7 +433,7 @@
     exploration_module.step(data.numel())
     updater.step()
 
-    with set_exploration_type(ExplorationType.MODE), torch.no_grad():
+    with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad():
         rollout = env.rollout(10000, stoch_policy)
         traj_lens.append(rollout.get(("next", "step_count")).max().item())
 
diff --git a/intermediate_source/reinforcement_ppo.py b/intermediate_source/reinforcement_ppo.py
index 30216ff880c..b25a6f8c8ac 100644
--- a/intermediate_source/reinforcement_ppo.py
+++ b/intermediate_source/reinforcement_ppo.py
@@ -419,8 +419,8 @@
     in_keys=["loc", "scale"],
     distribution_class=TanhNormal,
     distribution_kwargs={
-        "min": env.action_spec.space.low,
-        "max": env.action_spec.space.high,
+        "low": env.action_spec.space.low,
+        "high": env.action_spec.space.high,
     },
     return_log_prob=True,
     # we'll need the log-prob for the numerator of the importance weights
@@ -639,7 +639,7 @@
         # number of steps (1000, which is our ``env`` horizon).
         # The ``rollout`` method of the ``env`` can take a policy as argument:
         # it will then execute this policy at each step.
-        with set_exploration_type(ExplorationType.MEAN), torch.no_grad():
+        with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad():
             # execute a rollout with the trained policy
             eval_rollout = env.rollout(1000, policy_module)
             logs["eval reward"].append(eval_rollout["next", "reward"].mean().item())