
Commit 69c758f

Merge branch 'main' into leslie/update_link
2 parents 4ab17a8 + a66464b commit 69c758f

37 files changed: +1546 / -100 lines

.ci/docker/requirements.txt

Lines changed: 4 additions & 3 deletions
@@ -13,7 +13,7 @@ tqdm==4.66.1
 numpy==1.24.4
 matplotlib
 librosa
-torch==2.3
+torch==2.4
 torchvision
 torchtext
 torchdata
@@ -28,8 +28,9 @@ tensorboard
 jinja2==3.1.3
 pytorch-lightning
 torchx
-torchrl==0.3.0
-tensordict==0.3.0
+# TODO: use stable 0.5 when released
+-e git+https://github.com/pytorch/rl.git#egg=torchrl
+-e git+https://github.com/pytorch/tensordict.git#egg=tensordict
 ax-platform
 nbformat>==5.9.2
 datasets
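
Since torchrl and tensordict are now installed from Git checkouts rather than pinned releases, a quick sanity check of what actually landed in the environment can help when CI breaks. A minimal sketch (not part of this commit; assumes the requirements above are installed):

import torch
import torchrl
import tensordict

# The git-based installs report development versions (e.g. a 0.5.x dev build)
# rather than the previously pinned 0.3.0 releases.
print("torch:", torch.__version__)
print("torchrl:", torchrl.__version__)
print("tensordict:", tensordict.__version__)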

.jenkins/build.sh

Lines changed: 2 additions & 2 deletions
@@ -21,9 +21,9 @@ sudo apt-get install -y pandoc
 
 #Install PyTorch Nightly for test.
 # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
-# Install 2.2 for testing - uncomment to install nightly binaries (update the version as needed).
+# Install 2.4 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
 # pip uninstall -y torch torchvision torchaudio torchtext torchdata
-# pip3 install torch==2.3.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu121
+# pip3 install torch==2.4.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124
 
 # Install two language tokenizers for Translation with TorchText tutorial
 python -m spacy download en_core_web_sm

.jenkins/validate_tutorials_built.py

Lines changed: 0 additions & 3 deletions
@@ -29,7 +29,6 @@
     "intermediate_source/fx_conv_bn_fuser",
     "intermediate_source/_torch_export_nightly_tutorial",  # does not work on release
     "advanced_source/super_resolution_with_onnxruntime",
-    "advanced_source/python_custom_ops",  # https://github.com/pytorch/pytorch/issues/127443
     "advanced_source/usb_semisup_learn",  # fails with CUDA OOM error, should try on a different worker
     "prototype_source/fx_graph_mode_ptq_dynamic",
     "prototype_source/vmap_recipe",
@@ -54,8 +53,6 @@
     "intermediate_source/flask_rest_api_tutorial",
     "intermediate_source/text_to_speech_with_torchaudio",
     "intermediate_source/tensorboard_profiler_tutorial",  # reenable after 2.0 release.
-    "intermediate_source/inductor_debug_cpu",  # reenable after 2942
-    "beginner_source/onnx/onnx_registry_tutorial",  # reenable after 2941 is fixed.
     "intermediate_source/torch_export_tutorial"  # reenable after 2940 is fixed.
 ]

Several binary image files changed (previews not shown), including _static/img/pinmem/pinmem.png.

advanced_source/coding_ddpg.py

Lines changed: 18 additions & 16 deletions
@@ -182,7 +182,7 @@
 # Later, we will see how the target parameters should be updated in TorchRL.
 #
 
-from tensordict.nn import TensorDictModule
+from tensordict.nn import TensorDictModule, TensorDictSequential
 
 
 def _init(
@@ -290,12 +290,11 @@ def _loss_actor(
 ) -> torch.Tensor:
     td_copy = tensordict.select(*self.actor_in_keys)
     # Get an action from the actor network: since we made it functional, we need to pass the params
-    td_copy = self.actor_network(td_copy, params=self.actor_network_params)
+    with self.actor_network_params.to_module(self.actor_network):
+        td_copy = self.actor_network(td_copy)
     # get the value associated with that action
-    td_copy = self.value_network(
-        td_copy,
-        params=self.value_network_params.detach(),
-    )
+    with self.value_network_params.detach().to_module(self.value_network):
+        td_copy = self.value_network(td_copy)
     return -td_copy.get("state_action_value")
 
 
@@ -317,7 +316,8 @@ def _loss_value(
     td_copy = tensordict.clone()
 
     # V(s, a)
-    self.value_network(td_copy, params=self.value_network_params)
+    with self.value_network_params.to_module(self.value_network):
+        self.value_network(td_copy)
     pred_val = td_copy.get("state_action_value").squeeze(-1)
 
     # we manually reconstruct the parameters of the actor-critic, where the first
@@ -332,9 +332,8 @@ def _loss_value(
         batch_size=self.target_actor_network_params.batch_size,
         device=self.target_actor_network_params.device,
     )
-    target_value = self.value_estimator.value_estimate(
-        tensordict, target_params=target_params
-    ).squeeze(-1)
+    with target_params.to_module(self.actor_critic):
+        target_value = self.value_estimator.value_estimate(tensordict).squeeze(-1)
 
     # Computes the value loss: L2, L1 or smooth L1 depending on `self.loss_function`
     loss_value = distance_loss(pred_val, target_value, loss_function=self.loss_function)
@@ -717,7 +716,7 @@ def get_env_stats():
     ActorCriticWrapper,
     DdpgMlpActor,
     DdpgMlpQNet,
-    OrnsteinUhlenbeckProcessWrapper,
+    OrnsteinUhlenbeckProcessModule,
     ProbabilisticActor,
     TanhDelta,
     ValueOperator,
@@ -776,15 +775,18 @@ def make_ddpg_actor(
 # Exploration
 # ~~~~~~~~~~~
 #
-# The policy is wrapped in a :class:`~torchrl.modules.OrnsteinUhlenbeckProcessWrapper`
+# The policy is passed into a :class:`~torchrl.modules.OrnsteinUhlenbeckProcessModule`
 # exploration module, as suggested in the original paper.
 # Let's define the number of frames before OU noise reaches its minimum value
 annealing_frames = 1_000_000
 
-actor_model_explore = OrnsteinUhlenbeckProcessWrapper(
+actor_model_explore = TensorDictSequential(
     actor,
-    annealing_num_steps=annealing_frames,
-).to(device)
+    OrnsteinUhlenbeckProcessModule(
+        spec=actor.spec.clone(),
+        annealing_num_steps=annealing_frames,
+    ).to(device),
+)
 if device == torch.device("cpu"):
     actor_model_explore.share_memory()
 
@@ -1168,7 +1170,7 @@ def ceil_div(x, y):
 )
 
 # update the exploration strategy
-actor_model_explore.step(current_frames)
+actor_model_explore[1].step(current_frames)
 
 collector.shutdown()
 del collector
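
The coding_ddpg.py hunks above migrate from passing params=... at call time to tensordict's to_module context manager, which temporarily binds a parameter TensorDict to a module for the duration of the call. A minimal sketch of that pattern (the module, keys, and shapes here are illustrative and not taken from the tutorial):

import torch
from torch import nn
from tensordict import TensorDict
from tensordict.nn import TensorDictModule

# Illustrative module: reads "obs", writes "action".
net = TensorDictModule(nn.Linear(4, 2), in_keys=["obs"], out_keys=["action"])

# Extract the parameters as a TensorDict, similar to what the loss module stores.
params = TensorDict.from_module(net)

data = TensorDict({"obs": torch.randn(8, 4)}, batch_size=[8])

# New-style functional call: bind ``params`` to ``net`` only inside this block,
# replacing the older ``net(data, params=params)`` convention shown in the diff.
with params.to_module(net):
    out = net(data)

assert out["action"].shape == (8, 2)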

advanced_source/cpp_custom_ops.rst

Lines changed: 1 addition & 1 deletion
@@ -417,4 +417,4 @@ Conclusion
 In this tutorial, we went over the recommended approach to integrating Custom C++
 and CUDA operators with PyTorch. The ``TORCH_LIBRARY/torch.library`` APIs are fairly
 low-level. For more information about how to use the API, see
-`The Custom Operators Manual <https://pytorch.org/docs/main/notes/custom_operators.html>`_.
+`The Custom Operators Manual <https://pytorch.org/tutorials/advanced/custom_ops_landing_page.html#the-custom-operators-manual>`_.

advanced_source/cpp_extension.rst

Lines changed: 5 additions & 1 deletion
@@ -2,6 +2,10 @@ Custom C++ and CUDA Extensions
 ==============================
 **Author**: `Peter Goldsborough <https://www.goldsborough.me/>`_
 
+.. warning::
+
+   This tutorial is deprecated as of PyTorch 2.4. Please see :ref:`custom-ops-landing-page`
+   for the newest up-to-date guides on extending PyTorch with Custom C++/CUDA Extensions.
 
 PyTorch provides a plethora of operations related to neural networks, arbitrary
 tensor algebra, data wrangling and other purposes. However, you may still find
@@ -225,7 +229,7 @@ Instead of:
 Currently open issue for nvcc bug `here
 <https://github.com/pytorch/pytorch/issues/69460>`_.
 Complete workaround code example `here
-<https://github.com/facebookresearch/pytorch3d/commit/cb170ac024a949f1f9614ffe6af1c38d972f7d48>`_.
+<https://github.com/facebookresearch/pytorch3d/commit/cb170ac024a949f1f9614ffe6af1c38d972f7d48>`_.
 
 Forward Pass
 ************

advanced_source/custom_ops_landing_page.rst

Lines changed: 7 additions & 6 deletions
@@ -1,7 +1,7 @@
 .. _custom-ops-landing-page:
 
-PyTorch Custom Operators Landing Page
-=====================================
+PyTorch Custom Operators
+===========================
 
 PyTorch offers a large library of operators that work on Tensors (e.g. ``torch.add``,
 ``torch.sum``, etc). However, you may wish to bring a new custom operation to PyTorch
@@ -10,26 +10,27 @@ In order to do so, you must register the custom operation with PyTorch via the P
 `torch.library docs <https://pytorch.org/docs/stable/library.html>`_ or C++ ``TORCH_LIBRARY``
 APIs.
 
-TL;DR
------
+
 
 Authoring a custom operator from Python
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Please see :ref:`python-custom-ops-tutorial`.
 
 You may wish to author a custom operator from Python (as opposed to C++) if:
+
 - you have a Python function you want PyTorch to treat as an opaque callable, especially with
-respect to ``torch.compile`` and ``torch.export``.
+  respect to ``torch.compile`` and ``torch.export``.
 - you have some Python bindings to C++/CUDA kernels and want those to compose with PyTorch
-subsystems (like ``torch.compile`` or ``torch.autograd``)
+  subsystems (like ``torch.compile`` or ``torch.autograd``)
 
 Integrating custom C++ and/or CUDA code with PyTorch
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Please see :ref:`cpp-custom-ops-tutorial`.
 
 You may wish to author a custom operator from C++ (as opposed to Python) if:
+
 - you have custom C++ and/or CUDA code.
 - you plan to use this code with ``AOTInductor`` to do Python-less inference.
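
The landing page directs Python authors to the python-custom-ops tutorial. As a rough illustration of the kind of operator that path covers, here is a minimal sketch (the mylib::numpy_sin name and the NumPy body are illustrative assumptions, not content of this commit):

import numpy as np
import torch

# A hypothetical Python custom op: PyTorch treats the NumPy call as an opaque
# callable, so it composes with torch.compile and torch.export without being traced into.
@torch.library.custom_op("mylib::numpy_sin", mutates_args=())
def numpy_sin(x: torch.Tensor) -> torch.Tensor:
    return torch.from_numpy(np.sin(x.cpu().numpy()))

# A fake (meta) implementation lets the compile stack infer output metadata
# without running the NumPy code.
@numpy_sin.register_fake
def _(x):
    return torch.empty_like(x)

print(numpy_sin(torch.randn(3)))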

advanced_source/dispatcher.rst

Lines changed: 5 additions & 0 deletions
@@ -1,6 +1,11 @@
 Registering a Dispatched Operator in C++
 ========================================
 
+.. warning::
+
+   This tutorial is deprecated as of PyTorch 2.4. Please see :ref:`custom-ops-landing-page`
+   for the newest up-to-date guides on extending PyTorch with Custom Operators.
+
 The dispatcher is an internal component of PyTorch which is responsible for
 figuring out what code should actually get run when you call a function like
 ``torch::add``. This can be nontrivial, because PyTorch operations need

advanced_source/python_custom_ops.py

Lines changed: 1 addition & 1 deletion
@@ -260,5 +260,5 @@ def f(x):
 # For more detailed information, see:
 #
 # - `the torch.library documentation <https://pytorch.org/docs/stable/library.html>`_
-# - `the Custom Operators Manual <https://pytorch.org/docs/main/notes/custom_operators.html>`_
+# - `the Custom Operators Manual <https://pytorch.org/tutorials/advanced/custom_ops_landing_page.html#the-custom-operators-manual>`_
 #

advanced_source/torch_script_custom_ops.rst

Lines changed: 5 additions & 0 deletions
@@ -1,6 +1,11 @@
 Extending TorchScript with Custom C++ Operators
 ===============================================
 
+.. warning::
+
+   This tutorial is deprecated as of PyTorch 2.4. Please see :ref:`custom-ops-landing-page`
+   for the newest up-to-date guides on PyTorch Custom Operators.
+
 The PyTorch 1.0 release introduced a new programming model to PyTorch called
 `TorchScript <https://pytorch.org/docs/master/jit.html>`_. TorchScript is a
 subset of the Python programming language which can be parsed, compiled and

beginner_source/knowledge_distillation_tutorial.py

Lines changed: 1 addition & 1 deletion
@@ -352,7 +352,7 @@ def train_knowledge_distillation(teacher, student, train_loader, epochs, learnin
 # Cosine loss minimization run
 # ----------------------------
 # Feel free to play around with the temperature parameter that controls the softness of the softmax function and the loss coefficients.
-# In neural networks, it is easy to include to include additional loss functions to the main objectives to achieve goals like better generalization.
+# In neural networks, it is easy to include additional loss functions to the main objectives to achieve goals like better generalization.
 # Let's try including an objective for the student, but now let's focus on their hidden states rather than their output layers.
 # Our goal is to convey information from the teacher's representation to the student by including a naive loss function,
 # whose minimization implies that the flattened vectors that are subsequently passed to the classifiers have become more *similar* as the loss decreases.

beginner_source/onnx/onnx_registry_tutorial.py

Lines changed: 11 additions & 20 deletions
@@ -99,7 +99,6 @@ def forward(self, input_x, input_y):
 # NOTE: All attributes must be annotated with type hints.
 @onnxscript.script(custom_aten)
 def custom_aten_add(input_x, input_y, alpha: float = 1.0):
-    alpha = opset18.CastLike(alpha, input_y)
     input_y = opset18.Mul(input_y, alpha)
     return opset18.Add(input_x, input_y)
 
@@ -130,9 +129,9 @@ def custom_aten_add(input_x, input_y, alpha: float = 1.0):
 # graph node name is the function name
 assert onnx_program.model_proto.graph.node[0].op_type == "custom_aten_add"
 # function node domain is empty because we use standard ONNX operators
-assert onnx_program.model_proto.functions[0].node[3].domain == ""
+assert {node.domain for node in onnx_program.model_proto.functions[0].node} == {""}
 # function node name is the standard ONNX operator name
-assert onnx_program.model_proto.functions[0].node[3].op_type == "Add"
+assert {node.op_type for node in onnx_program.model_proto.functions[0].node} == {"Add", "Mul", "Constant"}
 
 
 ######################################################################
@@ -231,33 +230,25 @@ def custom_aten_gelu(input_x, approximate: str = "none"):
 
 
 ######################################################################
-# Let's inspect the model and verify the model uses :func:`custom_aten_gelu` instead of
-# :class:`aten::gelu`. Note the graph has one graph nodes for
-# ``custom_aten_gelu``, and inside ``custom_aten_gelu``, there is a function
-# node for ``Gelu`` with namespace ``com.microsoft``.
+# Let's inspect the model and verify the model uses op_type ``Gelu``
+# from namespace ``com.microsoft``.
+#
+# .. note::
+#    :func:`custom_aten_gelu` does not exist in the graph because
+#    functions with fewer than three operators are inlined automatically.
 #
 
 # graph node domain is the custom domain we registered
 assert onnx_program.model_proto.graph.node[0].domain == "com.microsoft"
 # graph node name is the function name
-assert onnx_program.model_proto.graph.node[0].op_type == "custom_aten_gelu"
-# function node domain is the custom domain we registered
-assert onnx_program.model_proto.functions[0].node[0].domain == "com.microsoft"
-# function node name is the node name used in the function
-assert onnx_program.model_proto.functions[0].node[0].op_type == "Gelu"
+assert onnx_program.model_proto.graph.node[0].op_type == "Gelu"
 
 
 ######################################################################
-# The following diagram shows ``custom_aten_gelu_model`` ONNX graph using Netron:
+# The following diagram shows ``custom_aten_gelu_model`` ONNX graph using Netron,
+# we can see the ``Gelu`` node from module ``com.microsoft`` used in the function:
 #
 # .. image:: /_static/img/onnx/custom_aten_gelu_model.png
-#    :width: 70%
-#    :align: center
-#
-# Inside the ``custom_aten_gelu`` function, we can see the ``Gelu`` node from module
-# ``com.microsoft`` used in the function:
-#
-# .. image:: /_static/img/onnx/custom_aten_gelu_function.png
 #
 # That is all we need to do. As an additional step, we can use ONNX Runtime to run the model,
 # and compare the results with PyTorch.

en-wordlist.txt

Lines changed: 11 additions & 3 deletions
@@ -1,3 +1,4 @@
+
 ACL
 ADI
 AOT
@@ -50,6 +51,7 @@ DDP
 DDPG
 DDQN
 DLRM
+DMA
 DNN
 DQN
 DataLoaders
@@ -68,6 +70,8 @@ Ecker
 ExportDB
 FC
 FGSM
+tensordict
+DataLoader's
 FLAVA
 FSDP
 FX
@@ -139,6 +143,7 @@ MKLDNN
 MLP
 MLPs
 MNIST
+MPS
 MUC
 MacBook
 MacOS
@@ -219,6 +224,7 @@ STR
 SVE
 SciPy
 Sequentials
+Sharding
 Sigmoid
 SoTA
 Sohn
@@ -254,6 +260,7 @@ VLDB
 VQA
 VS Code
 ViT
+Volterra
 WMT
 WSI
 WSIs
@@ -336,11 +343,11 @@ dataset’s
 deallocation
 decompositions
 decorrelated
-devicemesh
 deserialize
 deserialized
 desynchronization
 deterministically
+devicemesh
 dimensionality
 dir
 discontiguous
@@ -384,6 +391,7 @@ hessian
 hessians
 histoencoder
 histologically
+homonymous
 hotspot
 hvp
 hyperparameter
@@ -459,6 +467,7 @@ optimizer's
 optimizers
 otsu
 overfitting
+pageable
 parallelizable
 parallelization
 parametrization
@@ -522,7 +531,6 @@ runtime
 runtimes
 scalable
 sharded
-Sharding
 softmax
 sparsified
 sparsifier
@@ -609,4 +617,4 @@ warmstarting
 warmup
 webp
 wsi
-wsis
+wsis
