amend

Vincent Moens · Vincent Moens · commit bff42d1d9464 · 2024-07-29T18:37:49.000-04:00
diff --git a/intermediate_source/pinmem_nonblock.py b/intermediate_source/pinmem_nonblock.py
@@ -23,6 +23,13 @@
 - While ``cpu_tensor.to("cuda", non_blocking=True).mean()`` executes correctly, attempting
   ``cuda_tensor.to("cpu", non_blocking=True).mean()`` will result in erroneous outputs.
 
+Preamble
+~~~~~~~~
+
+The performance figures reported in this tutorial depend on the system used to build the tutorial.
+Although the conclusions should be applicable across different systems, the specific observations may vary slightly
+depending on the hardware available.
+
 """
 
 import torch
@@ -32,6 +39,14 @@
 
 ######################################################################
 #
+# This tutorial requires tensordict to be installed. If you don't have tensordict in your environment yet, install it
+# by running the following command in a separate cell:
+#
+# .. code-block:: bash
+#
+#    # Install tensordict with the following command
+#    !pip3 install tensordict
+#
 # We start by outlining the theory surrounding these concepts, and then move to concrete test examples of the features.
 #
 # - :ref:`Background <pinned_memory_background>`
@@ -136,7 +151,6 @@
 
 import contextlib
 
-import torch
 from torch.cuda import Stream
 
 
@@ -589,10 +603,6 @@ def pin_copy_to_device_nonblocking(*tensors):
 # ``pin_memory()`` before proceeding with to ``to(device)``.
 # This approach can further accelerate data transfers, as demonstrated in the following example.
 #
-# .. code-block:: bash
-#
-#    # Install tensordict with the following command
-#    !pip3 install tensordict
 #
 
 from tensordict import TensorDict