This tutorial is for PyTorch 2.4+ and the PyTorch nightlies.

PyTorch offers a large library of operators that work on Tensors (e.g.
``torch.add``, ``torch.sum``, etc). However, you might wish to use a new customized
operator with PyTorch, perhaps written by a third-party library. This tutorial
shows how to wrap Python functions so that they behave like PyTorch native
operators. Reasons why you may wish to create a custom operator in PyTorch include:

- Treating an arbitrary Python function as an opaque callable with respect
  to ``torch.compile`` (that is, prevent ``torch.compile`` from tracing
  into the function).
- Adding training support to an arbitrary Python function

Please note that if your operation can be expressed as a composition of
existing PyTorch operators, then there is usually no need to use the custom operator
API -- everything (for example ``torch.compile``, training support) should
just work.
"""

######################################################################
# Example: Wrapping PIL's crop into a custom operator
# ----------------------------------------------------
# Let's say that we are using PIL's ``crop`` operation.

import torch
from torchvision.transforms.functional import to_pil_image, pil_to_tensor
import PIL
import IPython
import matplotlib.pyplot as plt

def crop(pic, box):
    img = to_pil_image(pic.cpu())
    cropped_img = img.crop(box)
    return pil_to_tensor(cropped_img).to(pic.device) / 255.

def display(img):
    plt.imshow(img.numpy().transpose((1, 2, 0)))

img = torch.ones(3, 64, 64)
img *= torch.linspace(0, 1, steps=64) * torch.linspace(0, 1, steps=64).unsqueeze(-1)
display(img)

######################################################################

cropped_img = crop(img, (10, 10, 50, 50))
display(cropped_img)

######################################################################
# ``crop`` is not handled effectively out-of-the-box by
# ``torch.compile``: ``torch.compile`` induces a
# `"graph break" <https://pytorch.org/docs/stable/torch.compiler_faq.html#graph-breaks>`_
# on functions it is unable to handle, and graph breaks are bad for performance.
# The following code demonstrates this by raising an error
# (``torch.compile`` with ``fullgraph=True`` raises an error if a
# graph break occurs).

@torch.compile(fullgraph=True)
def f(img):
    return crop(img, (10, 10, 50, 50))

# The following raises an error. Uncomment the line to see it.
# cropped_img = f(img)
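
######################################################################
# An optional aside, not part of the original tutorial: you can observe the
# graph break without raising an error by using ``torch._dynamo.explain``
# (a private utility that may change between releases) instead of
# ``fullgraph=True``. A sketch under that assumption:

explanation = torch._dynamo.explain(crop)(img, (10, 10, 50, 50))
print(explanation.graph_break_count)  # expected to be >= 1 for this ``crop``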

######################################################################
# In order to black-box ``crop`` for use with ``torch.compile``, we need to
# do two things:
#
# 1. wrap the function into a PyTorch custom operator.
# 2. add a "FakeTensor kernel" (aka "meta kernel") to the operator.
#    Given the metadata (e.g. shapes)
#    of the input Tensors, this function says how to compute the metadata
#    of the output Tensor(s).


from typing import Sequence

# Use torch.library.custom_op to define a new custom operator.
# If your operator mutates any input Tensors, their names must be specified
# in the ``mutates_args`` argument.
@torch.library.custom_op("mylib::crop", mutates_args=())
def crop(pic: torch.Tensor, box: Sequence[int]) -> torch.Tensor:
    img = to_pil_image(pic.cpu())
    cropped_img = img.crop(box)
    return (pil_to_tensor(cropped_img) / 255.).to(pic.device, pic.dtype)

# Use register_fake to add a FakeTensor kernel for the operator
@crop.register_fake
def _(pic, box):
    channels = pic.shape[0]
    x0, y0, x1, y1 = box
    return pic.new_empty(channels, y1 - y0, x1 - x0)
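
######################################################################
# An optional sanity check, not part of the original tutorial (it relies on
# the private ``FakeTensorMode`` API, which may change between releases):
# calling the operator on fake tensors exercises the fake kernel, letting us
# confirm the inferred output shape without doing any real work.

from torch._subclasses.fake_tensor import FakeTensorMode

with FakeTensorMode():
    fake_pic = torch.empty(3, 64, 64)
    fake_out = crop(fake_pic, (10, 10, 50, 50))
print(fake_out.shape)  # expected: torch.Size([3, 40, 40])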

######################################################################
# After this, ``crop`` now works without graph breaks:

@torch.compile(fullgraph=True)
def f(img):
    return crop(img, (10, 10, 50, 50))

cropped_img = f(img)
display(img)

######################################################################

display(cropped_img)

######################################################################
# Adding training support for crop
# --------------------------------
# Use ``torch.library.register_autograd`` to add training support for an operator.
# Prefer this over directly using ``torch.autograd.Function``; some compositions of
# ``autograd.Function`` with PyTorch operator registration APIs can lead to (and
# has led to) silent incorrectness when composed with ``torch.compile``.
#
# The gradient formula for ``crop`` is essentially ``PIL.paste`` (we'll leave the
# derivation as an exercise to the reader). Let's first wrap ``paste`` into a
# custom operator:

@torch.library.custom_op("mylib::paste", mutates_args=())
def paste(im1: torch.Tensor, im2: torch.Tensor, coord: Sequence[int]) -> torch.Tensor:
    assert im1.device == im2.device
    assert im1.dtype == im2.dtype
    im1_pil = to_pil_image(im1.cpu())
    im2_pil = to_pil_image(im2.cpu())
    PIL.Image.Image.paste(im1_pil, im2_pil, coord)
    return (pil_to_tensor(im1_pil) / 255.).to(im1.device, im1.dtype)

@paste.register_fake
def _(im1, im2, coord):
    assert im1.device == im2.device
    assert im1.dtype == im2.dtype
    return torch.empty_like(im1)

######################################################################
# And now let's use ``register_autograd`` to specify the gradient formula for ``crop``:

def backward(ctx, grad_output):
    grad_input = grad_output.new_zeros(ctx.pic_shape)
    grad_input = paste(grad_input, grad_output, ctx.coords)
    return grad_input, None

def setup_context(ctx, inputs, output):
    pic, box = inputs
    ctx.coords = box[:2]
    ctx.pic_shape = pic.shape

crop.register_autograd(backward, setup_context=setup_context)

######################################################################
# Note that the backward must be a composition of PyTorch-understood operators,
# which is why we wrapped paste into a custom operator instead of directly using
# PIL's paste.

img = img.requires_grad_()
result = crop(img, (10, 10, 50, 50))
result.sum().backward()
display(img.grad)

######################################################################
# This is the correct gradient: 1 (white) in the cropped region and 0
# (black) in the unused region.
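
######################################################################
# An optional check, not part of the original tutorial. It assumes the
# (left, upper, right, lower) crop box used above, so the gradient should
# be exactly 1 inside the box and 0 elsewhere.

assert torch.allclose(img.grad[:, 10:50, 10:50], torch.ones(3, 40, 40))
assert img.grad.sum() == 3 * 40 * 40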

######################################################################
# Testing Python Custom operators
# --------------------------------
# Use ``torch.library.opcheck`` to test that the custom operator was registered
# correctly. This does not test that the gradients are mathematically correct;
# please write separate tests for that (either manual ones or ``torch.autograd.gradcheck``).
#
# To use ``opcheck``, pass it a set of example inputs to test against. If your
# operator supports training, then the examples should include Tensors that
# require grad. If your operator supports multiple devices, then the examples
# should include Tensors from each device.

examples = [
    # Representative inputs, following the guidance above: with and without
    # requires_grad, and with more than one dtype.
    [torch.randn(3, 64, 64), [0, 0, 10, 10]],
    [torch.randn(3, 91, 91, requires_grad=True), [10, 0, 20, 10]],
    [torch.randn(3, 60, 60, dtype=torch.double), [3, 4, 32, 20]],
    [torch.randn(3, 512, 512, requires_grad=True, dtype=torch.double), [3, 4, 32, 45]],
]

for example in examples:
    torch.library.opcheck(crop, example)

######################################################################
# Mutable Python Custom operators
# --------------------------------
# You can also wrap a Python function that mutates its inputs into a custom
# operator.
# Functions that mutate inputs are common because that is how many low-level
# kernels are written; for example, a kernel that computes ``sin`` may take in
# the input and an output tensor and write ``input.sin()`` to the output tensor.
#
# We'll use ``numpy.sin`` to demonstrate an example of a mutable Python
# custom operator.

import numpy as np

@torch.library.custom_op("mylib::numpy_sin", mutates_args={"output"})
def numpy_sin(input: torch.Tensor, output: torch.Tensor) -> None:
    assert input.device == output.device
    assert input.device.type == "cpu"
    input_np = input.numpy()
    output_np = output.numpy()
    np.sin(input_np, out=output_np)

######################################################################
# Because the operator doesn't return anything, there is no need to register
# a FakeTensor kernel (meta kernel) to get it to work with ``torch.compile``.

@torch.compile(fullgraph=True)
def f(x):
    out = torch.empty(3)
    numpy_sin(x, out)
    return out

x = torch.randn(3)
y = f(x)
assert torch.allclose(y, x.sin())
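
######################################################################
# An optional aside, not part of the original tutorial: a common convention
# is to expose a functional wrapper (the name ``numpy_sin_functional`` below
# is hypothetical) around a mutating kernel, so callers do not have to
# preallocate the output themselves.

def numpy_sin_functional(x: torch.Tensor) -> torch.Tensor:
    out = torch.empty_like(x)
    numpy_sin(x, out)
    return out

assert torch.allclose(numpy_sin_functional(torch.ones(3)), torch.ones(3).sin())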

######################################################################
# And here's an ``opcheck`` run telling us that we did indeed register the operator correctly.
# ``opcheck`` would error out if we forgot to add the output to ``mutates_args``, for example.

example_inputs = [
    [torch.randn(3), torch.empty(3)],
]

for example in example_inputs:
    torch.library.opcheck(numpy_sin, example)

######################################################################
# Conclusion
# ----------
# In this tutorial, we learned how to use ``torch.library.custom_op`` to
# create a custom operator in Python that works with PyTorch subsystems
# such as ``torch.compile`` and autograd.
#
# This tutorial provides a basic introduction to custom operators.
# For more detailed information, see:
#
# - `the torch.library documentation <https://pytorch.org/docs/stable/library.html>`_
# - `the Custom Operators Manual <https://pytorch.org/docs/main/notes/custom_operators.html>`_
#