
Commit 8008f90

Merge branch 'main' into main
2 parents: 9b3c476 + 23a9aa0

14 files changed: +146 −18 lines

.ci/docker/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ pyopengl
 gymnasium[mujoco]==0.27.0
 timm
 iopath
-pygame==2.1.2
+pygame==2.6.0
 pycocotools
 semilearn==0.3.2
 torchao==0.0.3

.gitmodules

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
1-
[submodule "src/pytorch-sphinx-theme"]
2-
path = src/pytorch-sphinx-theme
3-
url = https://github.com/pytorch/pytorch_sphinx_theme
1+

beginner_source/deeplabv3_on_android.rst

Lines changed: 4 additions & 0 deletions
@@ -5,6 +5,10 @@ Image Segmentation DeepLabV3 on Android

 **Reviewed by**: `Jeremiah Chung <https://github.com/jeremiahschung>`_

+.. warning::
+   PyTorch Mobile is no longer actively supported. Please check out `ExecuTorch <https://pytorch.org/executorch-overview>`_, PyTorch’s all-new on-device inference library. You can also review our `end-to-end workflows <https://github.com/pytorch/executorch/tree/main/examples/portable#readme>`_ and the `source code for DeepLabV3 <https://github.com/pytorch/executorch/tree/main/examples/models/deeplab_v3>`_.
+
+
 Introduction
 ------------

beginner_source/onnx/intro_onnx.py

Lines changed: 3 additions & 2 deletions
@@ -39,13 +39,14 @@

 - `ONNX <https://onnx.ai>`_ standard library
 - `ONNX Script <https://onnxscript.ai>`_ library that enables developers to author ONNX operators,
-  functions and models using a subset of Python in an expressive, and yet simple fashion.
+  functions and models using a subset of Python in an expressive, and yet simple fashion
+- `ONNX Runtime <https://onnxruntime.ai>`_ accelerated machine learning library.

 They can be installed through `pip <https://pypi.org/project/pip/>`_:

 .. code-block:: bash

-  pip install --upgrade onnx onnxscript
+  pip install --upgrade onnx onnxscript onnxruntime

 To validate the installation, run the following commands:
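For reference, the validation step the hunk ends on can be as simple as importing each package and printing its version. A minimal sketch (an editorial illustration, not part of this commit):

import onnx
import onnxscript
import onnxruntime

# Confirm each upgraded package imports and report its version.
print(onnx.__version__)
print(onnxscript.__version__)
print(onnxruntime.__version__)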

conf.py

Lines changed: 6 additions & 0 deletions
@@ -67,6 +67,12 @@
 #
 # needs_sphinx = '1.0'

+html_meta = {
+    'description': 'Master PyTorch with our step-by-step tutorials for all skill levels. Start your journey to becoming a PyTorch expert today!',
+    'keywords': 'PyTorch, tutorials, Getting Started, deep learning, AI',
+    'author': 'PyTorch Contributors'
+}
+
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.

index.rst

Lines changed: 1 addition & 0 deletions
@@ -3,6 +3,7 @@ Welcome to PyTorch Tutorials

 **What's new in PyTorch tutorials?**

+* `torch.export AOTInductor Tutorial for Python runtime (Beta) <https://pytorch.org/tutorials/recipes/torch_export_aoti_python.html>`__
 * `A guide on good usage of non_blocking and pin_memory() in PyTorch <https://pytorch.org/tutorials/intermediate/pinmem_nonblock.html>`__
 * `Introduction to Distributed Pipeline Parallelism <https://pytorch.org/tutorials/intermediate/pipelining_tutorial.html>`__
 * `Introduction to Libuv TCPStore Backend <https://pytorch.org/tutorials/intermediate/TCPStore_libuv_backend.html>`__

intermediate_source/torch_compile_tutorial.py

Lines changed: 100 additions & 2 deletions
@@ -73,17 +73,21 @@ def foo(x, y):

 ######################################################################
 # Alternatively, we can decorate the function.
+t1 = torch.randn(10, 10)
+t2 = torch.randn(10, 10)

 @torch.compile
 def opt_foo2(x, y):
     a = torch.sin(x)
     b = torch.cos(y)
     return a + b
-print(opt_foo2(torch.randn(10, 10), torch.randn(10, 10)))
+print(opt_foo2(t1, t2))

 ######################################################################
 # We can also optimize ``torch.nn.Module`` instances.

+t = torch.randn(10, 100)
+
 class MyModule(torch.nn.Module):
     def __init__(self):
         super().__init__()
@@ -94,7 +98,101 @@ def forward(self, x):

 mod = MyModule()
 opt_mod = torch.compile(mod)
-print(opt_mod(torch.randn(10, 100)))
+print(opt_mod(t))
+
+######################################################################
+# torch.compile and Nested Calls
+# ------------------------------
+# Nested function calls within the decorated function will also be compiled.
+
+def nested_function(x):
+    return torch.sin(x)
+
+@torch.compile
+def outer_function(x, y):
+    a = nested_function(x)
+    b = torch.cos(y)
+    return a + b
+
+print(outer_function(t1, t2))
+
+######################################################################
+# In the same fashion, when compiling a module, all sub-modules and methods
+# within it that are not in a skip list are also compiled.
+
+class OuterModule(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.inner_module = MyModule()
+        self.outer_lin = torch.nn.Linear(10, 2)
+
+    def forward(self, x):
+        x = self.inner_module(x)
+        return torch.nn.functional.relu(self.outer_lin(x))
+
+outer_mod = OuterModule()
+opt_outer_mod = torch.compile(outer_mod)
+print(opt_outer_mod(t))
+
+######################################################################
+# We can also disable some functions from being compiled by using
+# ``torch.compiler.disable``. Suppose you want to disable the tracing on just
+# the ``complex_function`` function, but want to continue the tracing back in
+# ``complex_conjugate``. In this case, you can use the
+# ``torch.compiler.disable(recursive=False)`` option. Otherwise, the default is
+# ``recursive=True``.
+
+def complex_conjugate(z):
+    return torch.conj(z)
+
+@torch.compiler.disable(recursive=False)
+def complex_function(real, imag):
+    # Assume that this function causes problems in the compilation.
+    z = torch.complex(real, imag)
+    return complex_conjugate(z)
+
+def outer_function():
+    real = torch.tensor([2, 3], dtype=torch.float32)
+    imag = torch.tensor([4, 5], dtype=torch.float32)
+    z = complex_function(real, imag)
+    return torch.abs(z)
+
+# Try to compile the outer_function
+try:
+    opt_outer_function = torch.compile(outer_function)
+    print(opt_outer_function())
+except Exception as e:
+    print("Compilation of outer_function failed:", e)
+
+######################################################################
+# Best Practices and Recommendations
+# ----------------------------------
+#
+# Behavior of ``torch.compile`` with Nested Modules and Function Calls
+#
+# When you use ``torch.compile``, the compiler will try to recursively compile
+# every function call inside the target function or module that is not in a
+# skip list (such as built-ins and some functions in the ``torch.*`` namespace).
+#
+# **Best Practices:**
+#
+# 1. **Top-Level Compilation:** One approach is to compile at the highest level
+#    possible (i.e., when the top-level module is initialized/called) and
+#    selectively disable compilation when encountering excessive graph breaks or
+#    errors. If there are still many compile issues, compile individual
+#    subcomponents instead.
+#
+# 2. **Modular Testing:** Test individual functions and modules with ``torch.compile``
+#    before integrating them into larger models to isolate potential issues.
+#
+# 3. **Disable Compilation Selectively:** If certain functions or sub-modules
+#    cannot be handled by ``torch.compile``, use ``torch.compiler.disable`` to
+#    exclude them from compilation; by default the exclusion is recursive.
+#
+# 4. **Compile Leaf Functions First:** In complex models with multiple nested
+#    functions and modules, start by compiling the leaf functions or modules first.
+#    For more information see `TorchDynamo APIs for fine-grained tracing <https://pytorch.org/docs/stable/torch.compiler_fine_grain_apis.html>`__.

 ######################################################################
 # Demonstrating Speedups
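The modular-testing recommendation in the hunk above amounts to checking each unit in isolation before composing it. A minimal sketch (an editorial illustration, not part of this commit) comparing eager and compiled outputs for one function:

import torch

def layer(x):
    # A stand-in for one unit of a larger model.
    return torch.relu(x) + 1.0

compiled_layer = torch.compile(layer)

x = torch.randn(8, 8)
# Compare eager and compiled outputs before wiring the unit into a model;
# a mismatch or error here isolates the problem to this one function.
assert torch.allclose(layer(x), compiled_layer(x), atol=1e-6)
print("eager and compiled outputs match")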

prototype_source/gpu_quantization_torchao_tutorial.py

Lines changed: 4 additions & 6 deletions
@@ -35,14 +35,12 @@
 #
 # Segment Anything Model checkpoint setup:
 #
-# 1. Go to the `segment-anything repo <checkpoint https://github.com/facebookresearch/segment-anything/tree/main#model-checkpoints>`_ and download the ``vit_h`` checkpoint. Alternatively, you can just use ``wget``: `wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth --directory-prefix=<path>
+# 1. Go to the `segment-anything repo checkpoint <https://github.com/facebookresearch/segment-anything/tree/main#model-checkpoints>`_ and download the ``vit_h`` checkpoint. Alternatively, you can use ``wget`` (for example, ``wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth --directory-prefix=<path>``).
 # 2. Pass in that directory by editing the code below to say:
 #
-# .. code-block::
-#
-#    {sam_checkpoint_base_path}=<path>
+# .. code-block:: bash
 #
-# This was run on an A100-PG509-200 power limited to 330.00 W
+#    {sam_checkpoint_base_path}=<path>
 #

 import torch
@@ -297,7 +295,7 @@ def get_sam_model(only_one_block=False, batchsize=1):
 # -----------------
 # In this tutorial, we have learned about the quantization and optimization techniques
 # on the example of the segment anything model.
-
+#
 # In the end, we achieved a full-model apples to apples quantization speedup
 # of about 7.7% on batch size 16 (677.28ms to 729.65ms). We can push this a
 # bit further by increasing the batch size and optimizing other parts of
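For context, step 2 of the checkpoint setup feeds the downloaded directory into the tutorial's model-loading code. A minimal sketch of what that looks like, assuming the ``segment_anything`` package's ``sam_model_registry`` API and a hypothetical local path:

from segment_anything import sam_model_registry

# Hypothetical path; substitute the directory passed to --directory-prefix.
sam_checkpoint_base_path = "/path/to/checkpoints"

# Build the vit_h variant from the downloaded checkpoint file.
sam = sam_model_registry["vit_h"](
    checkpoint=f"{sam_checkpoint_base_path}/sam_vit_h_4b8939.pth"
)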

prototype_source/lite_interpreter.rst

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+(Prototype) Introduce lite interpreter workflow in Android and iOS
+==================================================================
+
+This tutorial has been moved to https://pytorch.org/tutorials/recipes/mobile_interpreter.html
+
+
+.. raw:: html
+
+   <meta http-equiv="Refresh" content="0; url='https://pytorch.org/tutorials/recipes/mobile_interpreter.html'" />

recipes_source/distributed_checkpoint_recipe.rst

Lines changed: 1 addition & 1 deletion
@@ -289,7 +289,7 @@ the intent is to save or load in "non-distributed" style, meaning entirely in th

     import os

     import torch
-    import torch.distributed.checkpoint as DCP
+    import torch.distributed.checkpoint as dcp
     import torch.nn as nn

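The lowercase alias matches how the recipe refers to the module elsewhere. For orientation, a minimal single-process sketch of the save/load round trip, assuming a recent PyTorch where ``dcp.save`` and ``dcp.load`` accept a ``checkpoint_id`` (the hypothetical directory name below is illustrative):

import torch
import torch.distributed.checkpoint as dcp
import torch.nn as nn

model = nn.Linear(4, 4)

# Write the state dict to a checkpoint directory.
dcp.save(model.state_dict(), checkpoint_id="my_checkpoint_dir")

# dcp.load restores tensors in place into a pre-allocated state dict.
state_dict = model.state_dict()
dcp.load(state_dict, checkpoint_id="my_checkpoint_dir")
model.load_state_dict(state_dict)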

recipes_source/mobile_interpreter.rst

Lines changed: 3 additions & 0 deletions
@@ -3,6 +3,9 @@

 **Author**: `Chen Lai <https://github.com/cccclai>`_, `Martin Yuan <https://github.com/iseeyuan>`_

+.. warning::
+   PyTorch Mobile is no longer actively supported. Please check out `ExecuTorch <https://pytorch.org/executorch-overview>`_, PyTorch’s all-new on-device inference library. You can also review our new documentation to learn more about how to build `iOS <https://pytorch.org/executorch/stable/demo-apps-ios.html>`_ and `Android <https://pytorch.org/executorch/stable/demo-apps-android.html>`_ apps with ExecuTorch.
+
 Introduction
 ------------

recipes_source/mobile_perf.rst

Lines changed: 4 additions & 1 deletion
@@ -1,6 +1,9 @@
 Pytorch Mobile Performance Recipes
 ==================================

+.. warning::
+   PyTorch Mobile is no longer actively supported. Please check out `ExecuTorch <https://pytorch.org/executorch-overview>`_, PyTorch’s all-new on-device inference library. You can also learn more about `quantization <https://pytorch.org/executorch/stable/quantization-overview.html>`_, `hardware acceleration (op fusion using hardware) <https://pytorch.org/executorch/stable/examples-end-to-end-to-lower-model-to-delegate.html>`_, and `benchmarking <https://pytorch.org/executorch/stable/sdk-profiling.html>`_ in the ExecuTorch documentation.
+
 Introduction
 ----------------
 Performance (aka latency) is crucial to most, if not all,
@@ -245,7 +248,7 @@ For example, using ResNet-50 and running the following script:



-you would get the following result:
+you would get the following result:

 ::

recipes_source/ptmobile_recipes_summary.rst

Lines changed: 3 additions & 0 deletions
@@ -1,6 +1,9 @@
 Summary of PyTorch Mobile Recipes
 =====================================

+.. warning::
+   PyTorch Mobile is no longer actively supported. Please check out `ExecuTorch <https://pytorch.org/executorch-overview>`_, PyTorch’s all-new on-device inference library. You can also review these `ExecuTorch examples <https://github.com/pytorch/executorch/tree/main/examples#readme>`_.
+
 This summary provides a top level overview of recipes for PyTorch Mobile to help developers choose which recipes to follow for their PyTorch-powered mobile app development.

 Introduction

recipes_source/torch_export_aoti_python.py

Lines changed: 6 additions & 2 deletions
@@ -1,7 +1,11 @@
 # -*- coding: utf-8 -*-

 """
-(Beta) ``torch.export`` AOTInductor Tutorial for Python runtime
+.. meta::
+    :description: An end-to-end example of how to use AOTInductor for Python runtime.
+    :keywords: torch.export, AOTInductor, torch._inductor.aot_compile, torch._export.aot_load
+
+``torch.export`` AOTInductor Tutorial for Python runtime (Beta)
 ===============================================================
 **Author:** Ankith Gunapal, Bin Bao, Angela Yi
 """
@@ -18,7 +22,7 @@
 # a shared library that can be run in a non-Python environment.
 #
 #
-# In this tutorial, you will learn an end-to-end example of how to use AOTInductor for python runtime.
+# In this tutorial, you will learn an end-to-end example of how to use AOTInductor for Python runtime.
 # We will look at how to use :func:`torch._inductor.aot_compile` along with :func:`torch.export.export` to generate a
 # shared library. Additionally, we will examine how to execute the shared library in Python runtime using :func:`torch._export.aot_load`.
 # You will learn about the speed up seen in the first inference time using AOTInductor, especially when using
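To situate the retitled tutorial, here is a condensed sketch of the flow it walks through, assuming a PyTorch 2.x build where the private ``torch._inductor.aot_compile`` and ``torch._export.aot_load`` entry points referenced above exist (the toy module is illustrative, not from the tutorial):

import torch

class M(torch.nn.Module):
    def forward(self, x):
        return torch.relu(x) + 1.0

example_inputs = (torch.randn(4, 4),)

# Export the model, then ahead-of-time compile it into a shared library.
exported = torch.export.export(M(), example_inputs)
so_path = torch._inductor.aot_compile(exported.module(), example_inputs)

# Load the shared library back into a Python runtime and run inference.
runner = torch._export.aot_load(so_path, device="cpu")
print(runner(torch.randn(4, 4)))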
