
Commit 90ff2b1

Svetlana Karslioglu authored
Merge branch 'main' into issue_1993
2 parents fc0a023 + 994bd83

File tree: 5 files changed, +54 -55 lines changed


advanced_source/neural_style_tutorial.py

Lines changed: 7 additions & 6 deletions
@@ -14,7 +14,7 @@
 developed by Leon A. Gatys, Alexander S. Ecker and Matthias Bethge.
 Neural-Style, or Neural-Transfer, allows you to take an image and
 reproduce it with a new artistic style. The algorithm takes three images,
-an input image, a content-image, and a style-image, and changes the input
+an input image, a content-image, and a style-image, and changes the input
 to resemble the content of the content-image and the artistic style of the style-image.

@@ -70,6 +70,7 @@
 # method is used to move tensors or modules to a desired device.

 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+torch.set_default_device(device)

 ######################################################################
 # Loading the Images

@@ -261,7 +262,7 @@ def forward(self, input):
 # network to evaluation mode using ``.eval()``.
 #

-cnn = models.vgg19(pretrained=True).features.to(device).eval()
+cnn = models.vgg19(pretrained=True).features.eval()

@@ -271,8 +272,8 @@ def forward(self, input):
 # We will use them to normalize the image before sending it into the network.
 #

-cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406]).to(device)
-cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device)
+cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406])
+cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225])

 # create a module to normalize input image so we can easily put it in a
 # ``nn.Sequential``

@@ -308,7 +309,7 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
                                content_layers=content_layers_default,
                                style_layers=style_layers_default):
     # normalization module
-    normalization = Normalization(normalization_mean, normalization_std).to(device)
+    normalization = Normalization(normalization_mean, normalization_std)

     # just in order to have an iterable access to or list of content/style
     # losses

@@ -373,7 +374,7 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
 #
 # ::
 #
-#    input_img = torch.randn(content_img.data.size(), device=device)
+#    input_img = torch.randn(content_img.data.size())

 # add the original input image to the figure:
 plt.figure()
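
Note on the pattern above: ``torch.set_default_device`` (added in PyTorch 2.0) makes factory functions and newly constructed modules allocate on the chosen device, which is why the explicit ``.to(device)`` and ``device=device`` arguments can be dropped. A minimal sketch of the behavior, assuming PyTorch >= 2.0:

    import torch

    # Pick an accelerator when available, otherwise fall back to CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    torch.set_default_device(device)

    # Factory functions now allocate on `device` without an explicit argument.
    x = torch.randn(3, 3)
    assert x.device.type == device.type

    # Newly constructed modules create their parameters on `device` too,
    # so the `.to(device)` calls removed in this diff become redundant.
    layer = torch.nn.Linear(3, 3)
    assert layer.weight.device.type == device.type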

beginner_source/examples_autograd/polynomial_autograd.py

Lines changed: 7 additions & 7 deletions
@@ -18,23 +18,23 @@
 import math

 dtype = torch.float
-device = torch.device("cpu")
-# device = torch.device("cuda:0")  # Uncomment this to run on GPU
+device = "cuda" if torch.cuda.is_available() else "cpu"
+torch.set_default_device(device)

 # Create Tensors to hold input and outputs.
 # By default, requires_grad=False, which indicates that we do not need to
 # compute gradients with respect to these Tensors during the backward pass.
-x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
+x = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype)
 y = torch.sin(x)

 # Create random Tensors for weights. For a third order polynomial, we need
 # 4 weights: y = a + b x + c x^2 + d x^3
 # Setting requires_grad=True indicates that we want to compute gradients with
 # respect to these Tensors during the backward pass.
-a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
-b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
-c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
-d = torch.randn((), device=device, dtype=dtype, requires_grad=True)
+a = torch.randn((), dtype=dtype, requires_grad=True)
+b = torch.randn((), dtype=dtype, requires_grad=True)
+c = torch.randn((), dtype=dtype, requires_grad=True)
+d = torch.randn((), dtype=dtype, requires_grad=True)

 learning_rate = 1e-6
 for t in range(2000):
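
For context, the tensors set up above feed this tutorial's manual training loop. A condensed sketch of that loop, paraphrased from the tutorial body (not part of this diff) and continuing from the setup in the hunk:

    for t in range(2000):
        # Forward pass: predict y as a third-order polynomial.
        y_pred = a + b * x + c * x ** 2 + d * x ** 3

        # Loss: sum of squared errors.
        loss = (y_pred - y).pow(2).sum()

        # Backward pass: autograd fills a.grad, b.grad, c.grad, d.grad.
        loss.backward()

        # Gradient descent update, excluded from autograd tracking.
        with torch.no_grad():
            for p in (a, b, c, d):
                p -= learning_rate * p.grad
                p.grad = None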

prototype_source/quantization_in_pytorch_2_0_export_tutorial.rst

Lines changed: 30 additions & 35 deletions
@@ -14,54 +14,46 @@ have significantly higher model coverage, better programmability, and
 a simplified UX.

 Prerequisites:
------------------------
+^^^^^^^^^^^^^^^^

-- `Understanding of torchdynamo concepts in PyTorch <https://pytorch.org/docs/stable/dynamo/index.html>`__
-- `Understanding of the quantization concepts in PyTorch <https://pytorch.org/docs/master/quantization.html#quantization-api-summary>`__
-- `Understanding of FX Graph Mode post training static quantization <https://pytorch.org/tutorials/prototype/fx_graph_mode_ptq_static.html>`__
-- `Understanding of BackendConfig in PyTorch Quantization FX Graph Mode <https://pytorch.org/tutorials/prototype/backend_config_tutorial.html?highlight=backend>`__
-- `Understanding of QConfig and QConfigMapping in PyTorch Quantization FX Graph Mode <https://pytorch.org/tutorials/prototype/backend_config_tutorial.html#set-up-qconfigmapping-that-satisfies-the-backend-constraints>`__
+- `Torchdynamo concepts in PyTorch <https://pytorch.org/docs/stable/dynamo/index.html>`__
+- `Quantization concepts in PyTorch <https://pytorch.org/docs/master/quantization.html#quantization-api-summary>`__
+- `FX Graph Mode post training static quantization <https://pytorch.org/tutorials/prototype/fx_graph_mode_ptq_static.html>`__
+- `BackendConfig in PyTorch Quantization FX Graph Mode <https://pytorch.org/tutorials/prototype/backend_config_tutorial.html?highlight=backend>`__
+- `QConfig and QConfigMapping in PyTorch Quantization FX Graph Mode <https://pytorch.org/tutorials/prototype/backend_config_tutorial.html#set-up-qconfigmapping-that-satisfies-the-backend-constraints>`__
+
+Introduction:
+^^^^^^^^^^^^^^^^

 Previously in ``FX Graph Mode Quantization`` we were using ``QConfigMapping`` for users to specify how the model to be quantized
 and ``BackendConfig`` to specify the supported ways of quantization in their backend.
 This API covers most use cases relatively well, but the main problem is that this API is not fully extensible
 without involvement of the quantization team:

-- This API has limitation around expressing quantization intentions for complicated operator patterns such as in the discussion of
-  `Issue-96288 <https://github.com/pytorch/pytorch/issues/96288>`__ to support ``conv add`` fusion.
-  Supporting ``conv add`` fusion also requires some changes to current already complicated pattern matching code such as in the
-  `PR-97122 <https://github.com/pytorch/pytorch/pull/97122>`__.
-- This API also has limitation around supporting user's advanced quantization intention to quantize their model. For example, if backend
-  developer only wants to quantize inputs and outputs when the ``linear`` has a third input, it requires co-work from quantization
-  team and backend developer.
-- This API uses ``QConfigMapping`` and ``BackendConfig`` as separate object. ``QConfigMapping`` describes user's
-  intention of how they want their model to be quantized. ``BackendConfig`` describes what kind of quantization a backend support.
-  ``BackendConfig`` is backend specific, but ``QConfigMapping`` is not. And user can provide a ``QConfigMapping``
-  that is incompatible with a specific ``BackendConfig``. This is not a great UX. Ideally, we can structure this better
-  by making both configuration (``QConfigMapping``) and quantization capability (``BackendConfig``) backend
-  specific. So there will be less confusion about incompatibilities.
-- In ``QConfig``, we are exposing observer/fake_quant classes as an object for user to configure quantization.
-  This increases the things that user needs to care about, e.g. not only the ``dtype`` but also how the
-  observation should happen. These could potentially be hidden from user to make user interface simpler.
-
-To address these scalability issues,
+- This API has limitations in expressing advanced quantization intentions and complicated operator patterns,
+  such as the ``conv add`` fusion discussed in `Issue-96288 <https://github.com/pytorch/pytorch/issues/96288>`__.
+- This API uses ``QConfigMapping`` and ``BackendConfig`` as separate objects in the quantization configuration,
+  which may cause confusion about incompatibilities between the two. These configurations also require users to
+  know quantization details that could be hidden behind a simpler interface.
+
+To address these issues,
 `Quantizer <https://github.com/pytorch/pytorch/blob/3e988316b5976df560c51c998303f56a234a6a1f/torch/ao/quantization/_pt2e/quantizer/quantizer.py#L160>`__
 is introduced for quantization in PyTorch 2.0 export. ``Quantizer`` is a class that users can use to
 programmatically set the quantization specifications for input and output of each node in the model graph. It adds flexibility
 to the quantization API and allows modeling users and backend developers to configure quantization programmatically.
 This will allow users to express how they want an operator pattern to be observed in a more explicit
-way by annotating the appropriate nodes. A backend specific quantizer inherited from base quantizer,
-some methods that need to be implemented:
-
-- `annotate method <https://github.com/pytorch/pytorch/blob/3e988316b5976df560c51c998303f56a234a6a1f/torch/ao/quantization/_pt2e/quantizer/qnnpack_quantizer.py#L269>`__
-  is used to annotate nodes in the graph with
-  `QuantizationAnnotation <https://github.com/pytorch/pytorch/blob/07104ca99c9d297975270fb58fda786e60b49b38/torch/ao/quantization/_pt2e/quantizer/quantizer.py#L144>`__
-  objects to convey the desired way of quantization.
+way by annotating the appropriate nodes.

 Imagine a backend developer who wishes to integrate a third-party backend
 with PyTorch's quantization 2.0 flow. To accomplish this, they would only need
-to define the backend specific quantizer. The high level architecture of
-quantization 2.0 with quantizer could look like this:
+to define a backend specific quantizer, which inherits from the base quantizer.
+The main method that needs to be implemented for the backend specific quantizer is the
+`annotate method <https://github.com/pytorch/pytorch/blob/3e988316b5976df560c51c998303f56a234a6a1f/torch/ao/quantization/_pt2e/quantizer/qnnpack_quantizer.py#L269>`__,
+which is used to annotate nodes in the graph with
+`QuantizationAnnotation <https://github.com/pytorch/pytorch/blob/07104ca99c9d297975270fb58fda786e60b49b38/torch/ao/quantization/_pt2e/quantizer/quantizer.py#L144>`__
+objects to convey the desired way of quantization.
+
+The high level architecture of quantization 2.0 with quantizer could look like this:

 ::

@@ -136,6 +128,9 @@ Taking QNNPackQuantizer as an example, the overall Quantization 2.0 flow could b

     # Step 4: Lower Reference Quantized Model into the backend

+Annotation API:
+^^^^^^^^^^^^^^^^^^^
+
 ``Quantizer`` uses annotation API to convey quantization intent for different operators/patterns.
 Annotation API mainly consists of
 `QuantizationSpec <https://github.com/pytorch/pytorch/blob/1ca2e993af6fa6934fca35da6970308ce227ddc7/torch/ao/quantization/_pt2e/quantizer/quantizer.py#L38>`__
@@ -366,8 +361,8 @@ functions that are used in the example:
 `get_bias_qspec <https://github.com/pytorch/pytorch/blob/47cfcf566ab76573452787335f10c9ca185752dc/torch/ao/quantization/_pt2e/quantizer/utils.py#L53>`__
 can be used to get the ``QuantizationSpec`` from ``QuantizationConfig`` for a specific pattern.

-6. Conclusion
----------------------
+Conclusion
+^^^^^^^^^^^^^^^^^^^

 With this tutorial, we introduce the new quantization path in PyTorch 2.0. Users can learn about
 how to define a ``BackendQuantizer`` with the ``QuantizationAnnotation API`` and integrate it into the quantization 2.0 flow.
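
To make the restructured prose concrete, here is a minimal sketch of a backend specific quantizer, assuming the prototype ``_pt2e`` interfaces linked in this diff. The ``MyBackendQuantizer`` name, the default-constructibility of ``QuantizationAnnotation``, and the ``node.meta`` key are assumptions, and the prototype module path may have changed since this commit:

    import torch
    from torch.ao.quantization._pt2e.quantizer.quantizer import (
        QuantizationAnnotation,
        Quantizer,
    )

    class MyBackendQuantizer(Quantizer):  # hypothetical backend specific quantizer
        def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
            # Mark every aten linear node for quantization. A real backend would
            # attach QuantizationSpecs for inputs and outputs instead of an
            # empty annotation (elided here for brevity).
            for node in model.graph.nodes:
                if node.op == "call_function" and node.target is torch.ops.aten.linear.default:
                    node.meta["quantization_annotation"] = QuantizationAnnotation()
            return model

        def validate(self, model: torch.fx.GraphModule) -> None:
            # Optionally verify that the annotations are consistent.
            pass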

recipes_source/recipes/amp_recipe.py

Lines changed: 9 additions & 6 deletions
@@ -76,11 +76,14 @@ def make_model(in_size, out_size, num_layers):
 num_batches = 50
 epochs = 3

+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+torch.set_default_device(device)
+
 # Creates data in default precision.
 # The same data is used for both default and mixed precision trials below.
 # You don't need to manually change inputs' ``dtype`` when enabling mixed precision.
-data = [torch.randn(batch_size, in_size, device="cuda") for _ in range(num_batches)]
-targets = [torch.randn(batch_size, out_size, device="cuda") for _ in range(num_batches)]
+data = [torch.randn(batch_size, in_size) for _ in range(num_batches)]
+targets = [torch.randn(batch_size, out_size) for _ in range(num_batches)]

 loss_fn = torch.nn.MSELoss().cuda()

@@ -116,7 +119,7 @@ def make_model(in_size, out_size, num_layers):
 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
         # Runs the forward pass under ``autocast``.
-        with torch.autocast(device_type='cuda', dtype=torch.float16):
+        with torch.autocast(device_type=device, dtype=torch.float16):
             output = net(input)
             # output is float16 because linear layers ``autocast`` to float16.
             assert output.dtype is torch.float16

@@ -151,7 +154,7 @@ def make_model(in_size, out_size, num_layers):

 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
-        with torch.autocast(device_type='cuda', dtype=torch.float16):
+        with torch.autocast(device_type=device, dtype=torch.float16):
             output = net(input)
             loss = loss_fn(output, target)

@@ -184,7 +187,7 @@ def make_model(in_size, out_size, num_layers):
 start_timer()
 for epoch in range(epochs):
     for input, target in zip(data, targets):
-        with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
+        with torch.autocast(device_type=device, dtype=torch.float16, enabled=use_amp):
             output = net(input)
             loss = loss_fn(output, target)
             scaler.scale(loss).backward()

@@ -202,7 +205,7 @@ def make_model(in_size, out_size, num_layers):

 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
-        with torch.autocast(device_type='cuda', dtype=torch.float16):
+        with torch.autocast(device_type=device, dtype=torch.float16):
             output = net(input)
             loss = loss_fn(output, target)
             scaler.scale(loss).backward()
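
These hunks make the recipe device-agnostic rather than CUDA-only. A condensed sketch of the resulting pattern (a sketch, not the recipe verbatim: it assumes PyTorch >= 2.0 and swaps in ``bfloat16`` on CPU, since CPU autocast does not support ``float16``):

    import torch

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    torch.set_default_device(device)

    net = torch.nn.Linear(128, 128)    # parameters land on `device`
    data = torch.randn(64, 128)        # factory-created tensors do too
    targets = torch.randn(64, 128)
    loss_fn = torch.nn.MSELoss()
    opt = torch.optim.SGD(net.parameters(), lr=1e-3)

    # float16 autocast needs CUDA; CPU autocast supports bfloat16 instead.
    amp_dtype = torch.float16 if device == 'cuda' else torch.bfloat16
    # Gradient scaling is only needed (and only active) for CUDA float16.
    scaler = torch.cuda.amp.GradScaler(enabled=(device == 'cuda'))

    with torch.autocast(device_type=device, dtype=amp_dtype):
        loss = loss_fn(net(data), targets)
    scaler.scale(loss).backward()
    scaler.step(opt)
    scaler.update()
    opt.zero_grad()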

recipes_source/recipes/tuning_guide.py

Lines changed: 1 addition & 1 deletion
@@ -357,7 +357,7 @@ def fused_gelu(x):
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # Instead of calling ``torch.rand(size).cuda()`` to generate a random tensor,
 # produce the output directly on the target device:
-# ``torch.rand(size, device=torch.device('cuda'))``.
+# ``torch.rand(size, device='cuda')``.
 #
 # This is applicable to all functions which create new tensors and accept
 # ``device`` argument:
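
As a quick illustration of the recommendation above (a sketch; both lines produce a CUDA tensor, but the first allocates on CPU first and then copies):

    import torch

    size = (1024, 1024)

    # Allocates on CPU, then copies to the GPU: an extra tensor and a transfer.
    a = torch.rand(size).cuda()

    # Allocates directly on the GPU: no intermediate CPU tensor, no copy.
    b = torch.rand(size, device='cuda')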
