
Commit e622d05

Merge branch 'main' into ts-2340
2 parents: 1d2fb99 + dc448c2

19 files changed (+1459, -67 lines)

.pyspelling.yml

Lines changed: 2 additions & 2 deletions
@@ -19,7 +19,7 @@ matrix:
     - open: '\.\.\s+(figure|literalinclude|math|image|grid)::'
       close: '\n'
     # Exclude roles:
-    - open: ':(?:(class|py:mod|mod|func)):`'
+    - open: ':(?:(class|py:mod|mod|func|meth|obj)):`'
       content: '[^`]*'
       close: '`'
     # Exclude reStructuredText hyperlinks
@@ -70,7 +70,7 @@ matrix:
     - open: ':figure:.*'
      close: '\n'
     # Ignore reStructuredText roles
-    - open: ':(?:(class|file|func|math|ref|octicon)):`'
+    - open: ':(?:(class|file|func|math|ref|octicon|meth|obj)):`'
      content: '[^`]*'
      close: '`'
    - open: ':width:'
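The practical effect of the extra ``meth``/``obj`` alternatives is that method and object cross-references no longer reach the spellchecker. A minimal standalone sketch, assuming pyspelling applies ``open`` as a regex that begins an ignored span:

    import re

    # The widened role pattern from this commit, checked against a sample line.
    role_open = re.compile(r':(?:(class|py:mod|mod|func|meth|obj)):`')

    line = "See :meth:`torch.Tensor.add_` and :obj:`torch.nn.Module` for details."
    assert role_open.search(line) is not None  # role bodies are now skipped, not spellchecked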
_static/img/distributed/fsdp_sharding.png (91 KB, binary image)

_static/img/pendulum.gif (122 KB, binary image)

_static/img/rollout_recurrent.png (338 KB, binary image)

advanced_source/pendulum.py

Lines changed: 912 additions & 0 deletions
Large diffs are not rendered by default.

advanced_source/static_quantization_tutorial.rst

Lines changed: 5 additions & 4 deletions
@@ -206,14 +206,15 @@ Note: this code is taken from

     # Fuse Conv+BN and Conv+BN+Relu modules prior to quantization
     # This operation does not change the numerics
-    def fuse_model(self):
+    def fuse_model(self, is_qat=False):
+        fuse_modules = torch.ao.quantization.fuse_modules_qat if is_qat else torch.ao.quantization.fuse_modules
         for m in self.modules():
             if type(m) == ConvBNReLU:
-                torch.ao.quantization.fuse_modules(m, ['0', '1', '2'], inplace=True)
+                fuse_modules(m, ['0', '1', '2'], inplace=True)
             if type(m) == InvertedResidual:
                 for idx in range(len(m.conv)):
                     if type(m.conv[idx]) == nn.Conv2d:
-                        torch.ao.quantization.fuse_modules(m.conv, [str(idx), str(idx + 1)], inplace=True)
+                        fuse_modules(m.conv, [str(idx), str(idx + 1)], inplace=True)

 2. Helper functions
 -------------------

@@ -533,7 +534,7 @@ We fuse modules as before

 .. code:: python

     qat_model = load_model(saved_model_dir + float_model_file)
-    qat_model.fuse_model()
+    qat_model.fuse_model(is_qat=True)

     optimizer = torch.optim.SGD(qat_model.parameters(), lr = 0.0001)
     # The old 'fbgemm' is still available but 'x86' is the recommended default.
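Read on its own, the new signature simply routes fusion to the right helper for each workflow. A self-contained sketch of that dispatch on a toy Conv-BN-ReLU block (the block and helper name are assumptions, not the tutorial's MobileNetV2 code):

    import torch
    import torch.nn as nn

    # Hypothetical helper mirroring the dispatch added above: eager-mode
    # post-training quantization uses fuse_modules, QAT uses fuse_modules_qat.
    def fuse_conv_bn_relu(block: nn.Sequential, is_qat: bool = False) -> nn.Sequential:
        fuse = (torch.ao.quantization.fuse_modules_qat
                if is_qat else torch.ao.quantization.fuse_modules)
        return fuse(block, ['0', '1', '2'], inplace=False)

    block = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
    ptq_fused = fuse_conv_bn_relu(block.eval())                # PTQ fuses in eval mode
    qat_fused = fuse_conv_bn_relu(block.train(), is_qat=True)  # QAT fuses in train mode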

advanced_source/super_resolution_with_onnxruntime.py

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 .. code-block:: bash

    %%bash
-   pip install onnxruntime
+   pip install onnx onnxruntime

 ONNX Runtime recommends using the latest stable runtime for PyTorch.

beginner_source/introyt/tensorboardyt_tutorial.py

Lines changed: 3 additions & 2 deletions
@@ -214,13 +214,14 @@ def forward(self, x):
     # Check against the validation set
     running_vloss = 0.0

-    net.train(False) # Don't need to track gradents for validation
+    # In evaluation mode some model-specific operations can be omitted, e.g. dropout layers
+    net.train(False) # Switch to evaluation mode, e.g. turning off regularization
     for j, vdata in enumerate(validation_loader, 0):
         vinputs, vlabels = vdata
         voutputs = net(vinputs)
         vloss = criterion(voutputs, vlabels)
         running_vloss += vloss.item()
-    net.train(True) # Turn gradients back on for training
+    net.train(True) # Switch back to training mode, e.g. turning on regularization

     avg_loss = running_loss / 1000
     avg_vloss = running_vloss / len(validation_loader)
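The reworded comments separate two switches that are easy to conflate. A toy illustration (an assumption, not part of the tutorial's code): train/eval mode toggles layers such as Dropout, while gradient tracking is controlled independently by ``torch.no_grad()``.

    import torch
    import torch.nn as nn

    net = nn.Sequential(nn.Linear(4, 4), nn.Dropout(p=0.5))
    x = torch.randn(2, 4)

    net.train(False)       # evaluation mode: Dropout becomes a pass-through
    with torch.no_grad():  # separately, skip building the autograd graph
        y_eval = net(x)
    net.train(True)        # back to training mode for the next epoch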

beginner_source/onnx/export_simple_model_to_onnx_tutorial.py

Lines changed: 5 additions & 5 deletions
@@ -90,19 +90,19 @@ def forward(self, x):

 torch_model = MyModel()
 torch_input = torch.randn(1, 1, 32, 32)
-export_output = torch.onnx.dynamo_export(torch_model, torch_input)
+onnx_program = torch.onnx.dynamo_export(torch_model, torch_input)

 ######################################################################
 # As we can see, we didn't need any code change to the model.
-# The resulting ONNX model is stored within ``torch.onnx.ExportOutput`` as a binary protobuf file.
+# The resulting ONNX model is stored within ``torch.onnx.ONNXProgram`` as a binary protobuf file.
 #
 # 4. Save the ONNX model in a file
 # --------------------------------
 #
 # Although having the exported model loaded in memory is useful in many applications,
 # we can save it to disk with the following code:

-export_output.save("my_image_classifier.onnx")
+onnx_program.save("my_image_classifier.onnx")

 ######################################################################
 # You can load the ONNX file back into memory and check if it is well formed with the following code:

@@ -155,7 +155,7 @@ def forward(self, x):

 import onnxruntime

-onnx_input = export_output.adapt_torch_inputs_to_onnx(torch_input)
+onnx_input = onnx_program.adapt_torch_inputs_to_onnx(torch_input)
 print(f"Input length: {len(onnx_input)}")
 print(f"Sample input: {onnx_input}")

@@ -179,7 +179,7 @@ def to_numpy(tensor):
 # Before comparing the results, we need to convert the PyTorch's output to match ONNX's format.

 torch_outputs = torch_model(torch_input)
-torch_outputs = export_output.adapt_torch_outputs_to_onnx(torch_outputs)
+torch_outputs = onnx_program.adapt_torch_outputs_to_onnx(torch_outputs)

 assert len(torch_outputs) == len(onnxruntime_outputs)
 for torch_output, onnxruntime_output in zip(torch_outputs, onnxruntime_outputs):
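The "load the ONNX file back into memory and check if it is well formed" step referenced above falls outside this hunk; a short sketch of it, assuming the file saved by ``onnx_program.save``:

    import onnx

    onnx_model = onnx.load("my_image_classifier.onnx")
    onnx.checker.check_model(onnx_model)  # raises if the model is malformed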

beginner_source/onnx/onnx_registry_tutorial.py

Lines changed: 28 additions & 28 deletions
@@ -114,7 +114,7 @@ def custom_aten_add(input_x, input_y, alpha: float = 1.0):
     {onnx_registry.is_registered_op(namespace='aten', op_name='add', overload='Tensor')}"
 )
 export_options = torch.onnx.ExportOptions(onnx_registry=onnx_registry)
-export_output = torch.onnx.dynamo_export(
+onnx_program = torch.onnx.dynamo_export(
     aten_add_model, input_add_x, input_add_y, export_options=export_options
 )

@@ -125,14 +125,14 @@ def custom_aten_add(input_x, input_y, alpha: float = 1.0):
 #

 # graph node domain is the custom domain we registered
-assert export_output.model_proto.graph.node[0].domain == "custom.aten"
-assert len(export_output.model_proto.graph.node) == 1
+assert onnx_program.model_proto.graph.node[0].domain == "custom.aten"
+assert len(onnx_program.model_proto.graph.node) == 1
 # graph node name is the function name
-assert export_output.model_proto.graph.node[0].op_type == "custom_aten_add"
+assert onnx_program.model_proto.graph.node[0].op_type == "custom_aten_add"
 # function node domain is empty because we use standard ONNX operators
-assert export_output.model_proto.functions[0].node[3].domain == ""
+assert onnx_program.model_proto.functions[0].node[3].domain == ""
 # function node name is the standard ONNX operator name
-assert export_output.model_proto.functions[0].node[3].op_type == "Add"
+assert onnx_program.model_proto.functions[0].node[3].op_type == "Add"


 ######################################################################
@@ -155,20 +155,20 @@ def custom_aten_add(input_x, input_y, alpha: float = 1.0):


 # Use ONNX Runtime to run the model, and compare the results with PyTorch
-export_output.save("./custom_add_model.onnx")
+onnx_program.save("./custom_add_model.onnx")
 ort_session = onnxruntime.InferenceSession(
     "./custom_add_model.onnx", providers=['CPUExecutionProvider']
 )

 def to_numpy(tensor):
     return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

-onnx_input = export_output.adapt_torch_inputs_to_onnx(input_add_x, input_add_y)
+onnx_input = onnx_program.adapt_torch_inputs_to_onnx(input_add_x, input_add_y)
 onnxruntime_input = {k.name: to_numpy(v) for k, v in zip(ort_session.get_inputs(), onnx_input)}
 onnxruntime_outputs = ort_session.run(None, onnxruntime_input)

 torch_outputs = aten_add_model(input_add_x, input_add_y)
-torch_outputs = export_output.adapt_torch_outputs_to_onnx(torch_outputs)
+torch_outputs = onnx_program.adapt_torch_outputs_to_onnx(torch_outputs)

 assert len(torch_outputs) == len(onnxruntime_outputs)
 for torch_output, onnxruntime_output in zip(torch_outputs, onnxruntime_outputs):
@@ -225,7 +225,7 @@ def custom_aten_gelu(input_x, approximate: str = "none"):
 aten_gelu_model = CustomGelu()
 input_gelu_x = torch.randn(3, 3)

-export_output = torch.onnx.dynamo_export(
+onnx_program = torch.onnx.dynamo_export(
     aten_gelu_model, input_gelu_x, export_options=export_options
 )

@@ -238,13 +238,13 @@ def custom_aten_gelu(input_x, approximate: str = "none"):
 #

 # graph node domain is the custom domain we registered
-assert export_output.model_proto.graph.node[0].domain == "com.microsoft"
+assert onnx_program.model_proto.graph.node[0].domain == "com.microsoft"
 # graph node name is the function name
-assert export_output.model_proto.graph.node[0].op_type == "custom_aten_gelu"
+assert onnx_program.model_proto.graph.node[0].op_type == "custom_aten_gelu"
 # function node domain is the custom domain we registered
-assert export_output.model_proto.functions[0].node[0].domain == "com.microsoft"
+assert onnx_program.model_proto.functions[0].node[0].domain == "com.microsoft"
 # function node name is the node name used in the function
-assert export_output.model_proto.functions[0].node[0].op_type == "Gelu"
+assert onnx_program.model_proto.functions[0].node[0].op_type == "Gelu"


 ######################################################################
@@ -263,20 +263,20 @@ def custom_aten_gelu(input_x, approximate: str = "none"):
 # and compare the results with PyTorch.
 #

-export_output.save("./custom_gelu_model.onnx")
+onnx_program.save("./custom_gelu_model.onnx")
 ort_session = onnxruntime.InferenceSession(
     "./custom_gelu_model.onnx", providers=['CPUExecutionProvider']
 )

 def to_numpy(tensor):
     return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

-onnx_input = export_output.adapt_torch_inputs_to_onnx(input_gelu_x)
+onnx_input = onnx_program.adapt_torch_inputs_to_onnx(input_gelu_x)
 onnxruntime_input = {k.name: to_numpy(v) for k, v in zip(ort_session.get_inputs(), onnx_input)}
 onnxruntime_outputs = ort_session.run(None, onnxruntime_input)

 torch_outputs = aten_gelu_model(input_gelu_x)
-torch_outputs = export_output.adapt_torch_outputs_to_onnx(torch_outputs)
+torch_outputs = onnx_program.adapt_torch_outputs_to_onnx(torch_outputs)

 assert len(torch_outputs) == len(onnxruntime_outputs)
 for torch_output, onnxruntime_output in zip(torch_outputs, onnxruntime_outputs):
@@ -365,24 +365,24 @@ def custom_addandround(input_x):
 )

 export_options = torch.onnx.ExportOptions(onnx_registry=onnx_registry)
-export_output = torch.onnx.dynamo_export(
+onnx_program = torch.onnx.dynamo_export(
     custom_addandround_model, input_addandround_x, export_options=export_options
 )
-export_output.save("./custom_addandround_model.onnx")
+onnx_program.save("./custom_addandround_model.onnx")


 ######################################################################
-# The ``export_output`` exposes the exported model as protobuf through ``export_output.model_proto``.
+# The ``onnx_program`` exposes the exported model as protobuf through ``onnx_program.model_proto``.
 # The graph has one graph nodes for ``custom_addandround``, and inside ``custom_addandround``,
 # there are two function nodes, one for each operator.
 #

-assert export_output.model_proto.graph.node[0].domain == "test.customop"
-assert export_output.model_proto.graph.node[0].op_type == "custom_addandround"
-assert export_output.model_proto.functions[0].node[0].domain == "test.customop"
-assert export_output.model_proto.functions[0].node[0].op_type == "CustomOpOne"
-assert export_output.model_proto.functions[0].node[1].domain == "test.customop"
-assert export_output.model_proto.functions[0].node[1].op_type == "CustomOpTwo"
+assert onnx_program.model_proto.graph.node[0].domain == "test.customop"
+assert onnx_program.model_proto.graph.node[0].op_type == "custom_addandround"
+assert onnx_program.model_proto.functions[0].node[0].domain == "test.customop"
+assert onnx_program.model_proto.functions[0].node[0].op_type == "CustomOpOne"
+assert onnx_program.model_proto.functions[0].node[1].domain == "test.customop"
+assert onnx_program.model_proto.functions[0].node[1].op_type == "CustomOpTwo"


 ######################################################################
@@ -432,12 +432,12 @@ def custom_addandround(input_x):
 # def to_numpy(tensor):
 #     return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
 #
-# onnx_input = export_output.adapt_torch_inputs_to_onnx(input_addandround_x)
+# onnx_input = onnx_program.adapt_torch_inputs_to_onnx(input_addandround_x)
 # onnxruntime_input = {k.name: to_numpy(v) for k, v in zip(ort_session.get_inputs(), onnx_input)}
 # onnxruntime_outputs = ort_session.run(None, onnxruntime_input)
 #
 # torch_outputs = custom_addandround_model(input_addandround_x)
-# torch_outputs = export_output.adapt_torch_outputs_to_onnx(torch_outputs)
+# torch_outputs = onnx_program.adapt_torch_outputs_to_onnx(torch_outputs)
 #
 # assert len(torch_outputs) == len(onnxruntime_outputs)
 # for torch_output, onnxruntime_output in zip(torch_outputs, onnxruntime_outputs):
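Each of the runs above ends by comparing PyTorch and ONNX Runtime outputs elementwise. A self-contained sketch of that comparison, with dummy tensors standing in for the real outputs (ONNX Runtime returns numpy arrays, hence the conversion):

    import torch

    torch_outputs = [torch.ones(3, 3)]
    onnxruntime_outputs = [torch.ones(3, 3).numpy()]

    for torch_output, onnxruntime_output in zip(torch_outputs, onnxruntime_outputs):
        torch.testing.assert_close(torch_output, torch.tensor(onnxruntime_output))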

beginner_source/transformer_tutorial.py

Lines changed: 10 additions & 3 deletions
@@ -29,7 +29,7 @@

 ######################################################################
 # In this tutorial, we train a ``nn.TransformerEncoder`` model on a
-# language modeling task. Please note that this tutorial does not cover
+# causal language modeling task. Please note that this tutorial does not cover
 # the training of `nn.TransformerDecoder <https://pytorch.org/docs/stable/generated/torch.nn.TransformerDecoder.html#torch.nn.TransformerDecoder>`__, as depicted in
 # the right half of the diagram above. The language modeling task is to assign a
 # probability for the likelihood of a given word (or a sequence of words)
@@ -41,8 +41,10 @@
 # Along with the input sequence, a square attention mask is required because the
 # self-attention layers in ``nn.TransformerDecoder`` are only allowed to attend
 # the earlier positions in the sequence. For the language modeling task, any
-# tokens on the future positions should be masked. To produce a probability
-# distribution over output words, the output of the ``nn.TransformerEncoder``
+# tokens on the future positions should be masked. This masking, combined with the fact
+# that the output embeddings are offset with later positions, ensures that the
+# predictions for position i can depend only on the known outputs at positions less than i.
+# To produce a probability distribution over output words, the output of the ``nn.TransformerEncoder``
 # model is passed through a linear layer to output unnormalized logits.
 # The log-softmax function isn't applied here due to the later use of
 # `CrossEntropyLoss <https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html>`__,
@@ -91,6 +93,11 @@ def forward(self, src: Tensor, src_mask: Tensor = None) -> Tensor:
         """
         src = self.embedding(src) * math.sqrt(self.d_model)
         src = self.pos_encoder(src)
+        if src_mask is None:
+            """Generate a square causal mask for the sequence. The masked positions are filled with float('-inf').
+            Unmasked positions are filled with float(0.0).
+            """
+            src_mask = nn.Transformer.generate_square_subsequent_mask(len(src)).to(device)
         output = self.transformer_encoder(src, src_mask)
         output = self.linear(output)
         return output
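For a concrete picture of the default mask the new branch generates (a quick check, not part of the tutorial): zeros on and below the diagonal let each position attend to itself and the past, while -inf blocks future positions.

    import torch.nn as nn

    print(nn.Transformer.generate_square_subsequent_mask(4))
    # tensor([[0., -inf, -inf, -inf],
    #         [0., 0., -inf, -inf],
    #         [0., 0., 0., -inf],
    #         [0., 0., 0., 0.]])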

en-wordlist.txt

Lines changed: 1 addition & 0 deletions
@@ -62,6 +62,7 @@ Colab
 Conv
 ConvNet
 ConvNets
+customizable
 DCGAN
 DCGANs
 DDP

index.rst

Lines changed: 14 additions & 1 deletion
@@ -312,14 +312,26 @@ What's new in PyTorch tutorials?
    :link: intermediate/mario_rl_tutorial.html
    :tags: Reinforcement-Learning

+.. customcarditem::
+   :header: Recurrent DQN
+   :card_description: Use TorchRL to train recurrent policies
+   :image: _static/img/rollout_recurrent.png
+   :link: intermediate/dqn_with_rnn_tutorial.html
+   :tags: Reinforcement-Learning
+
 .. customcarditem::
    :header: Code a DDPG Loss
    :card_description: Use TorchRL to code a DDPG Loss
    :image: _static/img/half_cheetah.gif
    :link: advanced/coding_ddpg.html
    :tags: Reinforcement-Learning

-
+.. customcarditem::
+   :header: Writing your environment and transforms
+   :card_description: Use TorchRL to code a Pendulum
+   :image: _static/img/pendulum.gif
+   :link: advanced/pendulum.html
+   :tags: Reinforcement-Learning

 .. Deploying PyTorch Models in Production

@@ -959,6 +971,7 @@ Additional Resources
    intermediate/reinforcement_q_learning
    intermediate/reinforcement_ppo
    intermediate/mario_rl_tutorial
+   advanced/pendulum

 .. toctree::
    :maxdepth: 2

intermediate_source/FSDP_tutorial.rst

Lines changed: 9 additions & 0 deletions
@@ -46,6 +46,15 @@ At a high level FSDP works as follow:
 * Run reduce_scatter to sync gradients
 * Discard parameters.

+One way to view FSDP's sharding is to decompose the DDP gradient all-reduce into reduce-scatter and all-gather. Specifically, during the backward pass, FSDP reduces and scatters gradients, ensuring that each rank possesses a shard of the gradients. Then it updates the corresponding shard of the parameters in the optimizer step. Finally, in the subsequent forward pass, it performs an all-gather operation to collect and combine the updated parameter shards.
+
+.. figure:: /_static/img/distributed/fsdp_sharding.png
+   :width: 100%
+   :align: center
+   :alt: FSDP allreduce
+
+   FSDP Allreduce
+
 How to use FSDP
 ---------------
 Here we use a toy model to run training on the MNIST dataset for demonstration purposes. The APIs and logic can be applied to training larger models as well.
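The decomposition described in the added paragraph can be checked numerically. A toy sketch (not from the tutorial) that simulates two ranks as rows of one tensor: an all-reduce is equivalent to a reduce-scatter followed by an all-gather.

    import torch

    grads = torch.tensor([[1., 2., 3., 4.],    # rank 0's local gradient
                          [5., 6., 7., 8.]])   # rank 1's local gradient

    all_reduced = grads.sum(dim=0)   # what DDP's all-reduce gives every rank

    shards = all_reduced.chunk(2)    # reduce-scatter: each rank keeps one shard
    reassembled = torch.cat(shards)  # all-gather: shards exchanged and rejoined

    assert torch.equal(reassembled, all_reduced)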
