Skip to content

Commit 88d32df

Browse files
authored
Merge branch 'main' into add_amx_doc
2 parents 3d01c9a + 0ef9a65 commit 88d32df

File tree

8 files changed

+463
-25
lines changed

8 files changed

+463
-25
lines changed
Loading

beginner_source/transfer_learning_tutorial.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import matplotlib.pyplot as plt
4545
import time
4646
import os
47+
from PIL import Image
4748
from tempfile import TemporaryDirectory
4849

4950
cudnn.benchmark = True
@@ -337,6 +338,47 @@ def visualize_model(model, num_images=6):
337338
plt.ioff()
338339
plt.show()
339340

341+
342+
######################################################################
343+
# Inference on custom images
344+
# --------------------------
345+
#
346+
# Use the trained model to make predictions on custom images and visualize
347+
# the predicted class labels along with the images.
348+
#
349+
350+
def visualize_model_predictions(model,img_path):
351+
was_training = model.training
352+
model.eval()
353+
354+
img = Image.open(img_path)
355+
img = data_transforms['val'](img)
356+
img = img.unsqueeze(0)
357+
img = img.to(device)
358+
359+
with torch.no_grad():
360+
outputs = model(img)
361+
_, preds = torch.max(outputs, 1)
362+
363+
ax = plt.subplot(2,2,1)
364+
ax.axis('off')
365+
ax.set_title(f'Predicted: {class_names[preds[0]]}')
366+
imshow(img.cpu().data[0])
367+
368+
model.train(mode=was_training)
369+
370+
######################################################################
371+
#
372+
373+
visualize_model_predictions(
374+
model_conv,
375+
img_path='data/hymenoptera_data/val/bees/72100438_73de9f17af.jpg'
376+
)
377+
378+
plt.ioff()
379+
plt.show()
380+
381+
340382
######################################################################
341383
# Further Learning
342384
# -----------------

beginner_source/transformer_tutorial.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Language Modeling with ``nn.Transformer`` and torchtext
33
===============================================================
44
5-
This is a tutorial on training a sequence-to-sequence model that uses the
5+
This is a tutorial on training a model to predict the next word in a sequence using the
66
`nn.Transformer <https://pytorch.org/docs/stable/generated/torch.nn.Transformer.html>`__ module.
77
88
The PyTorch 1.2 release includes a standard transformer module based on the
@@ -29,15 +29,24 @@
2929

3030
######################################################################
3131
# In this tutorial, we train a ``nn.TransformerEncoder`` model on a
32-
# language modeling task. The language modeling task is to assign a
32+
# language modeling task. Please note that this tutorial does not cover
33+
# the training of `nn.TransformerDecoder <https://pytorch.org/docs/stable/generated/torch.nn.TransformerDecoder.html#torch.nn.TransformerDecoder>`__, as depicted in
34+
# the right half of the diagram above. The language modeling task is to assign a
3335
# probability for the likelihood of a given word (or a sequence of words)
3436
# to follow a sequence of words. A sequence of tokens is passed to the embedding
3537
# layer first, followed by a positional encoding layer to account for the order
3638
# of the words (see the next paragraph for more details). The
3739
# ``nn.TransformerEncoder`` consists of multiple layers of
3840
# `nn.TransformerEncoderLayer <https://pytorch.org/docs/stable/generated/torch.nn.TransformerEncoderLayer.html>`__.
39-
# To produce a probability distribution over output words, the output of
40-
# the ``nn.TransformerEncoder`` model is passed through a linear layer.
41+
# Along with the input sequence, a square attention mask is required because the
42+
# self-attention layers in ``nn.TransformerDecoder`` are only allowed to attend to
43+
# the earlier positions in the sequence. For the language modeling task, any
44+
# tokens in future positions should be masked. To produce a probability
45+
# distribution over output words, the output of the ``nn.TransformerEncoder``
46+
# model is passed through a linear layer to output unnormalized logits.
47+
# The log-softmax function isn't applied here due to the later use of
48+
# `CrossEntropyLoss <https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html>`__,
49+
# which requires the inputs to be unnormalized logits.
4150
#
4251

4352
import math
@@ -130,6 +139,7 @@ def forward(self, x: Tensor) -> Tensor:
130139
# .. code-block:: bash
131140
#
132141
# %%bash
142+
# pip install portalocker
133143
# pip install torchdata
134144
#
135145
# The vocab object is built based on the train dataset and is used to numericalize

en-wordlist.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ LeNet
8989
LeakyReLU
9090
LeakyReLUs
9191
Lipschitz
92+
logits
9293
Lua
9394
Luong
9495
MLP

intermediate_source/FSDP_adavnced_tutorial.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ highlight different available features in FSDP that are helpful for training
7575
large-scale models above 3B parameters. Also, we cover specific features for
7676
Transformer-based models. The code for this tutorial is available in `PyTorch
7777
Examples
78-
<https://github.com/HamidShojanazeri/examples/tree/FSDP_example/FSDP/>`__.
78+
<https://github.com/HamidShojanazeri/examples/tree/FSDP_example/distributed/FSDP/>`__.
7979

8080

8181
*Setup*
@@ -97,13 +97,13 @@ Please create a `data` folder, download the WikiHow dataset from `wikihowAll.csv
9797
`wikihowSep.csv <https://ucsb.app.box.com/s/7yq601ijl1lzvlfu4rjdbbxforzd2oag>`__,
9898
and place them in the `data` folder. We will use the wikihow dataset from
9999
`summarization_dataset
100-
<https://github.com/HamidShojanazeri/examples/blob/FSDP_example/FSDP/summarization_dataset.py>`__.
100+
<https://github.com/HamidShojanazeri/examples/blob/FSDP_example/distributed/FSDP/summarization_dataset.py>`__.
101101

102102
Next, we add the following code snippets to a Python script “T5_training.py”.
103103

104104
.. note::
105105
The full source code for this tutorial is available in `PyTorch examples
106-
<https://github.com/HamidShojanazeri/examples/tree/FSDP_example/FSDP>`__.
106+
<https://github.com/HamidShojanazeri/examples/tree/FSDP_example/distributed/FSDP>`__.
107107

108108
1.3 Import necessary packages:
109109

intermediate_source/seq2seq_translation_tutorial.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -440,25 +440,27 @@ def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGT
440440
self.max_length = max_length
441441

442442
self.embedding = nn.Embedding(self.output_size, self.hidden_size)
443-
self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
444-
self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
443+
self.fc_hidden = nn.Linear(self.hidden_size, self.hidden_size, bias=False)
444+
self.fc_encoder = nn.Linear(self.hidden_size, self.hidden_size, bias=False)
445+
self.alignment_vector = nn.Parameter(torch.Tensor(1, hidden_size))
446+
torch.nn.init.xavier_uniform_(self.alignment_vector)
445447
self.dropout = nn.Dropout(self.dropout_p)
446-
self.gru = nn.GRU(self.hidden_size, self.hidden_size)
448+
self.gru = nn.GRU(self.hidden_size * 2, self.hidden_size)
447449
self.out = nn.Linear(self.hidden_size, self.output_size)
448450

449451
def forward(self, input, hidden, encoder_outputs):
450-
embedded = self.embedding(input).view(1, 1, -1)
452+
embedded = self.embedding(input).view(1, -1)
451453
embedded = self.dropout(embedded)
452454

453-
attn_weights = F.softmax(
454-
self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
455-
attn_applied = torch.bmm(attn_weights.unsqueeze(0),
456-
encoder_outputs.unsqueeze(0))
457-
458-
output = torch.cat((embedded[0], attn_applied[0]), 1)
459-
output = self.attn_combine(output).unsqueeze(0)
455+
transformed_hidden = self.fc_hidden(hidden[0])
456+
expanded_hidden_state = transformed_hidden.expand(self.max_length, -1)
457+
alignment_scores = torch.tanh(expanded_hidden_state +
458+
self.fc_encoder(encoder_outputs))
459+
alignment_scores = self.alignment_vector.mm(alignment_scores.T)
460+
attn_weights = F.softmax(alignment_scores, dim=1)
461+
context_vector = attn_weights.mm(encoder_outputs)
460462

461-
output = F.relu(output)
463+
output = torch.cat((embedded, context_vector), 1).unsqueeze(0)
462464
output, hidden = self.gru(output, hidden)
463465

464466
output = F.log_softmax(self.out(output[0]), dim=1)
@@ -761,15 +763,15 @@ def evaluateRandomly(encoder, decoder, n=10):
761763
#
762764

763765
hidden_size = 256
764-
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
765-
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)
766+
encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
767+
attn_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)
766768

767-
trainIters(encoder1, attn_decoder1, 75000, print_every=5000)
769+
trainIters(encoder, attn_decoder, 75000, print_every=5000)
768770

769771
######################################################################
770772
#
771773

772-
evaluateRandomly(encoder1, attn_decoder1)
774+
evaluateRandomly(encoder, attn_decoder)
773775

774776

775777
######################################################################
@@ -787,7 +789,7 @@ def evaluateRandomly(encoder, decoder, n=10):
787789
#
788790

789791
output_words, attentions = evaluate(
790-
encoder1, attn_decoder1, "je suis trop froid .")
792+
encoder, attn_decoder, "je suis trop froid .")
791793
plt.matshow(attentions.numpy())
792794

793795

@@ -817,7 +819,7 @@ def showAttention(input_sentence, output_words, attentions):
817819

818820
def evaluateAndShowAttention(input_sentence):
819821
output_words, attentions = evaluate(
820-
encoder1, attn_decoder1, input_sentence)
822+
encoder, attn_decoder, input_sentence)
821823
print('input =', input_sentence)
822824
print('output =', ' '.join(output_words))
823825
showAttention(input_sentence, output_words, attentions)

prototype_source/prototype_index.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,13 @@ Prototype features are not available as part of binary distributions like PyPI o
6868
:link: ../prototype/numeric_suite_tutorial.html
6969
:tags: Debugging,Quantization
7070

71+
.. customcarditem::
72+
:header: Quantization in PyTorch 2.0 Export Tutorial
73+
:card_description: Learn how to use Quantization in PyTorch 2.0 Export.
74+
:image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png
75+
:link: ../prototype/quantization_in_pytorch_2_0_export_tutorial.html
76+
:tags: Quantization
77+
7178
.. Mobile
7279
7380
.. customcarditem::
@@ -193,6 +200,7 @@ Prototype features are not available as part of binary distributions like PyPI o
193200
prototype/fx_graph_mode_ptq_dynamic.html
194201
prototype/fx_graph_mode_ptq_static.html
195202
prototype/graph_mode_dynamic_bert_tutorial.html
203+
prototype/quantization_in_pytorch_2_0_export_tutorial.html
196204
prototype/ios_gpu_workflow.html
197205
prototype/nnapi_mobilenetv2.html
198206
prototype/tracing_based_selective_build.html

0 commit comments

Comments
 (0)