Commit 2c5de77

Merge branch 'master' into patch-1
2 parents 310e330 + 917cad5 commit 2c5de77

13 files changed (+1537, -654 lines)

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
@@ -97,7 +97,7 @@ pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults
       sudo pip -q install awscli==1.16.35

       if [ -n "${CUDA_VERSION}" ]; then
-        DRIVER_FN="NVIDIA-Linux-x86_64-410.104.run"
+        DRIVER_FN="NVIDIA-Linux-x86_64-430.40.run"
         wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN"
         sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false)
         nvidia-smi

Makefile

Lines changed: 17 additions & 0 deletions
@@ -81,6 +81,23 @@ download:
 	wget -N https://s3.amazonaws.com/pytorch-tutorial-assets/lenet_mnist_model.pth -P $(DATADIR)
 	cp $(DATADIR)/lenet_mnist_model.pth ./beginner_source/data/lenet_mnist_model.pth

+	# Download model for advanced_source/dynamic_quantization_tutorial.py
+	wget -N https://s3.amazonaws.com/pytorch-tutorial-assets/word_language_model_quantize.pth -P $(DATADIR)
+	cp $(DATADIR)/word_language_model_quantize.pth advanced_source/data/word_language_model_quantize.pth
+
+	# Download data for advanced_source/dynamic_quantization_tutorial.py
+	wget -N https://s3.amazonaws.com/pytorch-tutorial-assets/wikitext-2.zip -P $(DATADIR)
+	unzip -q -o $(DATADIR)/wikitext-2.zip -d advanced_source/data/
+
+	# Download model for advanced_source/static_quantization_tutorial.py
+	wget -N https://s3.amazonaws.com/pytorch-tutorial-assets/mobilenet_quantization.pth -P $(DATADIR)
+	cp $(DATADIR)/mobilenet_quantization.pth advanced_source/data/mobilenet_quantization.pth
+
+	# Download dataset for advanced_source/static_quantization_tutorial.py
+	wget -N https://s3.amazonaws.com/pytorch-tutorial-assets/imagenet_1k.zip -P $(DATADIR)
+	unzip -q -o $(DATADIR)/imagenet_1k.zip -d advanced_source/data/
+
+
 docs:
 	make download
 	make html

_static/img/named_tensor.png (68.1 KB)

_static/img/qat.png (170 KB)

_static/img/quant_asym.png (8.18 KB)
advanced_source/dynamic_quantization_tutorial.py

Lines changed: 299 additions & 0 deletions
@@ -0,0 +1,299 @@
"""
(experimental) Dynamic Quantization on an LSTM Word Language Model
==================================================================

**Author**: `James Reed <https://github.com/jamesr66a>`_

**Edited by**: `Seth Weidman <https://github.com/SethHWeidman/>`_

Introduction
------------

Quantization involves converting the weights and activations of your model from float
to int, which can result in smaller model size and faster inference with only a small
hit to accuracy.

In this tutorial, we'll apply the easiest form of quantization -
`dynamic quantization <https://pytorch.org/docs/stable/quantization.html#torch.quantization.quantize_dynamic>`_ -
to an LSTM-based next-word prediction model, closely following the
`word language model <https://github.com/pytorch/examples/tree/master/word_language_model>`_
from the PyTorch examples.
"""

# imports
import os
from io import open
import time

import torch
import torch.nn as nn
import torch.nn.functional as F

######################################################################
# 1. Define the model
# -------------------
#
# Here we define the LSTM model architecture, following the
# `model <https://github.com/pytorch/examples/blob/master/word_language_model/model.py>`_
# from the word language model example.

class LSTMModel(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder."""

    def __init__(self, ntoken, ninp, nhid, nlayers, dropout=0.5):
        super(LSTMModel, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout)
        self.decoder = nn.Linear(nhid, ntoken)

        self.init_weights()

        self.nhid = nhid
        self.nlayers = nlayers

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, input, hidden):
        emb = self.drop(self.encoder(input))
        output, hidden = self.rnn(emb, hidden)
        output = self.drop(output)
        decoded = self.decoder(output)
        return decoded, hidden

    def init_hidden(self, bsz):
        weight = next(self.parameters())
        return (weight.new_zeros(self.nlayers, bsz, self.nhid),
                weight.new_zeros(self.nlayers, bsz, self.nhid))

######################################################################
# 2. Load in the text data
# ------------------------
#
# Next, we load the
# `Wikitext-2 dataset <https://www.google.com/search?q=wikitext+2+data>`_ into a `Corpus`,
# again following the
# `preprocessing <https://github.com/pytorch/examples/blob/master/word_language_model/data.py>`_
# from the word language model example.

class Dictionary(object):
    def __init__(self):
        self.word2idx = {}
        self.idx2word = []

    def add_word(self, word):
        if word not in self.word2idx:
            self.idx2word.append(word)
            self.word2idx[word] = len(self.idx2word) - 1
        return self.word2idx[word]

    def __len__(self):
        return len(self.idx2word)


class Corpus(object):
    def __init__(self, path):
        self.dictionary = Dictionary()
        self.train = self.tokenize(os.path.join(path, 'train.txt'))
        self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
        self.test = self.tokenize(os.path.join(path, 'test.txt'))

    def tokenize(self, path):
        """Tokenizes a text file."""
        assert os.path.exists(path)
        # Add words to the dictionary
        with open(path, 'r', encoding="utf8") as f:
            for line in f:
                words = line.split() + ['<eos>']
                for word in words:
                    self.dictionary.add_word(word)

        # Tokenize file content
        with open(path, 'r', encoding="utf8") as f:
            idss = []
            for line in f:
                words = line.split() + ['<eos>']
                ids = []
                for word in words:
                    ids.append(self.dictionary.word2idx[word])
                idss.append(torch.tensor(ids).type(torch.int64))
            ids = torch.cat(idss)

        return ids

model_data_filepath = 'data/'

corpus = Corpus(model_data_filepath + 'wikitext-2')

######################################################################
# 3. Load the pre-trained model
# -----------------------------
#
# This is a tutorial on dynamic quantization, a quantization technique
# that is applied after a model has been trained. Therefore, we'll simply load some
# pre-trained weights into this model architecture; these weights were obtained
# by training for five epochs using the default settings in the word language model
# example.

ntokens = len(corpus.dictionary)

model = LSTMModel(
    ntoken = ntokens,
    ninp = 512,
    nhid = 256,
    nlayers = 5,
)

model.load_state_dict(
    torch.load(
        model_data_filepath + 'word_language_model_quantize.pth',
        map_location=torch.device('cpu')
    )
)

model.eval()
print(model)

######################################################################
# Now let's generate some text to ensure that the pre-trained model is working
# properly - similarly to before, we follow the generation script
# `here <https://github.com/pytorch/examples/blob/master/word_language_model/generate.py>`_

input_ = torch.randint(ntokens, (1, 1), dtype=torch.long)
hidden = model.init_hidden(1)
temperature = 1.0
num_words = 1000

with open(model_data_filepath + 'out.txt', 'w') as outf:
    with torch.no_grad():  # no tracking history
        for i in range(num_words):
            output, hidden = model(input_, hidden)
            word_weights = output.squeeze().div(temperature).exp().cpu()
            word_idx = torch.multinomial(word_weights, 1)[0]
            input_.fill_(word_idx)

            word = corpus.dictionary.idx2word[word_idx]

            outf.write(str(word.encode('utf-8')) + ('\n' if i % 20 == 19 else ' '))

            if i % 100 == 0:
                print('| Generated {}/{} words'.format(i, 1000))

with open(model_data_filepath + 'out.txt', 'r') as outf:
    all_output = outf.read()
    print(all_output)

######################################################################
# It's no GPT-2, but it looks like the model has started to learn the structure of
# language!
#
# We're almost ready to demonstrate dynamic quantization. We just need to define a few more
# helper functions:

bptt = 25
criterion = nn.CrossEntropyLoss()
eval_batch_size = 1

# create test data set
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    return data.view(bsz, -1).t().contiguous()

test_data = batchify(corpus.test, eval_batch_size)

# Evaluation functions
def get_batch(source, i):
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i+seq_len]
    target = source[i+1:i+1+seq_len].view(-1)
    return data, target

def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""

    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)

def evaluate(model_, data_source):
    # Turn on evaluation mode which disables dropout.
    model_.eval()
    total_loss = 0.
    hidden = model_.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, bptt):
            data, targets = get_batch(data_source, i)
            output, hidden = model_(data, hidden)
            hidden = repackage_hidden(hidden)
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).item()
    return total_loss / (len(data_source) - 1)
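
######################################################################
# Editor's sketch (not part of the committed tutorial): a quick, assumed sanity
# check of the helpers above. With ``bptt = 25`` and ``eval_batch_size = 1``,
# ``get_batch`` should yield a 25 x 1 input block and a flattened target vector
# of the same number of tokens.

sample_data, sample_targets = get_batch(test_data, 0)
print(sample_data.shape, sample_targets.shape)  # expected: [25, 1] and [25]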

######################################################################
# 4. Test dynamic quantization
# ----------------------------
#
# Finally, we can call ``torch.quantization.quantize_dynamic`` on the model!
# Specifically,
#
# - We specify that we want the ``nn.LSTM`` and ``nn.Linear`` modules in our
#   model to be quantized
# - We specify that we want weights to be converted to ``int8`` values

import torch.quantization

quantized_model = torch.quantization.quantize_dynamic(
    model, {nn.LSTM, nn.Linear}, dtype=torch.qint8
)
print(quantized_model)
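
######################################################################
# Editor's sketch (not part of the committed tutorial): one way to confirm which
# submodules were actually swapped. Given the ``{nn.LSTM, nn.Linear}`` spec above,
# only the LSTM and Linear children should now be dynamically quantized variants,
# while the ``nn.Embedding`` and ``nn.Dropout`` modules are left unchanged.

for name, module in quantized_model.named_modules():
    if name:  # skip the top-level container itself
        print(name, '->', type(module).__name__)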

######################################################################
# The model looks the same; how has this benefited us? First, we see a
# significant reduction in model size:

def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p")/1e6)
    os.remove('temp.p')

print_size_of_model(model)
print_size_of_model(quantized_model)
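
######################################################################
# Editor's sketch (not part of the committed tutorial): much of the remaining
# size comes from parameters that dynamic quantization leaves in float32, chiefly
# the ``nn.Embedding`` encoder, since only ``nn.LSTM`` and ``nn.Linear`` weights
# were converted to int8. Listing the parameters still registered on the module
# makes that visible.

for name, param in quantized_model.named_parameters():
    size_mb = param.numel() * param.element_size() / 1e6
    print('{}: {} ({:.1f} MB)'.format(name, param.dtype, size_mb))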

######################################################################
# Second, we see faster inference time, with no difference in evaluation loss:
#
# Note: we set the number of threads to one for a single-threaded comparison, since
# quantized models run single-threaded.

torch.set_num_threads(1)

def time_model_evaluation(model, test_data):
    s = time.time()
    loss = evaluate(model, test_data)
    elapsed = time.time() - s
    print('''loss: {0:.3f}\nelapsed time (seconds): {1:.1f}'''.format(loss, elapsed))

time_model_evaluation(model, test_data)
time_model_evaluation(quantized_model, test_data)

######################################################################
# Running this locally on a MacBook Pro, without quantization, inference takes about 200 seconds,
# and with quantization it takes only about 100 seconds.
#
# Conclusion
# ----------
#
# Dynamic quantization can be an easy way to reduce model size while only
# having a limited effect on accuracy.
#
# Thanks for reading! As always, we welcome any feedback, so please create an issue
# `here <https://github.com/pytorch/pytorch/issues>`_ if you have any.
