Skip to content

Commit 8b05510

Browse files
Merge branch 'master' into master
2 parents ec487a2 + eb732ef commit 8b05510

File tree

3 files changed

+11
-17
lines changed

3 files changed

+11
-17
lines changed

.circleci/scripts/build_for_windows.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ if [[ "${CIRCLE_JOB}" == *worker_* ]]; then
4949
python $DIR/remove_runnable_code.py advanced_source/static_quantization_tutorial.py advanced_source/static_quantization_tutorial.py || true
5050
python $DIR/remove_runnable_code.py beginner_source/hyperparameter_tuning_tutorial.py beginner_source/hyperparameter_tuning_tutorial.py || true
5151
python $DIR/remove_runnable_code.py beginner_source/audio_preprocessing_tutorial.py beginner_source/audio_preprocessing_tutorial.py || true
52-
# Temp remove for mnist download issue.
53-
python $DIR/remove_runnable_code.py beginner_source/fgsm_tutorial.py beginner_source/fgsm_tutorial.py || true
52+
# Temporarily removed because of the MNIST download issue. (Re-enabled for 1.8.1, so the removal step below is commented out.)
53+
# python $DIR/remove_runnable_code.py beginner_source/fgsm_tutorial.py beginner_source/fgsm_tutorial.py || true
5454

5555
export WORKER_ID=$(echo "${CIRCLE_JOB}" | tr -dc '0-9')
5656
count=0

advanced_source/ddp_pipeline.py

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ def forward(self, x):
8989
class Encoder(nn.Module):
9090
def __init__(self, ntoken, ninp, dropout=0.5):
9191
super(Encoder, self).__init__()
92-
self.src_mask = None
9392
self.pos_encoder = PositionalEncoding(ninp, dropout)
9493
self.encoder = nn.Embedding(ntoken, ninp)
9594
self.ninp = ninp
@@ -99,17 +98,9 @@ def init_weights(self):
9998
initrange = 0.1
10099
self.encoder.weight.data.uniform_(-initrange, initrange)
101100

102-
def _generate_square_subsequent_mask(self, sz):
103-
mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
104-
mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
105-
return mask
106-
107101
def forward(self, src):
108-
if self.src_mask is None or self.src_mask.size(0) != src.size(0):
109-
device = src.device
110-
mask = self._generate_square_subsequent_mask(src.size(0)).to(device)
111-
self.src_mask = mask
112-
102+
# Input arrives batch-first (N, S) from the pipeline; transpose to (S, N), i.e. (sequence, batch), for the encoder.
103+
src = src.t()
113104
src = self.encoder(src) * math.sqrt(self.ninp)
114105
return self.pos_encoder(src)
115106

@@ -125,7 +116,8 @@ def init_weights(self):
125116
self.decoder.weight.data.uniform_(-initrange, initrange)
126117

127118
def forward(self, inp):
128-
return self.decoder(inp)
119+
# Need batch dimension first for output of pipeline.
120+
return self.decoder(inp).permute(1, 0, 2)
129121

130122
######################################################################
131123
# Start multiple processes for training
@@ -245,7 +237,8 @@ def get_batch(source, i):
245237
seq_len = min(bptt, len(source) - 1 - i)
246238
data = source[i:i+seq_len]
247239
target = source[i+1:i+1+seq_len].view(-1)
248-
return data, target
240+
# Need batch dimension first for pipeline parallelism.
241+
return data.t(), target
249242

250243
######################################################################
251244
# Model scale and Pipe initialization
@@ -318,8 +311,9 @@ def get_batch(source, i):
318311
# Need to use 'checkpoint=never' since as of PyTorch 1.8, Pipe checkpointing
319312
# doesn't work with DDP.
320313
from torch.distributed.pipeline.sync import Pipe
314+
chunks = 8
321315
model = Pipe(torch.nn.Sequential(
322-
*module_list), chunks = 8, checkpoint="never")
316+
*module_list), chunks = chunks, checkpoint="never")
323317

324318
# Initialize process group and wrap model in DDP.
325319
from torch.nn.parallel import DistributedDataParallel

beginner_source/basics/optimization_tutorial.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
the `previous section <autograd_tutorial.html>`_), and **optimizes** these parameters using gradient descent. For a more
1919
detailed walkthrough of this process, check out this video on `backpropagation from 3Blue1Brown <https://www.youtube.com/watch?v=tIeHLnjs5U8>`__.
2020
21-
Pre-requisite Code
21+
Prerequisite Code
2222
-----------------
2323
We load the code from the previous sections on `Datasets & DataLoaders <data_tutorial.html>`_
2424
and `Build Model <buildmodel_tutorial.html>`_.

0 commit comments

Comments
 (0)