
Commit 6aa8b5e

Merge branch 'master' of github.com:pytorch/tutorials into rl_cuda
2 parents: 9770577 + 17b8b4d

4 files changed, 11 insertions(+), 11 deletions(-)


beginner_source/blitz/autograd_tutorial.py

Lines changed: 4 additions & 4 deletions
@@ -36,9 +36,9 @@
 
 ``Variable`` and ``Function`` are interconnected and build up an acyclic
 graph, that encodes a complete history of computation. Each variable has
-a ``.creator`` attribute that references a ``Function`` that has created
+a ``.grad_fn`` attribute that references a ``Function`` that has created
 the ``Variable`` (except for Variables created by the user - their
-``creator is None``).
+``grad_fn is None``).
 
 If you want to compute the derivatives, you can call ``.backward()`` on
 a ``Variable``. If ``Variable`` is a scalar (i.e. it holds a one element
@@ -61,8 +61,8 @@
 print(y)
 
 ###############################################################
-# ``y`` was created as a result of an operation, so it has a creator.
-print(y.creator)
+# ``y`` was created as a result of an operation, so it has a ``grad_fn``.
+print(y.grad_fn)
 
 ###############################################################
 # Do more operations on y
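
As a quick aside (an illustrative sketch, not part of this commit), the renamed attribute behaves like this under the post-rename autograd API:

    import torch
    from torch.autograd import Variable

    x = Variable(torch.ones(2, 2), requires_grad=True)  # user-created leaf
    y = x + 2                                           # created by an operation

    print(x.grad_fn)    # None - x was created by the user
    print(y.grad_fn)    # an autograd Function (the add that produced y)

    y.sum().backward()  # compute derivatives back to the leaves
    print(x.grad)       # d(sum(y))/dx = a 2x2 tensor of ones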

beginner_source/blitz/neural_networks_tutorial.py

Lines changed: 5 additions & 5 deletions
@@ -157,15 +157,15 @@ def num_flat_features(self, x):
 # For example:
 
 output = net(input)
-target = Variable(torch.range(1, 10))  # a dummy target, for example
+target = Variable(torch.arange(1, 11))  # a dummy target, for example
 criterion = nn.MSELoss()
 
 loss = criterion(output, target)
 print(loss)
 
 ########################################################################
 # Now, if you follow ``loss`` in the backward direction, using it’s
-# ``.creator`` attribute, you will see a graph of computations that looks
+# ``.grad_fn`` attribute, you will see a graph of computations that looks
 # like this:
 #
 # ::
@@ -181,9 +181,9 @@ def num_flat_features(self, x):
 #
 # For illustration, let us follow a few steps backward:
 
-print(loss.creator)  # MSELoss
-print(loss.creator.previous_functions[0][0])  # Linear
-print(loss.creator.previous_functions[0][0].previous_functions[0][0])  # ReLU
+print(loss.grad_fn)  # MSELoss
+print(loss.grad_fn.next_functions[0][0])  # Linear
+print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # ReLU
 
 ########################################################################
 # Backprop
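
A side note on the two renames above (an illustrative sketch, not part of the diff): ``torch.range(1, 10)`` was end-inclusive and is deprecated, while ``torch.arange(1, 11)`` is end-exclusive, so both produce the same ten values 1 through 10. The graph walk itself is unchanged apart from the attribute names, with ``grad_fn`` replacing ``creator`` and ``next_functions`` replacing ``previous_functions``.

    import torch

    target = torch.arange(1, 11)  # ten values: 1, 2, ..., 10 (end is exclusive)
    print(target.numel())         # 10, the same count torch.range(1, 10) gave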

beginner_source/nlp/advanced_tutorial.py

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 Advanced: Making Dynamic Decisions and the Bi-LSTM CRF
 ======================================================
 
-Dyanmic versus Static Deep Learning Toolkits
+Dynamic versus Static Deep Learning Toolkits
 --------------------------------------------
 
 Pytorch is a *dynamic* neural network kit. Another example of a dynamic

beginner_source/nlp/word_embeddings_tutorial.py

Lines changed: 1 addition & 1 deletion
@@ -290,7 +290,7 @@ def forward(self, inputs):
 # and :math:`w_{i+1}, \dots, w_{i+N}`, referring to all context words
 # collectively as :math:`C`, CBOW tries to minimize
 #
-# .. math:: -\log p(w_i | C) = \log \text{Softmax}(A(\sum_{w \in C} q_w) + b)
+# .. math:: -\log p(w_i | C) = -\log \text{Softmax}(A(\sum_{w \in C} q_w) + b)
 #
 # where :math:`q_w` is the embedding for word :math:`w`.
 #
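
To see why the added minus sign matters (a numerical sketch with made-up sizes, not part of the commit): the corrected right-hand side is the negative log-probability of the target word, which is non-negative and therefore usable as a loss, whereas the old version had the opposite sign.

    import torch
    import torch.nn.functional as F

    vocab_size, embed_dim = 5, 4             # made-up sizes for illustration
    q = torch.randn(3, embed_dim)            # embeddings q_w of 3 context words in C
    A = torch.randn(vocab_size, embed_dim)   # projection A
    b = torch.randn(vocab_size)              # bias b
    target = 2                               # index of w_i, chosen arbitrarily

    scores = A @ q.sum(dim=0) + b                  # A(sum_{w in C} q_w) + b
    loss = -F.log_softmax(scores, dim=0)[target]   # -log p(w_i | C)
    print(loss)                                    # always >= 0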
