@@ -114,23 +114,23 @@
 #
 # .. math::
 #   J=\left(\begin{array}{ccc}
-#   \frac{\partial y_{1}}{\partial x_{1}} & \cdots & \frac{\partial y_{m}}{\partial x_{1}}\\
+#   \frac{\partial y_{1}}{\partial x_{1}} & \cdots & \frac{\partial y_{1}}{\partial x_{n}}\\
 #   \vdots & \ddots & \vdots\\
-#   \frac{\partial y_{1}}{\partial x_{n}} & \cdots & \frac{\partial y_{m}}{\partial x_{n}}
+#   \frac{\partial y_{m}}{\partial x_{1}} & \cdots & \frac{\partial y_{m}}{\partial x_{n}}
 #   \end{array}\right)
 #
 # Generally speaking, ``torch.autograd`` is an engine for computing
-# Jacobian-vector product. That is, given any vector
+# vector-Jacobian product. That is, given any vector
 # :math:`v=\left(\begin{array}{cccc} v_{1} & v_{2} & \cdots & v_{m}\end{array}\right)^{T}`,
-# compute the product :math:`J\cdot v`. If :math:`v` happens to be
+# compute the product :math:`v^{T}\cdot J`. If :math:`v` happens to be
 # the gradient of a scalar function :math:`l=g\left(\vec{y}\right)`,
 # that is,
 # :math:`v=\left(\begin{array}{ccc}\frac{\partial l}{\partial y_{1}} & \cdots & \frac{\partial l}{\partial y_{m}}\end{array}\right)^{T}`,
-# then by the chain rule, the Jacobian-vector product would be the
+# then by the chain rule, the vector-Jacobian product would be the
 # gradient of :math:`l` with respect to :math:`\vec{x}`:
 #
 # .. math::
-#   J\cdot v=\left(\begin{array}{ccc}
+#   J^{T}\cdot v=\left(\begin{array}{ccc}
 #   \frac{\partial y_{1}}{\partial x_{1}} & \cdots & \frac{\partial y_{m}}{\partial x_{1}}\\
 #   \vdots & \ddots & \vdots\\
 #   \frac{\partial y_{1}}{\partial x_{n}} & \cdots & \frac{\partial y_{m}}{\partial x_{n}}
@@ -144,12 +144,15 @@
 #   \frac{\partial l}{\partial x_{n}}
 #   \end{array}\right)
 #
-# This characteristic of Jacobian-vector product makes it very
+# (Note that :math:`v^{T}\cdot J` gives a row vector which can be
+# treated as a column vector by taking :math:`J^{T}\cdot v`.)
+#
+# This characteristic of vector-Jacobian product makes it very
 # convenient to feed external gradients into a model that has
 # non-scalar output.

 ###############################################################
-# Now let's take a look at an example of Jacobian-vector product:
+# Now let's take a look at an example of vector-Jacobian product:

 x = torch.randn(3, requires_grad=True)

@@ -162,7 +162,7 @@
 ###############################################################
 # Now in this case ``y`` is no longer a scalar. ``torch.autograd``
 # could not compute the full Jacobian directly, but if we just
-# want the Jacobian-vector product, simply pass the vector to
+# want the vector-Jacobian product, simply pass the vector to
 # ``backward`` as argument:
 v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
 y.backward(v)
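
As a quick sanity check on the formulas above, here is a minimal sketch of the same idea. It is illustrative only: the function ``f`` and the use of ``torch.autograd.functional.jacobian`` are assumptions of this sketch, not taken from the patched tutorial (whose definition of ``y`` is elided from the diff), and it requires a PyTorch version that ships ``torch.autograd.functional``. It builds the full Jacobian explicitly and confirms that ``y.backward(v)`` leaves :math:`J^{T}\cdot v` in ``x.grad``.

import torch

def f(t):
    # Illustrative elementwise function (an assumption of this sketch);
    # its Jacobian with respect to t is diagonal with entries 2 + 2*t.
    return t * 2 + t ** 2

x = torch.randn(3, requires_grad=True)
y = f(x)

v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(v)                                  # accumulates J^T . v into x.grad

# Assumes torch.autograd.functional.jacobian is available (recent PyTorch).
J = torch.autograd.functional.jacobian(f, x)   # full 3x3 Jacobian of f at x
print(torch.allclose(x.grad, J.t() @ v))       # expected: True

Since ``f`` here is elementwise, ``J`` is diagonal and the transpose is a no-op; for a general non-square Jacobian the distinction between :math:`J\cdot v` and :math:`J^{T}\cdot v` (equivalently :math:`v^{T}\cdot J`) is exactly what the patch above corrects.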