Commit 3aaa3d2

Authored and committed by Svetlana Karslioglu
Pyspelling: intermediate Python tutorials N-Z
1 parent 2dac3e4 commit 3aaa3d2

File tree

7 files changed: +99 -69 lines changed

.pyspelling.yml

Lines changed: 21 additions & 16 deletions
@@ -2,22 +2,27 @@ spellchecker: aspell
 matrix:
 - name: python
   sources:
-  - beginner_source/*.py
-  - intermediate_source/autograd_saved_tensors_hooks_tutorial.py
-  - intermediate_source/ax_multiobjective_nas_tutorial.py
-  - intermediate_source/char_rnn_classification_tutorial.py
-  - intermediate_source/char_rnn_generation_tutorial.py
-  - intermediate_source/custom_function_conv_bn_tutorial.py
-  - intermediate_source/ensembling.py
+  #- beginner_source/*.py
+  #- intermediate_source/autograd_saved_tensors_hooks_tutorial.py
+  #- intermediate_source/ax_multiobjective_nas_tutorial.py
+  #- intermediate_source/char_rnn_classification_tutorial.py
+  #- intermediate_source/char_rnn_generation_tutorial.py
+  #- intermediate_source/custom_function_conv_bn_tutorial.py
+  #- intermediate_source/ensembling.py
   #- intermediate_source/flask_rest_api_tutorial.py
-  - intermediate_source/forward_ad_usage.py
-  - intermediate_source/fx_conv_bn_fuser.py
-  - intermediate_source/fx_profiling_tutorial.py
-  - intermediate_source/jacobians_hessians.py
-  - intermediate_source/mario_rl_tutorial.py
-  - intermediate_source/mnist_train_nas.py
-  - intermediate_source/memory_format_tutorial.py
-  - intermediate_source/model_parallel_tutorial.py
+  #- intermediate_source/forward_ad_usage.py
+  #- intermediate_source/fx_conv_bn_fuser.py
+  #- intermediate_source/fx_profiling_tutorial.py
+  #- intermediate_source/jacobians_hessians.py
+  #- intermediate_source/mario_rl_tutorial.py
+  #- intermediate_source/mnist_train_nas.py
+  #- intermediate_source/memory_format_tutorial.py
+  #- intermediate_source/model_parallel_tutorial.py
+  #- intermediate_source/neural_tangent_kernels.py
+  #- intermediate_source/nvfuser_intro_tutorial.py
+  #- intermediate_source/parametrizations.py
+  #- intermediate_source/per_sample_grads.py
+  - intermediate_source/pipeline_tutorial.py
   dictionary:
     wordlists:
     - en-wordlist.txt
@@ -46,7 +51,7 @@ matrix:
         - open: '(?s)^::\n\n '
           close: '^\n'
         # Ignore reStructuredText block directives
-        - open: '\.\. (code-block)::.*$\n*'
+        - open: '\.\. (code-block|math)::.*$\n*'
           content: '(?P<first>(^(?P<indent>[ ]+).*$\n))(?P<other>(^([ \t]+.*|[ \t]*)$\n)*)'
           close: '(^(?![ \t]+.*$))'
   - pyspelling.filters.markdown:
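The widened ``open`` pattern is the one behavioral change in this file beyond commenting out sources: spell checking now also skips ``.. math::`` directive blocks, not just ``.. code-block::`` ones. A minimal sketch of what that regex matches, checked with Python's ``re`` directly rather than through pyspelling's context filter (an approximation for illustration only):

import re

# The ``open`` pattern from the diff above; pyspelling pairs it with the
# ``content`` and ``close`` patterns, so a bare ``re.search`` is only a rough check.
open_pat = re.compile(r'\.\. (code-block|math)::.*$\n*', re.MULTILINE)

rst = (
    ".. math::\n"
    "\n"
    "    y = Wx + b\n"
    "\n"
    "Prose outside the directive is still spell checked.\n"
)

print(bool(open_pat.search(rst)))                         # True: math blocks now open an ignored region
print(bool(open_pat.search(".. code-block:: python\n")))  # True: code blocks behave as before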

en-wordlist.txt

Lines changed: 25 additions & 0 deletions
@@ -1,3 +1,19 @@
+RPC
+multihead
+GPU's
+Lipschitz
+Frobenius
+reimplement
+reimplements
+reimplementing
+parametrizing
+unparametrized
+submodules
+SPD
+Cayley
+parametrization
+parametrized
+parametrizations
 APIs
 Args
 Autograd
@@ -38,6 +54,7 @@ GANs
 GPUs
 GRU
 GRUs
+GTC
 GeForce
 Goodfellow
 Goodfellow’s
@@ -69,6 +86,7 @@ NAS
 NCHW
 NES
 NLP
+NTK
 NaN
 NeurIPS
 NumPy
@@ -161,6 +179,7 @@ finetuning
 fp
 functorch
 fuser
+geomean
 grayscale
 hardcode
 helpdesk
@@ -204,6 +223,8 @@ ndarrays
 num
 numericalize
 numpy
+nvFuser
+nvFuser's
 optimizable
 optimizer's
 optimizers
@@ -213,6 +234,7 @@ parallelization
 perceptibility
 pipelining
 pointwise
+precompute
 precomputing
 prepend
 preprocess
@@ -229,6 +251,7 @@ quantizing
 queryable
 randint
 readably
+recomputation
 reinitializes
 relu
 reproducibility
@@ -262,6 +285,7 @@ timesteps
 tokenization
 tokenize
 tokenizer
+topologies
 torchaudio
 torchdata
 torchscriptable
@@ -278,6 +302,7 @@ unfused
 unimodal
 unnormalized
 unpickling
+updation
 utils
 vectorization
 vectorize

intermediate_source/neural_tangent_kernels.py

Lines changed: 6 additions & 6 deletions
@@ -58,7 +58,7 @@ def forward(self, x):
 # we will need a function that accepts the parameters of the model and a single
 # input (as opposed to a batch of inputs!) and returns a single output.
 #
-# We'll use ``torch.func.functional_call``, which allows us to call an nn.Module
+# We'll use ``torch.func.functional_call``, which allows us to call an ``nn.Module``
 # using different parameters/buffers, to help accomplish the first step.
 #
 # Keep in mind that the model was originally written to accept a batch of input
@@ -200,21 +200,21 @@ def func_x2(params):
         output, vjp_fn = vjp(func_x1, params)
 
         def get_ntk_slice(vec):
-            # This computes vec @ J(x2).T
+            # This computes ``vec @ J(x2).T``
             # `vec` is some unit vector (a single slice of the Identity matrix)
             vjps = vjp_fn(vec)
-            # This computes J(X1) @ vjps
+            # This computes ``J(X1) @ vjps``
             _, jvps = jvp(func_x2, (params,), vjps)
             return jvps
 
         # Here's our identity matrix
         basis = torch.eye(output.numel(), dtype=output.dtype, device=output.device).view(output.numel(), -1)
         return vmap(get_ntk_slice)(basis)
 
-    # get_ntk(x1, x2) computes the NTK for a single data point x1, x2
-    # Since the x1, x2 inputs to empirical_ntk_ntk_vps are batched,
+    # ``get_ntk(x1, x2)`` computes the NTK for a single data point x1, x2
+    # Since the x1, x2 inputs to ``empirical_ntk_ntk_vps`` are batched,
     # we actually wish to compute the NTK between every pair of data points
-    # between {x1} and {x2}. That's what the vmaps here do.
+    # between {x1} and {x2}. That's what the ``vmaps`` here do.
     result = vmap(vmap(get_ntk, (None, 0)), (0, None))(x1, x2)
 
     if compute == 'full':
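For readers skimming this hunk, ``torch.func.functional_call`` is what lets the tutorial treat a module as a pure function of its parameters before wrapping it with ``vmap``/``vjp``/``jvp``. A minimal sketch with a toy ``nn.Linear`` standing in for the tutorial's model (the names and shapes here are illustrative, not from the commit):

import torch
from torch.func import functional_call

net = torch.nn.Linear(4, 2)
params = {k: v.detach() for k, v in net.named_parameters()}

def fnet_single(params, x):
    # Takes a single input (no batch dimension) and returns a single output,
    # the form the NTK computation above expects before any ``vmap``.
    return functional_call(net, params, (x.unsqueeze(0),)).squeeze(0)

x = torch.randn(4)
print(fnet_single(params, x).shape)  # torch.Size([2])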

intermediate_source/nvfuser_intro_tutorial.py

Lines changed: 25 additions & 25 deletions
@@ -71,7 +71,7 @@
 # networks, so improving the speed of these operations can improve
 # overall network training speed. Future releases of nvFuser will
 # improve the performance of Linear Layers, but for now we will
-# specifically look at the Bias-Dropout-Add-LayerNorm section of this
+# specifically look at the ``Bias-Dropout-Add-LayerNorm`` section of this
 # Transformer Block.
 #
 # .. figure:: /_static/img/nvfuser_intro/nvfuser_transformer_block.png
@@ -154,7 +154,7 @@ def profile_workload(forward_func, grad_output, iteration_count=100, label=""):
         # Run model, forward and backward
         output = forward_func()
         output.backward(grad_output)
-        # delete gradiens to avoid profiling the gradient accumulation
+        # delete gradients to avoid profiling the gradient accumulation
         for p in parameters:
             p.grad = None
 
@@ -165,7 +165,7 @@ def profile_workload(forward_func, grad_output, iteration_count=100, label=""):
         # Run model, forward and backward
         output = forward_func()
         output.backward(grad_output)
-        # delete gradiens to avoid profiling the gradient accumulation
+        # delete gradients to avoid profiling the gradient accumulation
         for p in parameters:
             p.grad = None
 
@@ -265,7 +265,7 @@ def profile_workload(forward_func, grad_output, iteration_count=100, label=""):
 # nvFuser took around 2.4s in total to compile these high speed
 # GPU functions.
 #
-# nvFusers capabilities extend well beyond this initial performance gain.
+# nvFuser's capabilities extend well beyond this initial performance gain.
 #
 
 ######################################################################
@@ -281,7 +281,7 @@ def profile_workload(forward_func, grad_output, iteration_count=100, label=""):
 # To use nvFuser on inputs that change shape from iteration, we
 # generate new input and output gradient tensors and make a few
 # different sizes. Since the last dimension is shared with the
-# parameters and cannot be changed dynamically in LayerNorm, we
+# parameters and cannot be changed dynamically in ``LayerNorm``, we
 # perturb the first two dimensions of the input and gradient tensors.
 #
 
@@ -390,16 +390,16 @@ def profile_workload(forward_func, grad_output, iteration_count=100, label=""):
 #
 
 ######################################################################
-# Defining novel operations with nvFuser and FuncTorch
+# Defining novel operations with nvFuser and functorch
 # ----------------------------------------------------
 #
 # One of the primary benefits of nvFuser is the ability to define
 # novel operations composed of PyTorch “primitives” which are then
 # just-in-time compiled into efficient kernels.
 #
 # PyTorch has strong performance for any individual operation,
-# especially composite operations like LayerNorm. However, if
-# LayerNorm wasn’t already implemented in PyTorch as a composite
+# especially composite operations like ``LayerNorm``. However, if
+# ``LayerNorm`` wasn’t already implemented in PyTorch as a composite
 # operation, then you’d have to define it as a series of simpler
 # (primitive) operations. Let’s make such a definition and run it
 # without nvFuser.
@@ -488,7 +488,7 @@ def primitive_definition(
 #
 # However, the performance is still slower than the original eager
 # mode performance of the composite definition. TorchScript works well
-# when predefined composite operations are used, however TorchScript’s
+# when predefined composite operations are used, however TorchScript
 # application of Autograd saves all of the activations for each
 # operator in the fusion for re-use in the backwards pass. However,
 # this is not typically the optimal choice. Especially when chaining
@@ -499,7 +499,7 @@ def primitive_definition(
 # It’s possible to optimize away many of these unnecessary memory
 # accesses, but it requires building a connected forward and backward
 # graph which isn’t possible with TorchScript. The
-# `memory_efficient_fusion` pass in FuncTorch, however, is such an
+# ``memory_efficient_fusion`` pass in functorch, however, is such an
 # optimization pass. To use this pass, we have to redefine our
 # function to pull the constants inside (for now it’s easiest to make
 # non-tensor constants literals in the function definition):
@@ -527,11 +527,11 @@ def primitive_definition_for_memory_efficient_fusion(
 
 ######################################################################
 # Now, instead of passing our function to TorchScript, we will pass it
-# to FuncTorch’s optimization pass.
+# to functorch optimization pass.
 #
 
 
-# Optimize the model with FuncTorch tracing and the memory efficiency
+# Optimize the model with functorch tracing and the memory efficiency
 # optimization pass
 memory_efficient_primitive_definition = memory_efficient_fusion(
     primitive_definition_for_memory_efficient_fusion
@@ -550,22 +550,22 @@ def primitive_definition_for_memory_efficient_fusion(
 
 ######################################################################
 # This recovers even more speed, but it’s still not as fast as
-# TorchScripts original performance with the composite definition.
+# TorchScript original performance with the composite definition.
 # However, this is still faster than running this new definition
 # without nvFuser, and is still faster than the composite definition
 # without nvFuser.
 #
 # .. figure:: /_static/img/nvfuser_intro/nvfuser_tutorial_5.png
 #
-# .. note:: FuncTorch’s memory efficient pass is experimental and still
+# .. note:: The functorch memory efficient pass is experimental and still
 #           actively in development.
 #           Future versions of the API are expected to achieve performance
 #           closer to that of TorchScript with the composite definition.
 #
-# .. note:: FuncTorch’s memory efficient pass specializes on the shapes of
+# .. note:: The functorch memory efficient pass specializes on the shapes of
 #           the inputs to the function. If new inputs are provided with
 #           different shapes, then you need to construct a new function
-#           using `memory_efficient_fusion` and apply it to the new inputs.
+#           using ``memory_efficient_fusion`` and apply it to the new inputs.
 
 
 ######################################################################
@@ -577,10 +577,10 @@ def primitive_definition_for_memory_efficient_fusion(
 # an entirely new operation in PyTorch – which takes a lot of time and
 # knowledge of the lower-level PyTorch code as well as parallel
 # programming – or writing the operation in simpler PyTorch ops and
-# settling for poor performance. For example, let's replace LayerNorm
-# in our example with RMSNorm. Even though RMSNorm is a bit simpler
-# than LayerNorm, it doesn’t have an existing compound operation in
-# PyTorch. See the `Root Mean Square Layer Normalization <https://doi.org/10.48550/arXiv.1910.07467>`__ paper for more information about RMSNorm.
+# settling for poor performance. For example, let's replace ``LayerNorm``
+# in our example with ``RMSNorm``. Even though ``RMSNorm`` is a bit simpler
+# than ``LayerNorm``, it doesn’t have an existing compound operation in
+# PyTorch. See the `Root Mean Square Layer Normalization <https://doi.org/10.48550/arXiv.1910.07467>`__ paper for more information about ``RMSNorm``.
 # As before, we’ll define our new transformer block with
 # primitive PyTorch operations.
 #
@@ -608,7 +608,7 @@ def with_rms_norm(
 # As before, we’ll get a baseline by running PyTorch without nvFuser.
 #
 
-# Profile rms_norm
+# Profile ``rms_norm``
 func = functools.partial(
     with_rms_norm,
     input1,
@@ -625,7 +625,7 @@ def with_rms_norm(
 # With nvFuser through TorchScript.
 #
 
-# Profile scripted rms_norm
+# Profile scripted ``rms_norm``
 scripted_with_rms_norm = torch.jit.script(with_rms_norm)
 func = functools.partial(
     scripted_with_rms_norm,
@@ -656,7 +656,7 @@ def with_rms_norm_for_memory_efficient_fusion(
     return norm_output
 
 
-# Profile memory efficient rms_norm
+# Profile memory efficient ``rms_norm``
 memory_efficient_rms_norm = memory_efficient_fusion(
     with_rms_norm_for_memory_efficient_fusion
 )
@@ -666,12 +666,12 @@ def with_rms_norm_for_memory_efficient_fusion(
 ######################################################################
 # .. figure:: /_static/img/nvfuser_intro/nvfuser_tutorial_6.png
 #
-# Since RMSNorm is simpler than LayerNorm the performance of our new
+# Since ``RMSNorm`` is simpler than ``LayerNorm`` the performance of our new
 # transformer block is a little higher than the primitive definition
 # without nvFuser (354 iterations per second compared with 260
 # iterations per second). With TorchScript, the iterations per second
 # increases by 2.68x and 3.36x to 952 iterations per second and 1,191
-# iterations per second with TorchScript and FuncTorch’s memory
+# iterations per second with TorchScript and functorch memory
 # efficient optimization pass, respectively. The performance of this
 # new operation nearly matches the performance of the composite Layer
 # Norm definition with TorchScript.
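The ``rms_norm`` hunks above only touch comments, but the pattern they profile is easy to sketch: define ``RMSNorm`` from primitive ops and script it so the JIT fuser (nvFuser on supported GPUs) is free to fuse the elementwise work. This is a simplified stand-in for the tutorial's ``with_rms_norm``, not the commit's code, and the shapes are arbitrary:

import torch

@torch.jit.script
def rms_norm_primitive(x: torch.Tensor, weight: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    # Root-mean-square over the last dimension, then an elementwise scale;
    # every step is a primitive op, so the scripted graph is fusible.
    rms = torch.sqrt(x.pow(2).mean(dim=-1, keepdim=True) + eps)
    return x / rms * weight

device = "cuda" if torch.cuda.is_available() else "cpu"
x = torch.randn(8, 128, 1024, device=device)
w = torch.ones(1024, device=device)
print(rms_norm_primitive(x, w).shape)  # torch.Size([8, 128, 1024])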

intermediate_source/parametrizations.py

Lines changed: 5 additions & 5 deletions
@@ -19,7 +19,7 @@
 This approach proposes to decouple the learning of the parameters from the
 learning of their norms. To do so, the parameter is divided by its
 `Frobenius norm <https://en.wikipedia.org/wiki/Matrix_norm#Frobenius_norm>`_
-and a separate parameter encoding its norm is learnt.
+and a separate parameter encoding its norm is learned.
 A similar regularization was proposed for GANs under the name of
 "`spectral normalization <https://pytorch.org/docs/stable/generated/torch.nn.utils.spectral_norm.html>`_". This method
 controls the Lipschitz constant of the network by dividing its parameters by
@@ -84,7 +84,7 @@ def forward(self, x):
 # 2) It does not separate the layer and the parametrization. If the parametrization were
 #    more difficult, we would have to rewrite its code for each layer that we want to use it
 #    in.
-# 3) It recomputes the parametrization everytime we use the layer. If we use the layer
+# 3) It recomputes the parametrization every time we use the layer. If we use the layer
 #    several times during the forward pass, (imagine the recurrent kernel of an RNN), it
 #    would compute the same ``A`` every time that the layer is called.
 #
@@ -258,8 +258,8 @@ def forward(self, X):
 print((torch.symeig(X).eigenvalues > 0.).all())  # X is positive definite
 
 ###############################################################################
-# Intializing parametrizations
-# ----------------------------
+# Initializing parametrizations
+# -----------------------------
 #
 # Parametrizations come with a mechanism to initialize them. If we implement a method
 # ``right_inverse`` with signature
@@ -327,7 +327,7 @@ def right_inverse(self, A):
 ###############################################################################
 # The name of this method comes from the fact that we would often expect
 # that ``forward(right_inverse(X)) == X``. This is a direct way of rewriting that
-# the forward afer the initalization with value ``X`` should return the value ``X``.
+# the forward after the initialization with value ``X`` should return the value ``X``.
 # This constraint is not strongly enforced in practice. In fact, at times, it might be of
 # interest to relax this relation. For example, consider the following implementation
 # of a randomized pruning method:
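The last hunk edits the ``right_inverse`` discussion; the initialization contract it describes can be sketched with ``torch.nn.utils.parametrize`` (a symmetric parametrization chosen for illustration, not taken from the commit):

import torch
import torch.nn as nn
import torch.nn.utils.parametrize as parametrize

class Symmetric(nn.Module):
    def forward(self, X):
        # Rebuild a symmetric matrix from its stored upper triangle
        return X.triu() + X.triu(1).transpose(-1, -2)

    def right_inverse(self, A):
        # Called when assigning to the parametrized weight: keep only the
        # upper triangle so that forward(right_inverse(A)) == A for symmetric A
        return A.triu()

layer = nn.Linear(3, 3)
parametrize.register_parametrization(layer, "weight", Symmetric())

A = torch.rand(3, 3)
A = A + A.T  # make it symmetric
layer.weight = A  # assignment is routed through right_inverse
print(torch.allclose(layer.weight, A))  # True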
