diff --git a/beginner_source/ptcheat.rst b/beginner_source/ptcheat.rst
index d1360b5b86c..dda57fc8cb4 100644
--- a/beginner_source/ptcheat.rst
+++ b/beginner_source/ptcheat.rst
@@ -80,8 +80,8 @@ Distributed Training
 
 .. code-block:: python
 
-    import torch.distributed as dist       # distributed communication
-    from multiprocessing import Process    # memory sharing processes
+    import torch.distributed as dist             # distributed communication
+    from torch.multiprocessing import Process    # memory sharing processes
 
 See `distributed `__
 and
@@ -95,13 +95,13 @@ Creation
 
 .. code-block:: python
 
-    torch.randn(*size)              # tensor with independent N(0,1) entries
-    torch.[ones|zeros](*size)       # tensor with all 1's [or 0's]
-    torch.Tensor(L)                 # create tensor from [nested] list or ndarray L
-    x.clone()                       # clone of x
-    with torch.no_grad():           # code wrap that stops autograd from tracking tensor history
-    requires_grad=True              # arg, when set to True, tracks computation
-                                    # history for future derivative calculations
+    x = torch.randn(*size)          # tensor with independent N(0,1) entries
+    x = torch.[ones|zeros](*size)   # tensor with all 1's [or 0's]
+    x = torch.tensor(L)             # create tensor from [nested] list or ndarray L
+    y = x.clone()                   # clone of x
+    with torch.no_grad():           # code wrap that stops autograd from tracking tensor history
+    requires_grad=True              # arg, when set to True, tracks computation
+                                    # history for future derivative calculations
 
 See `tensor `__
@@ -110,14 +110,16 @@ Dimensionality
 
 .. code-block:: python
 
-    x.size()                        # return tuple-like object of dimensions
-    torch.cat(tensor_seq, dim=0)    # concatenates tensors along dim
-    x.view(a,b,...)                 # reshapes x into size (a,b,...)
-    x.view(-1,a)                    # reshapes x into size (b,a) for some b
-    x.transpose(a,b)                # swaps dimensions a and b
-    x.permute(*dims)                # permutes dimensions
-    x.unsqueeze(dim)                # tensor with added axis
-    x.unsqueeze(dim=2)              # (a,b,c) tensor -> (a,b,1,c) tensor
+    x.size()                            # return tuple-like object of dimensions
+    x = torch.cat(tensor_seq, dim=0)    # concatenates tensors along dim
+    y = x.view(a,b,...)                 # reshapes x into size (a,b,...)
+    y = x.view(-1,a)                    # reshapes x into size (b,a) for some b
+    y = x.transpose(a,b)                # swaps dimensions a and b
+    y = x.permute(*dims)                # permutes dimensions
+    y = x.unsqueeze(dim)                # tensor with added axis
+    y = x.unsqueeze(dim=2)              # (a,b,c) tensor -> (a,b,1,c) tensor
+    y = x.squeeze()                     # removes all dimensions of size 1 (a,1,b,1) -> (a,b)
+    y = x.squeeze(dim=1)                # removes specified dimension of size 1 (a,1,b,1) -> (a,b,1)
 
 See `tensor `__
@@ -127,9 +129,9 @@ Algebra
 
 .. code-block:: python
 
-    A.mm(B)             # matrix multiplication
-    A.mv(x)             # matrix-vector multiplication
-    x.t()               # matrix transpose
+    ret = A.mm(B)       # matrix multiplication
+    ret = A.mv(x)       # matrix-vector multiplication
+    x = x.t()           # matrix transpose
 
 See `math operations `__
@@ -139,24 +141,24 @@ GPU Usage
 
 .. code-block:: python
 
-    torch.cuda.is_available                     # check for cuda
-    x.cuda()                                    # move x's data from
-                                                # CPU to GPU and return new object
+    torch.cuda.is_available()                   # check for cuda
+    x = x.cuda()                                # move x's data from
+                                                # CPU to GPU and return new object
 
-    x.cpu()                                     # move x's data from GPU to CPU
-                                                # and return new object
+    x = x.cpu()                                 # move x's data from GPU to CPU
+                                                # and return new object
 
-    if not args.disable_cuda and torch.cuda.is_available():    # device agnostic code
-        args.device = torch.device('cuda')                     # and modularity
-    else:                                                      #
-        args.device = torch.device('cpu')                      #
+    if not args.disable_cuda and torch.cuda.is_available():    # device agnostic code
+        args.device = torch.device('cuda')                     # and modularity
+    else:                                                      #
+        args.device = torch.device('cpu')                      #
 
-    net.to(device)                              # recursively convert their
-                                                # parameters and buffers to
-                                                # device specific tensors
+    net.to(device)                              # recursively convert its
+                                                # parameters and buffers to
+                                                # device specific tensors
 
-    mytensor.to(device)                         # copy your tensors to a device
-                                                # (gpu, cpu)
+    x = x.to(device)                            # copy your tensors to a device
+                                                # (gpu, cpu)
 
 See `cuda `__
@@ -175,7 +177,7 @@ Deep Learning
     nn.MaxPoolXd(s)                    # X dimension pooling layer
                                        # (notation as above)
-    nn.BatchNorm                       # batch norm layer
+    nn.BatchNormXd                     # batch norm layer
     nn.RNN/LSTM/GRU                    # recurrent layers
     nn.Dropout(p=0.5, inplace=False)   # dropout layer for any dimensional input
     nn.Dropout2d(p=0.5, inplace=False) # 2-dimensional channel-wise dropout
@@ -189,11 +191,15 @@ Loss Functions
 
 .. code-block:: python
 
-    nn.X                               # where X is BCELoss, CrossEntropyLoss,
-                                       # L1Loss, MSELoss, NLLLoss, SoftMarginLoss,
-                                       # MultiLabelSoftMarginLoss, CosineEmbeddingLoss,
-                                       # KLDivLoss, MarginRankingLoss, HingeEmbeddingLoss
-                                       # or CosineEmbeddingLoss
+    nn.X                               # where X is L1Loss, MSELoss, CrossEntropyLoss,
+                                       # CTCLoss, NLLLoss, PoissonNLLLoss,
+                                       # KLDivLoss, BCELoss, BCEWithLogitsLoss,
+                                       # MarginRankingLoss, HingeEmbeddingLoss,
+                                       # MultiLabelMarginLoss, SmoothL1Loss,
+                                       # SoftMarginLoss, MultiLabelSoftMarginLoss,
+                                       # CosineEmbeddingLoss, MultiMarginLoss,
+                                       # or TripletMarginLoss
+
 See `loss functions `__
@@ -204,10 +210,10 @@ Activation Functions
 
 .. code-block:: python
 
     nn.X                               # where X is ReLU, ReLU6, ELU, SELU, PReLU, LeakyReLU,
-                                       # Threshold, HardTanh, Sigmoid, Tanh,
-                                       # LogSigmoid, Softplus, SoftShrink,
-                                       # Softsign, TanhShrink, Softmin, Softmax,
-                                       # Softmax2d or LogSoftmax
+                                       # RReLU, CELU, GELU, Threshold, Hardshrink, Hardtanh,
+                                       # Sigmoid, LogSigmoid, Softplus, Softshrink,
+                                       # Softsign, Tanh, Tanhshrink, Softmin, Softmax,
+                                       # Softmax2d, LogSoftmax or AdaptiveLogSoftmaxWithLoss
 
 See `activation functions `__
@@ -220,8 +226,8 @@ Optimizers
 
     opt = optim.x(model.parameters(), ...)      # create optimizer
     opt.step()                                  # update weights
     optim.X                                     # where X is SGD, Adadelta, Adagrad, Adam,
-                                                # SparseAdam, Adamax, ASGD,
-                                                # LBFGS, RMSProp or Rprop
+                                                # AdamW, SparseAdam, Adamax, ASGD,
+                                                # LBFGS, RMSprop or Rprop
 
 See `optimizers `__
@@ -232,8 +238,10 @@ Learning rate scheduling
 
     scheduler = optim.X(optimizer,...)      # create lr scheduler
     scheduler.step()                        # update lr at start of epoch
-    optim.lr_scheduler.X                    # where X is LambdaLR, StepLR, MultiStepLR,
-                                            # ExponentialLR or ReduceLROnPLateau
+    optim.lr_scheduler.X                    # where X is LambdaLR, MultiplicativeLR,
+                                            # StepLR, MultiStepLR, ExponentialLR,
+                                            # CosineAnnealingLR, ReduceLROnPlateau, CyclicLR,
+                                            # OneCycleLR, or CosineAnnealingWarmRestarts
 
 See `learning rate scheduler `__
@@ -264,8 +272,8 @@ Dataloaders and DataSamplers
 
     sampler.Sampler(dataset,...)       # abstract class dealing with
                                        # ways to sample from dataset
-    sampler.XSampler where ...         # Sequential, Random, Subset,
-                                       # WeightedRandom or Distributed
+    sampler.XSampler where ...         # Sequential, Random, SubsetRandom,
+                                       # WeightedRandom, Batch, or Distributed
 
 See `dataloader `__
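
As a quick sanity check of the new ``squeeze`` entries in the Dimensionality block, the documented shapes agree with what PyTorch returns; the concrete sizes below are arbitrary, chosen only to match the ``(a,1,b,1)`` notation in the comments.

.. code-block:: python

    import torch

    x = torch.randn(2, 1, 3, 1)        # an (a,1,b,1) tensor with a=2, b=3
    print(x.squeeze().shape)           # torch.Size([2, 3])     -> (a,b)
    print(x.squeeze(dim=1).shape)      # torch.Size([2, 3, 1])  -> (a,b,1)
    print(x.unsqueeze(dim=2).shape)    # torch.Size([2, 1, 1, 3, 1])
    print(x.view(-1, 3).shape)         # torch.Size([2, 3]), the -1 size is inferred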
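
To see how the listed classes fit together, here is a minimal, self-contained training sketch; the toy model, the fake batch, and the particular choices of ``CrossEntropyLoss``, ``AdamW``, and ``StepLR`` are illustrative assumptions, not part of the patch itself.

.. code-block:: python

    import torch
    import torch.nn as nn
    import torch.optim as optim

    # device-agnostic setup, in the spirit of the GPU Usage block
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = nn.Sequential(nn.Linear(10, 16), nn.ReLU(), nn.Linear(16, 3)).to(device)
    loss_fn = nn.CrossEntropyLoss()                    # any loss from the list is used the same way
    opt = optim.AdamW(model.parameters(), lr=1e-3)     # AdamW is among the newly listed optimizers
    scheduler = optim.lr_scheduler.StepLR(opt, step_size=2, gamma=0.5)

    inputs = torch.randn(32, 10, device=device)        # fake batch, for illustration only
    targets = torch.randint(0, 3, (32,), device=device)

    for epoch in range(4):
        opt.zero_grad()
        loss = loss_fn(model(inputs), targets)
        loss.backward()
        opt.step()
        scheduler.step()                               # update the learning rate once per epoch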
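
Similarly, the updated sampler list can be exercised with a short sketch; the toy ``TensorDataset`` and the choice of ``WeightedRandomSampler`` are just one example of the ``XSampler`` pattern.

.. code-block:: python

    import torch
    from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler

    # toy data; the tensors and weights are made up purely for illustration
    dataset = TensorDataset(torch.arange(10).float().unsqueeze(1), torch.arange(10))
    weights = torch.linspace(0.1, 1.0, steps=10)       # unnormalized per-sample draw weights
    sampler = WeightedRandomSampler(weights, num_samples=10, replacement=True)

    loader = DataLoader(dataset, batch_size=4, sampler=sampler)   # sampler takes the place of shuffle=True
    for features, labels in loader:
        print(labels)                                  # sample indices drawn according to the weights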