diff --git a/beginner_source/fgsm_tutorial.py b/beginner_source/fgsm_tutorial.py
index 2629f3a7a00..007ad3fd956 100644
--- a/beginner_source/fgsm_tutorial.py
+++ b/beginner_source/fgsm_tutorial.py
@@ -27,7 +27,7 @@
 ######################################################################
 # Threat Model
 # ------------
-# 
+#
 # For context, there are many categories of adversarial attacks, each with
 # a different goal and assumption of the attacker’s knowledge. However, in
 # general the overarching goal is to add the least amount of perturbation
@@ -45,14 +45,14 @@
 # misclassification* means the adversary wants to alter an image that is
 # originally of a specific source class so that it is classified as a
 # specific target class.
-# 
+#
 # In this case, the FGSM attack is a *white-box* attack with the goal of
 # *misclassification*. With this background information, we can now
 # discuss the attack in detail.
-# 
+#
 # Fast Gradient Sign Attack
 # -------------------------
-# 
+#
 # One of the first and most popular adversarial attacks to date is
 # referred to as the *Fast Gradient Sign Attack (FGSM)* and is described
 # by Goodfellow et. al. in `Explaining and Harnessing Adversarial
@@ -64,7 +64,7 @@
 # the loss* based on the same backpropagated gradients. In other words,
 # the attack uses the gradient of the loss w.r.t the input data, then
 # adjusts the input data to maximize the loss.
-# 
+#
 # Before we jump into the code, let’s look at the famous
 # `FGSM `__ panda example and extract
 # some notation.
@@ -85,10 +85,10 @@
 # maximize the loss. The resulting perturbed image, :math:`x'`, is then
 # *misclassified* by the target network as a “gibbon” when it is still
 # clearly a “panda”.
-# 
+#
 # Hopefully now the motivation for this tutorial is clear, so lets jump
 # into the implementation.
-# 
+#
 
 import torch
 import torch.nn as nn
@@ -102,16 +102,16 @@
 ######################################################################
 # Implementation
 # --------------
-# 
+#
 # In this section, we will discuss the input parameters for the tutorial,
 # define the model under attack, then code the attack and run some tests.
-# 
+#
 # Inputs
 # ~~~~~~
-# 
+#
 # There are only three inputs for this tutorial, and are defined as
 # follows:
-# 
+#
 # -  ``epsilons`` - List of epsilon values to use for the run. It is
 #    important to keep 0 in the list because it represents the model
 #    performance on the original test set. Also, intuitively we would
@@ -119,16 +119,16 @@
 #    but the more effective the attack in terms of degrading model
 #    accuracy. Since the data range here is :math:`[0,1]`, no epsilon
 #    value should exceed 1.
-# 
+#
 # -  ``pretrained_model`` - path to the pretrained MNIST model which was
 #    trained with
 #    `pytorch/examples/mnist `__.
 #    For simplicity, download the pretrained model `here `__.
-# 
+#
 # -  ``use_cuda`` - boolean flag to use CUDA if desired and available.
 #    Note, a GPU with CUDA is not critical for this tutorial as a CPU will
 #    not take much time.
-# 
+#
 
 epsilons = [0, .05, .1, .15, .2, .25, .3]
 pretrained_model = "data/lenet_mnist_model.pth"
@@ -140,7 +140,7 @@
 ######################################################################
 # Model Under Attack
 # ~~~~~~~~~~~~~~~~~~
-# 
+#
 # As mentioned, the model under attack is the same MNIST model from
 # `pytorch/examples/mnist `__.
 # You may train and save your own MNIST model or you can download and use
@@ -148,7 +148,7 @@
 # been copied from the MNIST example. The purpose of this section is to
 # define the model and dataloader, then initialize the model and load the
 # pretrained weights.
-# 
+#
 
 # LeNet Model definition
 class Net(nn.Module):
@@ -181,7 +181,7 @@ def forward(self, x):
     datasets.MNIST('../data', train=False, download=True, transform=transforms.Compose([
             transforms.ToTensor(),
             transforms.Normalize((0.1307,), (0.3081,)),
-            ])), 
+            ])),
         batch_size=1, shuffle=True)
 
 # Define what device we are using
@@ -201,7 +201,7 @@ def forward(self, x):
 ######################################################################
 # FGSM Attack
 # ~~~~~~~~~~~
-# 
+#
 # Now, we can define the function that creates the adversarial examples by
 # perturbing the original inputs. The ``fgsm_attack`` function takes three
 # inputs, *image* is the original clean image (:math:`x`), *epsilon* is
@@ -209,12 +209,12 @@ def forward(self, x):
 # is gradient of the loss w.r.t the input image
 # (:math:`\nabla_{x} J(\mathbf{\theta}, \mathbf{x}, y)`). The function
 # then creates perturbed image as
-# 
+#
 # .. math:: perturbed\_image = image + epsilon*sign(data\_grad) = x + \epsilon * sign(\nabla_{x} J(\mathbf{\theta}, \mathbf{x}, y))
-# 
+#
 # Finally, in order to maintain the original range of the data, the
 # perturbed image is clipped to range :math:`[0,1]`.
-# 
+#
 
 # FGSM attack code
 def fgsm_attack(image, epsilon, data_grad):
@@ -244,14 +244,14 @@ def denorm(batch, mean=[0.1307], std=[0.3081]):
         mean = torch.tensor(mean).to(device)
     if isinstance(std, list):
         std = torch.tensor(std).to(device)
-    
+
     return batch * std.view(1, -1, 1, 1) + mean.view(1, -1, 1, 1)
 
 
 ######################################################################
 # Testing Function
 # ~~~~~~~~~~~~~~~~
-# 
+#
 # Finally, the central result of this tutorial comes from the ``test``
 # function. Each call to this test function performs a full test step on
 # the MNIST test set and reports a final accuracy. However, notice that
@@ -264,7 +264,7 @@ def denorm(batch, mean=[0.1307], std=[0.3081]):
 # if the perturbed example is adversarial. In addition to testing the
 # accuracy of the model, the function also saves and returns some
 # successful adversarial examples to be visualized later.
-# 
+#
 
 def test( model, device, test_loader, epsilon ):
 
@@ -338,7 +338,7 @@ def test( model, device, test_loader, epsilon ):
 ######################################################################
 # Run Attack
 # ~~~~~~~~~~
-# 
+#
 # The last part of the implementation is to actually run the attack. Here,
 # we run a full test step for each epsilon value in the *epsilons* input.
 # For each epsilon we also save the final accuracy and some successful
@@ -346,7 +346,7 @@ def test( model, device, test_loader, epsilon ):
 # the printed accuracies decrease as the epsilon value increases. Also,
 # note the :math:`\epsilon=0` case represents the original test accuracy,
 # with no attack.
-# 
+#
 
 accuracies = []
 examples = []
@@ -361,10 +361,10 @@ def test( model, device, test_loader, epsilon ):
 ######################################################################
 # Results
 # -------
-# 
+#
 # Accuracy vs Epsilon
 # ~~~~~~~~~~~~~~~~~~~
-# 
+#
 # The first result is the accuracy versus epsilon plot. As alluded to
 # earlier, as epsilon increases we expect the test accuracy to decrease.
 # This is because larger epsilons mean we take a larger step in the
@@ -375,7 +375,7 @@ def test( model, device, test_loader, epsilon ):
 # lower than :math:`\epsilon=0.15`. Also, notice the accuracy of the model
 # hits random accuracy for a 10-class classifier between
 # :math:`\epsilon=0.25` and :math:`\epsilon=0.3`.
-# 
+#
 
 plt.figure(figsize=(5,5))
 plt.plot(epsilons, accuracies, "*-")
@@ -390,7 +390,7 @@ def test( model, device, test_loader, epsilon ):
 ######################################################################
 # Sample Adversarial Examples
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-# 
+#
 # Remember the idea of no free lunch? In this case, as epsilon increases
 # the test accuracy decreases **BUT** the perturbations become more easily
 # perceptible. In reality, there is a tradeoff between accuracy
@@ -403,7 +403,7 @@ def test( model, device, test_loader, epsilon ):
 # perturbations start to become evident at :math:`\epsilon=0.15` and are
 # quite evident at :math:`\epsilon=0.3`. However, in all cases humans are
 # still capable of identifying the correct class despite the added noise.
-# 
+#
 
 # Plot several examples of adversarial samples at each epsilon
 cnt = 0
@@ -426,7 +426,7 @@ def test( model, device, test_loader, epsilon ):
 ######################################################################
 # Where to go next?
 # -----------------
-# 
+#
 # Hopefully this tutorial gives some insight into the topic of adversarial
 # machine learning. There are many potential directions to go from here.
 # This attack represents the very beginning of adversarial attack research
@@ -438,7 +438,7 @@ def test( model, device, test_loader, epsilon ):
 # on defense also leads into the idea of making machine learning models
 # more *robust* in general, to both naturally perturbed and adversarially
 # crafted inputs.
-# 
+#
 # Another direction to go is adversarial attacks and defense in different
 # domains. Adversarial research is not limited to the image domain, check
 # out `this `__ attack on
@@ -447,4 +447,34 @@
 # implement a different attack from the NIPS 2017 competition, and see how
 # it differs from FGSM. Then, try to defend the model from your own
 # attacks.
-# 
+#
+# A further direction, depending on available resources, is to modify the code
+# to process the attacks in batches, in parallel, or in a distributed setting,
+# rather than one example at a time as in the per-epsilon ``test()`` loop above.
+#
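+# As a rough, untested sketch (and not part of the tutorial's verified code),
+# a batched variant could rebuild the test loader with a larger ``batch_size``
+# and let ``fgsm_attack`` and ``denorm`` operate on whole batches at once. The
+# ``test_batched`` name below is hypothetical, and the per-example bookkeeping
+# of the original ``test()`` (skipping already-misclassified inputs, saving
+# sample adversarial images) is omitted for brevity:
+#
+# .. code-block:: python
+#
+#    def test_batched(model, device, loader, epsilon):
+#        correct = 0
+#        total = 0
+#        for data, target in loader:
+#            data, target = data.to(device), target.to(device)
+#            data.requires_grad = True
+#            loss = F.nll_loss(model(data), target)
+#            model.zero_grad()
+#            loss.backward()
+#            # perturb the whole batch in one call, in unnormalized space
+#            perturbed = fgsm_attack(denorm(data), epsilon, data.grad.data)
+#            perturbed = transforms.Normalize((0.1307,), (0.3081,))(perturbed)
+#            final_pred = model(perturbed).max(1, keepdim=True)[1]
+#            correct += final_pred.eq(target.view_as(final_pred)).sum().item()
+#            total += target.size(0)
+#        return correct / total
+#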