
Commit 1a85a1d

Build #62
1 parent 8596789 commit 1a85a1d

50 files changed: +741 additions, −793 deletions


_downloads/dynamic_net.ipynb

Lines changed: 16 additions & 16 deletions
@@ -1,50 +1,50 @@
 {
   "cells": [
     {
+      "cell_type": "code",
+      "metadata": {
+        "collapsed": false
+      },
       "source": [
         "%matplotlib inline"
       ],
-      "outputs": [],
       "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "cell_type": "code"
+      "outputs": []
     },
     {
+      "cell_type": "markdown",
       "source": [
"\nPyTorch: Control Flow + Weight Sharing\n--------------------------------------\n\nTo showcase the power of PyTorch dynamic graphs, we will implement a very strange\nmodel: a fully-connected ReLU network that on each forward pass randomly chooses\na number between 1 and 4 and has that many hidden layers, reusing the same\nweights multiple times to compute the innermost hidden layers.\n\n"
       ],
-      "metadata": {},
-      "cell_type": "markdown"
+      "metadata": {}
     },
     {
+      "cell_type": "code",
+      "metadata": {
+        "collapsed": false
+      },
       "source": [
"import random\nimport torch\nfrom torch.autograd import Variable\n\nclass DynamicNet(torch.nn.Module):\n def __init__(self, D_in, H, D_out):\n \"\"\"\n In the constructor we construct three nn.Linear instances that we will use\n in the forward pass.\n \"\"\"\n super(DynamicNet, self).__init__()\n self.input_linear = torch.nn.Linear(D_in, H)\n self.middle_linear = torch.nn.Linear(H, H)\n self.output_linear = torch.nn.Linear(H, D_out)\n\n def forward(self, x):\n \"\"\"\n For the forward pass of the model, we randomly choose either 0, 1, 2, or 3\n and reuse the middle_linear Module that many times to compute hidden layer\n representations.\n\n Since each forward pass builds a dynamic computation graph, we can use normal\n Python control-flow operators like loops or conditional statements when\n defining the forward pass of the model.\n\n Here we also see that it is perfectly safe to reuse the same Module many\n times when defining a computational graph. This is a big improvement from Lua\n Torch, where each Module could be used only once.\n \"\"\"\n h_relu = self.input_linear(x).clamp(min=0)\n for _ in range(random.randint(0, 3)):\n h_relu = self.middle_linear(h_relu).clamp(min=0)\n y_pred = self.output_linear(h_relu)\n return y_pred\n\n\n# N is batch size; D_in is input dimension;\n# H is hidden dimension; D_out is output dimension.\nN, D_in, H, D_out = 64, 1000, 100, 10\n\n# Create random Tensors to hold inputs and outputs, and wrap them in Variables\nx = Variable(torch.randn(N, D_in))\ny = Variable(torch.randn(N, D_out), requires_grad=False)\n\n# Construct our model by instantiating the class defined above\nmodel = DynamicNet(D_in, H, D_out)\n\n# Construct our loss function and an Optimizer. Training this strange model with\n# vanilla stochastic gradient descent is tough, so we use momentum\ncriterion = torch.nn.MSELoss(size_average=False)\noptimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)\nfor t in range(500):\n # Forward pass: Compute predicted y by passing x to the model\n y_pred = model(x)\n\n # Compute and print loss\n loss = criterion(y_pred, y)\n print(t, loss.data[0])\n\n # Zero gradients, perform a backward pass, and update the weights.\n optimizer.zero_grad()\n loss.backward()\n optimizer.step()"
       ],
-      "outputs": [],
       "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "cell_type": "code"
+      "outputs": []
     }
   ],
   "metadata": {
     "kernelspec": {
-      "language": "python",
       "name": "python3",
+      "language": "python",
       "display_name": "Python 3"
     },
     "language_info": {
-      "nbconvert_exporter": "python",
-      "mimetype": "text/x-python",
       "codemirror_mode": {
         "name": "ipython",
         "version": 3
       },
-      "pygments_lexer": "ipython3",
       "name": "python",
+      "pygments_lexer": "ipython3",
+      "nbconvert_exporter": "python",
+      "mimetype": "text/x-python",
       "file_extension": ".py",
       "version": "3.5.2"
     }
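
For readability, the PyTorch code cell carried (unchanged) in the diff above, unescaped from its single JSON string into a plain script. It keeps the notebook's own names (DynamicNet, input_linear, middle_linear, output_linear) and its older Variable-era API (torch.autograd.Variable, loss.data[0], size_average=False); some of the original comments are condensed.

import random
import torch
from torch.autograd import Variable

class DynamicNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        # Three Linear modules constructed once and reused in forward().
        super(DynamicNet, self).__init__()
        self.input_linear = torch.nn.Linear(D_in, H)
        self.middle_linear = torch.nn.Linear(H, H)
        self.output_linear = torch.nn.Linear(H, D_out)

    def forward(self, x):
        # Each forward pass builds a fresh dynamic graph, so ordinary Python
        # control flow decides how many times (0-3) the same middle_linear
        # weights are reused to compute the hidden layers.
        h_relu = self.input_linear(x).clamp(min=0)
        for _ in range(random.randint(0, 3)):
            h_relu = self.middle_linear(h_relu).clamp(min=0)
        return self.output_linear(h_relu)

# N is batch size; D_in, H, D_out are input, hidden, and output dimensions.
N, D_in, H, D_out = 64, 1000, 100, 10
x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out), requires_grad=False)

model = DynamicNet(D_in, H, D_out)
criterion = torch.nn.MSELoss(size_average=False)
# Vanilla SGD struggles with the randomly varying depth, so momentum is used.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)

for t in range(500):
    y_pred = model(x)
    loss = criterion(y_pred, y)
    print(t, loss.data[0])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()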

_downloads/reinforcement_q_learning.ipynb

Lines changed: 58 additions & 58 deletions
Large diff; contents not rendered by default.

_downloads/tf_two_layer_net.ipynb

Lines changed: 16 additions & 16 deletions
@@ -1,50 +1,50 @@
 {
   "cells": [
     {
+      "cell_type": "code",
+      "metadata": {
+        "collapsed": false
+      },
       "source": [
         "%matplotlib inline"
       ],
-      "outputs": [],
       "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "cell_type": "code"
+      "outputs": []
     },
     {
+      "cell_type": "markdown",
       "source": [
"\nTensorFlow: Static Graphs\n-------------------------\n\nA fully-connected ReLU network with one hidden layer and no biases, trained to\npredict y from x by minimizing squared Euclidean distance.\n\nThis implementation uses basic TensorFlow operations to set up a computational\ngraph, then executes the graph many times to actually train the network.\n\nOne of the main differences between TensorFlow and PyTorch is that TensorFlow\nuses static computational graphs while PyTorch uses dynamic computational\ngraphs.\n\nIn TensorFlow we first set up the computational graph, then execute the same\ngraph many times.\n\n"
       ],
-      "metadata": {},
-      "cell_type": "markdown"
+      "metadata": {}
     },
     {
+      "cell_type": "code",
+      "metadata": {
+        "collapsed": false
+      },
       "source": [
"import tensorflow as tf\nimport numpy as np\n\n# First we set up the computational graph:\n\n# N is batch size; D_in is input dimension;\n# H is hidden dimension; D_out is output dimension.\nN, D_in, H, D_out = 64, 1000, 100, 10\n\n# Create placeholders for the input and target data; these will be filled\n# with real data when we execute the graph.\nx = tf.placeholder(tf.float32, shape=(None, D_in))\ny = tf.placeholder(tf.float32, shape=(None, D_out))\n\n# Create Variables for the weights and initialize them with random data.\n# A TensorFlow Variable persists its value across executions of the graph.\nw1 = tf.Variable(tf.random_normal((D_in, H)))\nw2 = tf.Variable(tf.random_normal((H, D_out)))\n\n# Forward pass: Compute the predicted y using operations on TensorFlow Tensors.\n# Note that this code does not actually perform any numeric operations; it\n# merely sets up the computational graph that we will later execute.\nh = tf.matmul(x, w1)\nh_relu = tf.maximum(h, tf.zeros(1))\ny_pred = tf.matmul(h_relu, w2)\n\n# Compute loss using operations on TensorFlow Tensors\nloss = tf.reduce_sum((y - y_pred) ** 2.0)\n\n# Compute gradient of the loss with respect to w1 and w2.\ngrad_w1, grad_w2 = tf.gradients(loss, [w1, w2])\n\n# Update the weights using gradient descent. To actually update the weights\n# we need to evaluate new_w1 and new_w2 when executing the graph. Note that\n# in TensorFlow the the act of updating the value of the weights is part of\n# the computational graph; in PyTorch this happens outside the computational\n# graph.\nlearning_rate = 1e-6\nnew_w1 = w1.assign(w1 - learning_rate * grad_w1)\nnew_w2 = w2.assign(w2 - learning_rate * grad_w2)\n\n# Now we have built our computational graph, so we enter a TensorFlow session to\n# actually execute the graph.\nwith tf.Session() as sess:\n # Run the graph once to initialize the Variables w1 and w2.\n sess.run(tf.global_variables_initializer())\n\n # Create numpy arrays holding the actual data for the inputs x and targets y\n x_value = np.random.randn(N, D_in)\n y_value = np.random.randn(N, D_out)\n for _ in range(500):\n # Execute the graph many times. Each time it executes we want to bind\n # x_value to x and y_value to y, specified with the feed_dict argument.\n # Each time we execute the graph we want to compute the values for loss,\n # new_w1, and new_w2; the values of these Tensors are returned as numpy\n # arrays.\n loss_value, _, _ = sess.run([loss, new_w1, new_w2],\n feed_dict={x: x_value, y: y_value})\n print(loss_value)"
       ],
-      "outputs": [],
       "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "cell_type": "code"
+      "outputs": []
     }
   ],
   "metadata": {
     "kernelspec": {
-      "language": "python",
       "name": "python3",
+      "language": "python",
       "display_name": "Python 3"
     },
     "language_info": {
-      "nbconvert_exporter": "python",
-      "mimetype": "text/x-python",
       "codemirror_mode": {
         "name": "ipython",
         "version": 3
       },
-      "pygments_lexer": "ipython3",
       "name": "python",
+      "pygments_lexer": "ipython3",
+      "nbconvert_exporter": "python",
+      "mimetype": "text/x-python",
       "file_extension": ".py",
       "version": "3.5.2"
     }
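
Likewise, the TensorFlow code cell carried (unchanged) in the diff above, unescaped for readability. It is written against the TF1-era static-graph API (tf.placeholder, tf.Session, feed_dict) used by the notebook; some of the original comments are condensed.

import tensorflow as tf
import numpy as np

# N is batch size; D_in, H, D_out are input, hidden, and output dimensions.
N, D_in, H, D_out = 64, 1000, 100, 10

# Build the graph first: placeholders for data, Variables for the weights.
x = tf.placeholder(tf.float32, shape=(None, D_in))
y = tf.placeholder(tf.float32, shape=(None, D_out))
w1 = tf.Variable(tf.random_normal((D_in, H)))
w2 = tf.Variable(tf.random_normal((H, D_out)))

# Forward pass, loss, gradients, and weight updates are all graph nodes;
# nothing is computed until the graph is executed in a session.
h = tf.matmul(x, w1)
h_relu = tf.maximum(h, tf.zeros(1))
y_pred = tf.matmul(h_relu, w2)
loss = tf.reduce_sum((y - y_pred) ** 2.0)
grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

learning_rate = 1e-6
new_w1 = w1.assign(w1 - learning_rate * grad_w1)
new_w2 = w2.assign(w2 - learning_rate * grad_w2)

# Execute the same graph many times, feeding in the data on each run.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x_value = np.random.randn(N, D_in)
    y_value = np.random.randn(N, D_out)
    for _ in range(500):
        loss_value, _, _ = sess.run([loss, new_w1, new_w2],
                                    feed_dict={x: x_value, y: y_value})
        print(loss_value)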
