From 08b595d539c0e624ceb73869ee7c95376d4972e5 Mon Sep 17 00:00:00 2001
From: Evan Shelhamer
Date: Sat, 7 Mar 2015 00:34:24 -0800
Subject: [PATCH] [example] revise filter visualization

- download CaffeNet if it isn't there
- switch to caffe.Net
- reshape net for single input
- explain param, bias indexing
- update output for N-D blobs
---
 examples/filter_visualization.ipynb | 109 +++++++++++++++++++++---------------
 1 file changed, 64 insertions(+), 45 deletions(-)

diff --git a/examples/filter_visualization.ipynb b/examples/filter_visualization.ipynb
index 0bfdb5c..7125907 100644
--- a/examples/filter_visualization.ipynb
+++ b/examples/filter_visualization.ipynb
@@ -4,7 +4,7 @@
   "example_name": "Filter visualization",
   "include_in_docs": true,
   "priority": 2,
-  "signature": "sha256:44536e4f82eb5748b6a3bb6fcfca01bc6c5815dad2641c994dab031f452b7606"
+  "signature": "sha256:64c88129e2eeaa956e4c8a26467ff6119f24ea3d7ef15f8217326249973bea8f"
  },
 "nbformat": 3,
 "nbformat_minor": 0,
@@ -24,7 +24,7 @@
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-      "First, import required modules and set plotting parameters"
+      "First, import required modules, set plotting parameters, and run `./scripts/download_model_binary.py models/bvlc_reference_caffenet` to get the pretrained CaffeNet model if it hasn't already been fetched."
      ]
     },
     {
@@ -44,7 +44,12 @@
       "\n",
       "plt.rcParams['figure.figsize'] = (10, 10)\n",
       "plt.rcParams['image.interpolation'] = 'nearest'\n",
-      "plt.rcParams['image.cmap'] = 'gray'"
+      "plt.rcParams['image.cmap'] = 'gray'\n",
+      "\n",
+      "import os\n",
+      "if not os.path.isfile(caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'):\n",
+      "    print(\"Downloading pre-trained CaffeNet model...\")\n",
+      "    !../scripts/download_model_binary.py ../models/bvlc_reference_caffenet"
      ],
      "language": "python",
      "metadata": {},
@@ -55,7 +60,7 @@
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-      "Run `./scripts/download_model_binary.py models/bvlc_reference_caffenet` to get the pretrained CaffeNet model, load the net, specify test phase and CPU mode, and configure input preprocessing."
+      "Set Caffe to CPU mode, load the net in the test phase for inference, and configure input preprocessing."
      ]
     },
     {
@@ -63,12 +68,16 @@
      "collapsed": false,
      "input": [
       "caffe.set_mode_cpu()\n",
-      "net = caffe.Classifier(caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt',\n",
-      "                       caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel')\n",
+      "net = caffe.Net(caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt',\n",
+      "                caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel',\n",
+      "                caffe.TEST)\n",
+      "\n",
       "# input preprocessing: 'data' is the name of the input blob == net.inputs[0]\n",
-      "net.transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1)) # ImageNet mean\n",
-      "net.transformer.set_raw_scale('data', 255) # the reference model operates on images in [0,255] range instead of [0,1]\n",
-      "net.transformer.set_channel_swap('data', (2,1,0)) # the reference model has channels in BGR order instead of RGB"
+      "transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})\n",
+      "transformer.set_transpose('data', (2,0,1))\n",
+      "transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1)) # mean pixel\n",
+      "transformer.set_raw_scale('data', 255) # the reference model operates on images in [0,255] range instead of [0,1]\n",
+      "transformer.set_channel_swap('data', (2,1,0)) # the reference model has channels in BGR order instead of RGB"
      ],
      "language": "python",
      "metadata": {},
@@ -79,25 +88,36 @@
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-      "Run a classification pass"
+      "Classify the image by reshaping the net for the single input, then doing the forward pass."
      ]
     },
     {
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "scores = net.predict([caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')])"
+      "net.blobs['data'].reshape(1,3,227,227)\n",
+      "net.blobs['data'].data[...] = transformer.preprocess('data', caffe.io.load_image(caffe_root + 'examples/images/cat.jpg'))\n",
+      "out = net.forward()\n",
+      "print(\"Predicted class is #{}.\".format(out['prob'].argmax()))"
      ],
      "language": "python",
      "metadata": {},
-     "outputs": [],
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Predicted class is #281.\n"
+       ]
+      }
+     ],
      "prompt_number": 3
     },
     {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-      "The layer features and their shapes (10 is the batch size, corresponding to the the ten subcrops used by Krizhevsky et al.)"
+      "The layer features and their shapes (1 is the batch size, corresponding to the single input image in this example).\n",
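+      "\n",
+      "As a quick sketch of reading one blob directly (`conv1` here is just an example layer name from the list below):\n",
+      "\n",
+      "    # blob axes are (batch, channels, height, width)\n",
+      "    print(net.blobs['conv1'].data.shape)  # (1, 96, 55, 55)"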
      ]
     },
     {
@@ -114,21 +134,21 @@
        "output_type": "pyout",
        "prompt_number": 4,
        "text": [
-        "[('data', (10, 3, 227, 227)),\n",
-        " ('conv1', (10, 96, 55, 55)),\n",
-        " ('pool1', (10, 96, 27, 27)),\n",
-        " ('norm1', (10, 96, 27, 27)),\n",
-        " ('conv2', (10, 256, 27, 27)),\n",
-        " ('pool2', (10, 256, 13, 13)),\n",
-        " ('norm2', (10, 256, 13, 13)),\n",
-        " ('conv3', (10, 384, 13, 13)),\n",
-        " ('conv4', (10, 384, 13, 13)),\n",
-        " ('conv5', (10, 256, 13, 13)),\n",
-        " ('pool5', (10, 256, 6, 6)),\n",
-        " ('fc6', (10, 4096, 1, 1)),\n",
-        " ('fc7', (10, 4096, 1, 1)),\n",
-        " ('fc8', (10, 1000, 1, 1)),\n",
-        " ('prob', (10, 1000, 1, 1))]"
+        "[('data', (1, 3, 227, 227)),\n",
+        " ('conv1', (1, 96, 55, 55)),\n",
+        " ('pool1', (1, 96, 27, 27)),\n",
+        " ('norm1', (1, 96, 27, 27)),\n",
+        " ('conv2', (1, 256, 27, 27)),\n",
+        " ('pool2', (1, 256, 13, 13)),\n",
+        " ('norm2', (1, 256, 13, 13)),\n",
+        " ('conv3', (1, 384, 13, 13)),\n",
+        " ('conv4', (1, 384, 13, 13)),\n",
+        " ('conv5', (1, 256, 13, 13)),\n",
+        " ('pool5', (1, 256, 6, 6)),\n",
+        " ('fc6', (1, 4096)),\n",
+        " ('fc7', (1, 4096)),\n",
+        " ('fc8', (1, 1000)),\n",
+        " ('prob', (1, 1000))]"
        ]
       }
      ],
@@ -138,7 +158,7 @@
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-      "The parameters and their shapes (each of these layers also has biases which are omitted here)"
+      "The parameters and their shapes. The weights are `net.params['name'][0]` and the biases are `net.params['name'][1]`.\n",
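+      "\n",
+      "As a quick sketch of this indexing (using `conv1` from the list below):\n",
+      "\n",
+      "    print(net.params['conv1'][0].data.shape)  # weights: (96, 3, 11, 11)\n",
+      "    print(net.params['conv1'][1].data.shape)  # biases: one value per output channel"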
      ]
     },
     {
@@ -160,9 +180,9 @@
         " ('conv3', (384, 256, 3, 3)),\n",
         " ('conv4', (384, 192, 3, 3)),\n",
         " ('conv5', (256, 192, 3, 3)),\n",
-        " ('fc6', (1, 1, 4096, 9216)),\n",
-        " ('fc7', (1, 1, 4096, 4096)),\n",
-        " ('fc8', (1, 1, 1000, 4096))]"
+        " ('fc6', (4096, 9216)),\n",
+        " ('fc7', (4096, 4096)),\n",
+        " ('fc8', (1000, 4096))]"
        ]
       }
      ],
@@ -180,7 +200,7 @@
      "collapsed": false,
      "input": [
       "# take an array of shape (n, height, width) or (n, height, width, channels)\n",
-      "# and visualize each (height, width) thing in a grid of size approx. sqrt(n) by sqrt(n)\n",
+      "# and visualize each (height, width) thing in a grid of size approx. sqrt(n) by sqrt(n)\n",
       "def vis_square(data, padsize=1, padval=0):\n",
       "    data -= data.min()\n",
       "    data /= data.max()\n",
@@ -212,8 +232,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "# index four is the center crop\n",
-      "plt.imshow(net.transformer.deprocess('data', net.blobs['data'].data[4]))"
+      "plt.imshow(transformer.deprocess('data', net.blobs['data'].data[0]))"
      ],
      "language": "python",
      "metadata": {},
@@ -269,7 +288,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['conv1'].data[4, :36]\n",
+      "feat = net.blobs['conv1'].data[0, :36]\n",
       "vis_square(feat, padval=1)"
      ],
      "language": "python",
@@ -327,7 +346,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['conv2'].data[4, :36]\n",
+      "feat = net.blobs['conv2'].data[0, :36]\n",
       "vis_square(feat, padval=1)"
      ],
      "language": "python",
@@ -355,7 +374,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['conv3'].data[4]\n",
+      "feat = net.blobs['conv3'].data[0]\n",
       "vis_square(feat, padval=0.5)"
      ],
      "language": "python",
@@ -383,7 +402,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['conv4'].data[4]\n",
+      "feat = net.blobs['conv4'].data[0]\n",
       "vis_square(feat, padval=0.5)"
      ],
      "language": "python",
@@ -411,7 +430,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['conv5'].data[4]\n",
+      "feat = net.blobs['conv5'].data[0]\n",
       "vis_square(feat, padval=0.5)"
      ],
      "language": "python",
@@ -439,7 +458,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['pool5'].data[4]\n",
+      "feat = net.blobs['pool5'].data[0]\n",
       "vis_square(feat, padval=1)"
      ],
      "language": "python",
@@ -469,7 +488,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['fc6'].data[4]\n",
+      "feat = net.blobs['fc6'].data[0]\n",
       "plt.subplot(2, 1, 1)\n",
       "plt.plot(feat.flat)\n",
       "plt.subplot(2, 1, 2)\n",
@@ -500,7 +519,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['fc7'].data[4]\n",
+      "feat = net.blobs['fc7'].data[0]\n",
       "plt.subplot(2, 1, 1)\n",
       "plt.plot(feat.flat)\n",
       "plt.subplot(2, 1, 2)\n",
@@ -531,7 +550,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['prob'].data[4]\n",
+      "feat = net.blobs['prob'].data[0]\n",
       "plt.plot(feat.flat)"
      ],
      "language": "python",
@@ -576,7 +595,7 @@
       "    labels = np.loadtxt(imagenet_labels_filename, str, delimiter='\\t')\n",
       "\n",
       "# sort top k predictions from softmax output\n",
-      "top_k = net.blobs['prob'].data[4].flatten().argsort()[-1:-6:-1]\n",
+      "top_k = net.blobs['prob'].data[0].flatten().argsort()[-1:-6:-1]\n",
       "print labels[top_k]"
      ],
      "language": "python",
-- 
2.7.4