From 08b595d539c0e624ceb73869ee7c95376d4972e5 Mon Sep 17 00:00:00 2001
From: Evan Shelhamer
Date: Sat, 7 Mar 2015 00:34:24 -0800
Subject: [PATCH] [example] revise filter visualization

- download CaffeNet if it isn't there
- switch to caffe.Net
- reshape net for single input
- explain param, bias indexing
- update output for N-D blobs
---
 examples/filter_visualization.ipynb | 109 +++++++++++++++++++++---------------
 1 file changed, 64 insertions(+), 45 deletions(-)

diff --git a/examples/filter_visualization.ipynb b/examples/filter_visualization.ipynb
index 0bfdb5c..7125907 100644
--- a/examples/filter_visualization.ipynb
+++ b/examples/filter_visualization.ipynb
@@ -4,7 +4,7 @@
   "example_name": "Filter visualization",
   "include_in_docs": true,
   "priority": 2,
-  "signature": "sha256:44536e4f82eb5748b6a3bb6fcfca01bc6c5815dad2641c994dab031f452b7606"
+  "signature": "sha256:64c88129e2eeaa956e4c8a26467ff6119f24ea3d7ef15f8217326249973bea8f"
  },
 "nbformat": 3,
 "nbformat_minor": 0,
@@ -24,7 +24,7 @@
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-      "First, import required modules and set plotting parameters"
+      "First, import required modules, set plotting parameters, and run `./scripts/download_model_binary.py models/bvlc_reference_caffenet` to get the pretrained CaffeNet model if it hasn't already been fetched."
      ]
     },
     {
@@ -44,7 +44,12 @@
       "\n",
       "plt.rcParams['figure.figsize'] = (10, 10)\n",
       "plt.rcParams['image.interpolation'] = 'nearest'\n",
-      "plt.rcParams['image.cmap'] = 'gray'"
+      "plt.rcParams['image.cmap'] = 'gray'\n",
+      "\n",
+      "import os\n",
+      "if not os.path.isfile(caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'):\n",
+      "    print(\"Downloading pre-trained CaffeNet model...\")\n",
+      "    !../scripts/download_model_binary.py ../models/bvlc_reference_caffenet"
      ],
      "language": "python",
      "metadata": {},
@@ -55,7 +60,7 @@
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-      "Run `./scripts/download_model_binary.py models/bvlc_reference_caffenet` to get the pretrained CaffeNet model, load the net, specify test phase and CPU mode, and configure input preprocessing."
+      "Set Caffe to CPU mode, load the net in the test phase for inference, and configure input preprocessing."
      ]
     },
     {
@@ -63,12 +68,16 @@
      "collapsed": false,
      "input": [
       "caffe.set_mode_cpu()\n",
-      "net = caffe.Classifier(caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt',\n",
-      "                       caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel')\n",
+      "net = caffe.Net(caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt',\n",
+      "                caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel',\n",
+      "                caffe.TEST)\n",
+      "\n",
       "# input preprocessing: 'data' is the name of the input blob == net.inputs[0]\n",
-      "net.transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1)) # ImageNet mean\n",
-      "net.transformer.set_raw_scale('data', 255) # the reference model operates on images in [0,255] range instead of [0,1]\n",
-      "net.transformer.set_channel_swap('data', (2,1,0)) # the reference model has channels in BGR order instead of RGB"
+      "transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})\n",
+      "transformer.set_transpose('data', (2,0,1))\n",
+      "transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1)) # mean pixel\n",
+      "transformer.set_raw_scale('data', 255) # the reference model operates on images in [0,255] range instead of [0,1]\n",
+      "transformer.set_channel_swap('data', (2,1,0)) # the reference model has channels in BGR order instead of RGB"
      ],
      "language": "python",
      "metadata": {},
@@ -79,25 +88,36 @@
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-      "Run a classification pass"
+      "Classify the image by reshaping the net for the single input, then doing the forward pass."
      ]
     },
     {
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "scores = net.predict([caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')])"
+      "net.blobs['data'].reshape(1,3,227,227)\n",
+      "net.blobs['data'].data[...] = transformer.preprocess('data', caffe.io.load_image(caffe_root + 'examples/images/cat.jpg'))\n",
+      "out = net.forward()\n",
+      "print(\"Predicted class is #{}.\".format(out['prob'].argmax()))"
      ],
      "language": "python",
      "metadata": {},
-     "outputs": [],
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Predicted class is #281.\n"
+       ]
+      }
+     ],
      "prompt_number": 3
     },
     {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-      "The layer features and their shapes (10 is the batch size, corresponding to the the ten subcrops used by Krizhevsky et al.)"
+      "The layer features and their shapes (1 is the batch size, corresponding to the single input image in this example).\n",
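+      "\n",
+      "As a quick sketch of reading one blob directly (`conv1` here is just an example layer name from the list below):\n",
+      "\n",
+      "    # blob axes are (batch, channels, height, width)\n",
+      "    print(net.blobs['conv1'].data.shape)  # (1, 96, 55, 55)"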
      ]
     },
     {
@@ -114,21 +134,21 @@
        "output_type": "pyout",
        "prompt_number": 4,
        "text": [
-        "[('data', (10, 3, 227, 227)),\n",
-        " ('conv1', (10, 96, 55, 55)),\n",
-        " ('pool1', (10, 96, 27, 27)),\n",
-        " ('norm1', (10, 96, 27, 27)),\n",
-        " ('conv2', (10, 256, 27, 27)),\n",
-        " ('pool2', (10, 256, 13, 13)),\n",
-        " ('norm2', (10, 256, 13, 13)),\n",
-        " ('conv3', (10, 384, 13, 13)),\n",
-        " ('conv4', (10, 384, 13, 13)),\n",
-        " ('conv5', (10, 256, 13, 13)),\n",
-        " ('pool5', (10, 256, 6, 6)),\n",
-        " ('fc6', (10, 4096, 1, 1)),\n",
-        " ('fc7', (10, 4096, 1, 1)),\n",
-        " ('fc8', (10, 1000, 1, 1)),\n",
-        " ('prob', (10, 1000, 1, 1))]"
+        "[('data', (1, 3, 227, 227)),\n",
+        " ('conv1', (1, 96, 55, 55)),\n",
+        " ('pool1', (1, 96, 27, 27)),\n",
+        " ('norm1', (1, 96, 27, 27)),\n",
+        " ('conv2', (1, 256, 27, 27)),\n",
+        " ('pool2', (1, 256, 13, 13)),\n",
+        " ('norm2', (1, 256, 13, 13)),\n",
+        " ('conv3', (1, 384, 13, 13)),\n",
+        " ('conv4', (1, 384, 13, 13)),\n",
+        " ('conv5', (1, 256, 13, 13)),\n",
+        " ('pool5', (1, 256, 6, 6)),\n",
+        " ('fc6', (1, 4096)),\n",
+        " ('fc7', (1, 4096)),\n",
+        " ('fc8', (1, 1000)),\n",
+        " ('prob', (1, 1000))]"
        ]
       }
      ],
@@ -138,7 +158,7 @@
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-      "The parameters and their shapes (each of these layers also has biases which are omitted here)"
+      "The parameters and their shapes. The weights are `net.params['name'][0]` and the biases are `net.params['name'][1]`.\n",
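+      "\n",
+      "As a quick sketch of this indexing (using `conv1` from the list below):\n",
+      "\n",
+      "    print(net.params['conv1'][0].data.shape)  # weights: (96, 3, 11, 11)\n",
+      "    print(net.params['conv1'][1].data.shape)  # biases: one value per output channel"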
      ]
     },
     {
@@ -160,9 +180,9 @@
         " ('conv3', (384, 256, 3, 3)),\n",
         " ('conv4', (384, 192, 3, 3)),\n",
         " ('conv5', (256, 192, 3, 3)),\n",
-        " ('fc6', (1, 1, 4096, 9216)),\n",
-        " ('fc7', (1, 1, 4096, 4096)),\n",
-        " ('fc8', (1, 1, 1000, 4096))]"
+        " ('fc6', (4096, 9216)),\n",
+        " ('fc7', (4096, 4096)),\n",
+        " ('fc8', (1000, 4096))]"
        ]
       }
      ],
@@ -180,7 +200,7 @@
      "collapsed": false,
      "input": [
       "# take an array of shape (n, height, width) or (n, height, width, channels)\n",
-      "# and visualize each (height, width) thing in a grid of size approx. sqrt(n) by sqrt(n)\n",
+      "# and visualize each (height, width) thing in a grid of size approx. sqrt(n) by sqrt(n)\n",
       "def vis_square(data, padsize=1, padval=0):\n",
       "    data -= data.min()\n",
       "    data /= data.max()\n",
@@ -212,8 +232,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "# index four is the center crop\n",
-      "plt.imshow(net.transformer.deprocess('data', net.blobs['data'].data[4]))"
+      "plt.imshow(transformer.deprocess('data', net.blobs['data'].data[0]))"
      ],
      "language": "python",
      "metadata": {},
@@ -269,7 +288,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['conv1'].data[4, :36]\n",
+      "feat = net.blobs['conv1'].data[0, :36]\n",
       "vis_square(feat, padval=1)"
      ],
      "language": "python",
@@ -327,7 +346,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['conv2'].data[4, :36]\n",
+      "feat = net.blobs['conv2'].data[0, :36]\n",
       "vis_square(feat, padval=1)"
      ],
      "language": "python",
@@ -355,7 +374,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['conv3'].data[4]\n",
+      "feat = net.blobs['conv3'].data[0]\n",
       "vis_square(feat, padval=0.5)"
      ],
      "language": "python",
@@ -383,7 +402,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['conv4'].data[4]\n",
+      "feat = net.blobs['conv4'].data[0]\n",
       "vis_square(feat, padval=0.5)"
      ],
      "language": "python",
@@ -411,7 +430,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['conv5'].data[4]\n",
+      "feat = net.blobs['conv5'].data[0]\n",
       "vis_square(feat, padval=0.5)"
      ],
      "language": "python",
@@ -439,7 +458,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['pool5'].data[4]\n",
+      "feat = net.blobs['pool5'].data[0]\n",
       "vis_square(feat, padval=1)"
      ],
      "language": "python",
@@ -469,7 +488,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['fc6'].data[4]\n",
+      "feat = net.blobs['fc6'].data[0]\n",
       "plt.subplot(2, 1, 1)\n",
       "plt.plot(feat.flat)\n",
       "plt.subplot(2, 1, 2)\n",
@@ -500,7 +519,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['fc7'].data[4]\n",
+      "feat = net.blobs['fc7'].data[0]\n",
       "plt.subplot(2, 1, 1)\n",
       "plt.plot(feat.flat)\n",
       "plt.subplot(2, 1, 2)\n",
@@ -531,7 +550,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "feat = net.blobs['prob'].data[4]\n",
+      "feat = net.blobs['prob'].data[0]\n",
       "plt.plot(feat.flat)"
      ],
      "language": "python",
@@ -576,7 +595,7 @@
       "    labels = np.loadtxt(imagenet_labels_filename, str, delimiter='\\t')\n",
       "\n",
       "# sort top k predictions from softmax output\n",
-      "top_k = net.blobs['prob'].data[4].flatten().argsort()[-1:-6:-1]\n",
+      "top_k = net.blobs['prob'].data[0].flatten().argsort()[-1:-6:-1]\n",
       "print labels[top_k]"
      ],
      "language": "python",
-- 
2.7.4