add caffe.io submodule for conversions, image loading and resizing
author Evan Shelhamer <shelhamer@imaginarynumber.net>
Mon, 19 May 2014 00:13:05 +0000 (17:13 -0700)
committer Evan Shelhamer <shelhamer@imaginarynumber.net>
Tue, 20 May 2014 06:55:21 +0000 (23:55 -0700)
python/caffe/__init__.py
python/caffe/io.py [moved from python/caffe/convert.py with 52% similarity]
python/caffe/pycaffe.py

index c3bb98f..e5e1062 100644 (file)
@@ -1 +1,2 @@
 from .pycaffe import Net, SGDSolver
+import io
similarity index 52%
rename from python/caffe/convert.py
rename to python/caffe/io.py
index deef657..0bd2f81 100644 (file)
@@ -1,9 +1,84 @@
-#!/usr/bin/env python
-"""This script converts blobproto instances to numpy arrays.
-"""
+import numpy as np
+import skimage.io
+import skimage.transform
 
 from caffe.proto import caffe_pb2
-import numpy as np
+
+
+def load_image(filename):
+    """
+    Load an image converting from grayscale or alpha as needed.
+
+    Take
+    filename: string
+
+    Give
+    image: an image of size (H x W x 3) with RGB channels of type float32.
+    """
+    img = skimage.img_as_float(skimage.io.imread(filename)).astype(np.float32)
+    if img.ndim == 2:  # single-channel image: repeat it across 3 channels
+        img = np.tile(img[:, :, np.newaxis], (1, 1, 3))
+    elif img.shape[2] == 4:  # 4 channels: keep only the first three
+        img = img[:, :, :3]
+    return img
+
+
+def resize_image(im, new_dims, interp_order=1):
+    """
+    Resize an image array with interpolation.
+
+    Take
+    im: (H x W x K) ndarray
+    new_dims: (height, width) tuple of new dimensions.
+    interp_order: interpolation order, default is 1 (linear).
+
+    Give
+    im: resized ndarray with shape (new_dims[0], new_dims[1], K)
+    """
+    return skimage.transform.resize(im, new_dims, order=interp_order)
+
+
+def oversample(images, crop_dims):
+    """
+    Crop images into the four corners, center, and their mirrored versions.
+
+    Take
+    images: sequence of (H x W x K) ndarrays; shape is read from images[0].
+    crop_dims: (height, width) tuple for the crops.
+
+    Give
+    crops: (10*N x crop_dims[0] x crop_dims[1] x K) float32 ndarray, N inputs.
+    """
+    # Dimensions and center.
+    im_shape = np.array(images[0].shape)
+    crop_dims = np.array(crop_dims)
+    im_center = im_shape[:2] / 2.0
+
+    # Make crop coordinates
+    h_indices = (0, im_shape[0] - crop_dims[0])
+    w_indices = (0, im_shape[1] - crop_dims[1])
+    crops_ix = np.empty((5, 4), dtype=int)  # rows: (top, left, bottom, right)
+    curr = 0
+    for i in h_indices:
+        for j in w_indices:
+            crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
+            curr += 1
+    crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([  # center crop
+        -crop_dims / 2.0,
+         crop_dims / 2.0
+    ])
+    crops_ix = np.tile(crops_ix, (2, 1))  # 5 boxes twice; 2nd set gets mirrored
+
+    # Extract crops
+    crops = np.empty((10 * len(images), crop_dims[0], crop_dims[1],
+                            im_shape[-1]), dtype=np.float32)
+    ix = 0
+    for im in images:
+        for crop in crops_ix:
+            crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :]
+            ix += 1
+        crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :]  # flip last 5 along width
+    return crops
 
 
 def blobproto_to_array(blob, return_diff=False):
index 9b1ed80..72ae5fb 100644 (file)
@@ -6,9 +6,9 @@ interface.
 from collections import OrderedDict
 from itertools import izip_longest
 import numpy as np
-from scipy.ndimage import zoom
 
 from ._caffe import Net, SGDSolver
+import caffe.io
 
 # We directly update methods from Net here (rather than using composition or
 # inheritance) so that nets created by caffe (e.g., by SGDSolver) will
@@ -197,8 +197,11 @@ def _Net_set_mean(self, input_, mean_f, mode='elementwise'):
     mean = np.load(mean_f)
     if mode == 'elementwise':
         if mean.shape != in_shape[1:]:
-            mean = caffe.io.resize_image(mean.transpose((1,2,0)),
-                    in_shape[2:]).transpose((2,0,1))
+            # Resize mean (which requires H x W x K input in range [0,1]).
+            m_min, m_max = mean.min(), mean.max()
+            normal_mean = (mean - m_min) / (m_max - m_min)
+            mean = caffe.io.resize_image(normal_mean.transpose((1,2,0)),
+                    in_shape[2:]).transpose((2,0,1)) * (m_max - m_min) + m_min
         self.mean[input_] = mean
     elif mode == 'channel':
         self.mean[input_] = mean.mean(1).mean(1).reshape((in_shape[1], 1, 1))
@@ -258,11 +261,9 @@ def _Net_preprocess(self, input_name, inputs):
         input_scale = self.input_scale.get(input_name)
         channel_order = self.channel_swap.get(input_name)
         mean = self.mean.get(input_name)
-        in_dims = self.blobs[input_name].data.shape[2:]
-        if caffe_in.shape[:2] != in_dims:
-            scale_h = in_dims[0] / float(caffe_in.shape[0])
-            scale_w = in_dims[1] / float(caffe_in.shape[1])
-            caffe_in = zoom(caffe_in, (scale_h, scale_w, 1), order=1)
+        in_size = self.blobs[input_name].data.shape[2:]
+        if caffe_in.shape[:2] != in_size:
+            caffe_in = caffe.io.resize_image(caffe_in, in_size)
         if input_scale:
             caffe_in *= input_scale
         if channel_order: