pycaffe Detector crops with surrounding context
author    Evan Shelhamer <shelhamer@imaginarynumber.net>
          Mon, 9 Jun 2014 03:31:35 +0000 (20:31 -0700)
committer Evan Shelhamer <shelhamer@imaginarynumber.net>
          Tue, 10 Jun 2014 01:14:25 +0000 (18:14 -0700)
- caffe.Detector learned how to crop windows with context in the R-CNN
  style s.t. the border of the network input is a given amount of
  context.
- add --context_pad arg to detect.py for the amount of context. Default
  is 16, as in R-CNN.
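
A minimal usage sketch of the new option (model paths and the example
window are placeholders, not part of this commit):

    import numpy as np
    import caffe

    # Hypothetical paths; any R-CNN-style deploy net and weights apply.
    detector = caffe.Detector('deploy.prototxt', 'rcnn.caffemodel',
                              context_pad=16)
    # Windows are (ymin, xmin, ymax, xmax) arrays, as detect_windows expects.
    windows = [np.array([50, 50, 250, 250])]
    detections = detector.detect_windows([('image.jpg', windows)])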

python/caffe/detector.py
python/detect.py

python/caffe/detector.py
index 5a30dab..b4e9602 100644
@@ -12,10 +12,6 @@ This implementation follows ideas in
 The selective_search_ijcv_with_python code required for the selective search
 proposal mode is available at
     https://github.com/sergeyk/selective_search_ijcv_with_python
-
-TODO
-- R-CNN crop mode / crop with context.
-- Bundle with R-CNN model for example.
 """
 import numpy as np
 import os
@@ -29,11 +25,14 @@ class Detector(caffe.Net):
     selective search proposals.
     """
     def __init__(self, model_file, pretrained_file, gpu=False, mean_file=None,
-                 input_scale=None, channel_swap=None):
+                 input_scale=None, channel_swap=None, context_pad=None):
         """
         Take
         gpu, mean_file, input_scale, channel_swap: convenience params for
             setting mode, mean, input scale, and channel order.
+        context_pad: amount of surrounding context to take s.t. a
+            `context_pad`-sized border of pixels in the network input image
+            is context, as in R-CNN feature extraction.
         """
         caffe.Net.__init__(self, model_file, pretrained_file)
         self.set_phase_test()
@@ -50,6 +49,8 @@ class Detector(caffe.Net):
         if channel_swap:
             self.set_channel_swap(self.inputs[0], channel_swap)
 
+        self.configure_crop(context_pad)
+
 
     def detect_windows(self, images_windows):
         """
@@ -58,6 +59,7 @@ class Detector(caffe.Net):
 
         Take
         images_windows: (image filename, window list) iterable.
+            (Crops include surrounding context per the context_pad setting.)
 
         Give
         detections: list of {filename: image filename, window: crop coordinates,
@@ -68,8 +70,7 @@ class Detector(caffe.Net):
         for image_fname, windows in images_windows:
             image = caffe.io.load_image(image_fname).astype(np.float32)
             for window in windows:
-                window_inputs.append(image[window[0]:window[2],
-                                           window[1]:window[3]])
+                window_inputs.append(self.crop(image, window))
 
         # Run through the net (warping windows to input dimensions).
         caffe_in = np.asarray([self.preprocess(self.inputs[0], window_in)
@@ -109,3 +110,79 @@ class Detector(caffe.Net):
         windows_list = selective_search.get_windows(image_fnames)
         # Run windowed detection on the selective search list.
         return self.detect_windows(zip(image_fnames, windows_list))
+
+
+    def crop(self, im, window):
+        """
+        Crop a window from the image for detection. Include surrounding context
+        according to the `context_pad` configuration.
+
+        Take
+        im: H x W x K image ndarray to crop.
+        window: bounding box coordinates as ymin, xmin, ymax, xmax.
+
+        Give
+        crop: cropped window.
+        """
+        # Crop window from the image.
+        crop = im[window[0]:window[2], window[1]:window[3]]
+
+        if self.context_pad:
+            box = window.copy()
+            crop_size = self.blobs[self.inputs[0]].width  # assumes square
+            scale = crop_size / (1. * crop_size - self.context_pad * 2)
+            # Crop a box + surrounding context.
+            half_h = (box[2] - box[0] + 1) / 2.
+            half_w = (box[3] - box[1] + 1) / 2.
+            center = (box[0] + half_h, box[1] + half_w)
+            scaled_dims = scale * np.array((-half_h, -half_w, half_h, half_w))
+            box = np.round(np.tile(center, 2) + scaled_dims)
+            full_h = box[2] - box[0] + 1
+            full_w = box[3] - box[1] + 1
+            scale_h = crop_size / full_h
+            scale_w = crop_size / full_w
+            pad_y = round(max(0, -box[0]) * scale_h)  # amount out-of-bounds
+            pad_x = round(max(0, -box[1]) * scale_w)
+
+            # Clip box to image dimensions.
+            im_h, im_w = im.shape[:2]
+            box = np.clip(box, 0., [im_h, im_w, im_h, im_w])
+            clip_h = box[2] - box[0] + 1
+            clip_w = box[3] - box[1] + 1
+            assert(clip_h > 0 and clip_w > 0)
+            crop_h = round(clip_h * scale_h)
+            crop_w = round(clip_w * scale_w)
+            if pad_y + crop_h > crop_size:
+                crop_h = crop_size - pad_y
+            if pad_x + crop_w > crop_size:
+                crop_w = crop_size - pad_x
+
+            # Collect the crop with its context padding and place it in a
+            # mean-filled input, so out-of-bounds regions become mean padding.
+            context_crop = im[box[0]:box[2], box[1]:box[3]]
+            context_crop = caffe.io.resize_image(context_crop, (crop_h, crop_w))
+            crop = self.crop_mean.copy()
+            crop[pad_y:(pad_y + crop_h), pad_x:(pad_x + crop_w)] = context_crop
+
+        return crop
+
+
+    def configure_crop(self, context_pad):
+        """
+        Configure amount of context for cropping.
+        If context is included, make the special input mean for context padding.
+
+        Take
+        context_pad: amount of context for cropping.
+        """
+        self.context_pad = context_pad
+        if self.context_pad:
+            input_scale = self.input_scale.get(self.inputs[0])
+            channel_order = self.channel_swap.get(self.inputs[0])
+            # Padding context crops needs the mean in unprocessed input space.
+            self.crop_mean = self.mean[self.inputs[0]].copy()
+            self.crop_mean = self.crop_mean.transpose((1,2,0))
+            channel_order_inverse = [channel_order.index(i)
+                                     for i in range(self.crop_mean.shape[2])]
+            self.crop_mean = self.crop_mean[:,:, channel_order_inverse]
+            self.crop_mean /= input_scale
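
To make the geometry concrete (a worked example mirroring the code above,
with the R-CNN defaults crop_size = 227 and context_pad = 16): the scale
factor is 227 / (227 - 2*16) ~= 1.164, so a window is grown ~16.4% per side
around its center before warping, leaving a 16-pixel border of context in
the 227 x 227 input; any part of the grown box that falls outside the image
is filled with the mean, which configure_crop first maps back to
unprocessed input space (inverse channel swap, input scale undone).

    import numpy as np

    crop_size, context_pad = 227, 16                    # R-CNN defaults
    scale = crop_size / (crop_size - 2. * context_pad)  # 227/195 ~= 1.164

    window = np.array([100, 100, 300, 300])   # ymin, xmin, ymax, xmax
    half_h = (window[2] - window[0] + 1) / 2.  # 100.5
    half_w = (window[3] - window[1] + 1) / 2.
    center = (window[0] + half_h, window[1] + half_w)
    box = np.round(np.tile(center, 2)
                   + scale * np.array((-half_h, -half_w, half_h, half_w)))
    print(box)  # -> [ 84.  84. 317. 317.]: 201 px grown to 234 px per side
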
python/detect.py
index 05b5244..a3bee5c 100755
@@ -86,6 +86,12 @@ def main(argv):
              "RGB -> BGR since BGR is the Caffe default by way of OpenCV."
 
     )
+    parser.add_argument(
+        "--context_pad",
+        type=int,
+        default=16,
+        help="Amount of surrounding context to collect in input window."
+    )
     args = parser.parse_args()
 
     channel_swap = [int(s) for s in args.channel_swap.split(',')]
@@ -93,7 +99,8 @@ def main(argv):
     # Make detector.
     detector = caffe.Detector(args.model_def, args.pretrained_model,
             gpu=args.gpu, mean_file=args.mean_file,
-            input_scale=args.input_scale, channel_swap=channel_swap)
+            input_scale=args.input_scale, channel_swap=channel_swap,
+            context_pad=args.context_pad)
 
     if args.gpu:
         print 'GPU mode'
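
For reference, the new flag on the command line (model paths are
placeholders; the remaining detect.py arguments are unchanged):

    python python/detect.py \
        --model_def models/rcnn/deploy.prototxt \
        --pretrained_model models/rcnn/rcnn.caffemodel \
        --context_pad 16 \
        ...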