From 4992abecec7214bce3c07497438c2e1ff963e657 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Sun, 8 Jun 2014 20:31:35 -0700 Subject: [PATCH] pycaffe Detector crops with surrounding context - caffe.Detector learned how to crop windows with context in the R-CNN style s.t. the border of the network input is a given amount of context. - add --context_pad arg to detect.py for amount of context. Default is 16, as in R-CNN. --- python/caffe/detector.py | 91 ++++++++++++++++++++++++++++++++++++++++++++---- python/detect.py | 9 ++++- 2 files changed, 92 insertions(+), 8 deletions(-) diff --git a/python/caffe/detector.py b/python/caffe/detector.py index 5a30dab..b4e9602 100644 --- a/python/caffe/detector.py +++ b/python/caffe/detector.py @@ -12,10 +12,6 @@ This implementation follows ideas in The selective_search_ijcv_with_python code required for the selective search proposal mode is available at https://github.com/sergeyk/selective_search_ijcv_with_python - -TODO -- R-CNN crop mode / crop with context. -- Bundle with R-CNN model for example. """ import numpy as np import os @@ -29,11 +25,14 @@ class Detector(caffe.Net): selective search proposals. """ def __init__(self, model_file, pretrained_file, gpu=False, mean_file=None, - input_scale=None, channel_swap=None): + input_scale=None, channel_swap=None, context_pad=None): """ Take gpu, mean_file, input_scale, channel_swap: convenience params for setting mode, mean, input scale, and channel order. + context_pad: amount of surrounding context to take s.t. a `context_pad` + sized border of pixels in the network input image is context, as in + R-CNN feature extraction. 
""" caffe.Net.__init__(self, model_file, pretrained_file) self.set_phase_test() @@ -50,6 +49,8 @@ class Detector(caffe.Net): if channel_swap: self.set_channel_swap(self.inputs[0], channel_swap) + self.configure_crop(context_pad) + def detect_windows(self, images_windows): """ @@ -58,6 +59,7 @@ class Detector(caffe.Net): Take images_windows: (image filename, window list) iterable. + context_crop: size of context border to crop in pixels. Give detections: list of {filename: image filename, window: crop coordinates, @@ -68,8 +70,7 @@ class Detector(caffe.Net): for image_fname, windows in images_windows: image = caffe.io.load_image(image_fname).astype(np.float32) for window in windows: - window_inputs.append(image[window[0]:window[2], - window[1]:window[3]]) + window_inputs.append(self.crop(image, window)) # Run through the net (warping windows to input dimensions). caffe_in = np.asarray([self.preprocess(self.inputs[0], window_in) @@ -109,3 +110,79 @@ class Detector(caffe.Net): windows_list = selective_search.get_windows(image_fnames) # Run windowed detection on the selective search list. return self.detect_windows(zip(image_fnames, windows_list)) + + + def crop(self, im, window): + """ + Crop a window from the image for detection. Include surrounding context + according to the `context_pad` configuration. + + Take + im: H x W x K image ndarray to crop. + window: bounding box coordinates as ymin, xmin, ymax, xmax. + + Give + crop: cropped window. + """ + # Crop window from the image. + crop = im[window[0]:window[2], window[1]:window[3]] + + if self.context_pad: + box = window.copy() + crop_size = self.blobs[self.inputs[0]].width # assumes square + scale = crop_size / (1. * crop_size - self.context_pad * 2) + # Crop a box + surrounding context. + half_h = (box[2] - box[0] + 1) / 2. + half_w = (box[3] - box[1] + 1) / 2. 
+ center = (box[0] + half_h, box[1] + half_w) + scaled_dims = scale * np.array((-half_h, -half_w, half_h, half_w)) + box = np.round(np.tile(center, 2) + scaled_dims) + full_h = box[2] - box[0] + 1 + full_w = box[3] - box[1] + 1 + scale_h = crop_size / full_h + scale_w = crop_size / full_w + pad_y = round(max(0, -box[0]) * scale_h) # amount out-of-bounds + pad_x = round(max(0, -box[1]) * scale_w) + + # Clip box to image dimensions. + im_h, im_w = im.shape[:2] + box = np.clip(box, 0., [im_h, im_w, im_h, im_w]) + clip_h = box[2] - box[0] + 1 + clip_w = box[3] - box[1] + 1 + assert(clip_h > 0 and clip_w > 0) + crop_h = round(clip_h * scale_h) + crop_w = round(clip_w * scale_w) + if pad_y + crop_h > crop_size: + crop_h = crop_size - pad_y + if pad_x + crop_w > crop_size: + crop_w = crop_size - pad_x + + # collect with context padding and place in input + # with mean padding + context_crop = im[box[0]:box[2], box[1]:box[3]] + context_crop = caffe.io.resize_image(context_crop, (crop_h, crop_w)) + crop = self.crop_mean.copy() + crop[pad_y:(pad_y + crop_h), pad_x:(pad_x + crop_w)] = context_crop + + return crop + + + def configure_crop(self, context_pad): + """ + Configure amount of context for cropping. + If context is included, make the special input mean for context padding. + + Take + context_pad: amount of context for cropping. + """ + self.context_pad = context_pad + if self.context_pad: + input_scale = self.input_scale.get(self.inputs[0]) + channel_order = self.channel_swap.get(self.inputs[0]) + # Padding context crops needs the mean in unprocessed input space. 
+ self.crop_mean = self.mean[self.inputs[0]].copy() + self.crop_mean = self.crop_mean.transpose((1,2,0)) + channel_order_inverse = [channel_order.index(i) + for i in range(self.crop_mean.shape[2])] + self.crop_mean = self.crop_mean[:,:, channel_order_inverse] + self.crop_mean /= input_scale diff --git a/python/detect.py b/python/detect.py index 05b5244..a3bee5c 100755 --- a/python/detect.py +++ b/python/detect.py @@ -86,6 +86,12 @@ def main(argv): "RGB -> BGR since BGR is the Caffe default by way of OpenCV." ) + parser.add_argument( + "--context_pad", + type=int, + default='16', + help="Amount of surrounding context to collect in input window." + ) args = parser.parse_args() channel_swap = [int(s) for s in args.channel_swap.split(',')] @@ -93,7 +99,8 @@ def main(argv): # Make detector. detector = caffe.Detector(args.model_def, args.pretrained_model, gpu=args.gpu, mean_file=args.mean_file, - input_scale=args.input_scale, channel_swap=channel_swap) + input_scale=args.input_scale, channel_swap=channel_swap, + context_pad=args.context_pad) if args.gpu: print 'GPU mode' -- 2.7.4