3 detector.py is an out-of-the-box windowed detector
4 callable from the command line.
6 By default it configures and runs the Caffe reference ImageNet model.
7 Note that this model was trained for image classification and not detection,
8 and finetuning for detection can be expected to improve results.
10 The selective_search_ijcv_with_python code required for the selective search
11 proposal mode is available at
12 https://github.com/sergeyk/selective_search_ijcv_with_python
15 - batch up image filenames as well: don't want to load all of them into memory
16 - come up with a batching scheme that preserves order / keeps a unique ID
# Command-line flow visible below: parse options, build a caffe.Detector,
# load the inputs, run detection in the selected crop mode, and save the
# detections as CSV or HDF5.
# NOTE(review): some statements between the visible lines (argument
# declarations, several branch headers) are not shown here; the comments
# describe only what the visible lines establish.
#
# Crop-mode names. 'selective_search' appears as a default among the options
# below, and 'list' selects the explicit-window branch further down.
26 CROP_MODES = ['list', 'selective_search']
# Window coordinate column names: used to pick the window columns out of CSV
# input and to label the coordinate columns of the output frame.
27 COORD_COLS = ['ymin', 'xmin', 'ymax', 'xmax']
# Directory of this script; the default model and mean-file paths below are
# joined onto it.
31 pycaffe_dir = os.path.dirname(__file__)
33 parser = argparse.ArgumentParser()
34 # Required arguments: input and output.
# NOTE(review): the header order quoted in this help text (xmin, ymin, ...)
# differs from COORD_COLS (ymin, xmin, ...). The columns are selected by name
# further down, so CSV column order should not matter; confirm the wording.
37 help="Input txt/csv filename. If .txt, must be list of filenames.\
38 If .csv, must be comma-separated file with header\
39 'filename, xmin, ymin, xmax, ymax'"
43 help="Output h5/csv filename. Format depends on extension."
48 default=os.path.join(pycaffe_dir,
49 "../models/bvlc_reference_caffenet/deploy.prototxt"),
50 help="Model definition file."
54 default=os.path.join(pycaffe_dir,
55 "../models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel"),
56 help="Trained model weights file."
60 default="selective_search",
62 help="How to generate windows for detection."
67 help="Switch for gpu computation."
71 default=os.path.join(pycaffe_dir,
72 'caffe/imagenet/ilsvrc_2012_mean.npy'),
73 help="Data set image mean of H x W x K dimensions (numpy array). " +
74 "Set to '' for no mean subtraction."
79 help="Multiply input features by this scale to finish preprocessing."
85 help="Multiply raw input by this scale before preprocessing."
90 help="Order to permute input channels. The default converts " +
91 "RGB -> BGR since BGR is the Caffe default by way of OpenCV."
98 help="Amount of surrounding context to collect in input window."
100 args = parser.parse_args()
# Both preprocessing settings start as None and are replaced only when the
# corresponding option is supplied (the guard around the mean load is
# presumably a check on args.mean_file; it is not visible here).
102 mean, channel_swap = None, None
104 mean = np.load(args.mean_file)
# A mean whose trailing dimensions are not (1, 1) is reduced with two
# successive mean(1) calls, which for a 3-D array leaves one value per entry
# of axis 0.
# NOTE(review): that treats axis 0 as the channel axis, whereas the mean-file
# help text above describes the array as H x W x K; confirm the layout.
105 if mean.shape[1:] != (1, 1):
106 mean = mean.mean(1).mean(1)
# channel_swap arrives as a comma-separated string and becomes a list of ints.
107 if args.channel_swap:
108 channel_swap = [int(s) for s in args.channel_swap.split(',')]
# Build the detector from the model files and the preprocessing settings.
118 detector = caffe.Detector(args.model_def, args.pretrained_model, mean=mean,
119 input_scale=args.input_scale, raw_scale=args.raw_scale,
120 channel_swap=channel_swap,
121 context_pad=args.context_pad)
125 print("Loading input...")
# .txt input becomes a list of stripped lines; .csv input becomes a DataFrame
# indexed by its 'filename' column (read as str).
# NOTE(review): endswith('txt') / endswith('csv') test the bare suffix without
# the dot, so a name such as "listtxt" also matches.
126 if args.input_file.lower().endswith('txt'):
127 with open(args.input_file) as f:
128 inputs = [_.strip() for _ in f.readlines()]
129 elif args.input_file.lower().endswith('csv'):
130 inputs = pd.read_csv(args.input_file, sep=',', dtype={'filename': str})
131 inputs.set_index('filename', inplace=True)
# Raised for any other extension (presumably the else branch; its header is
# not visible here).
133 raise Exception("Unknown input file type: not in txt or csv.")
136 if args.crop_mode == 'list':
137 # Unpack sequence of (image filename, windows).
# Each pair is (filename, array of that file's rows restricted to COORD_COLS),
# with one pair per distinct filename in the index.
# NOTE(review): this relies on `inputs` being the DataFrame built by the .csv
# branch; the .txt branch yields a plain list, whose `.index` is a method.
# Confirm list mode is only used with CSV input.
139 (ix, inputs.iloc[np.where(inputs.index == ix)][COORD_COLS].values)
140 for ix in inputs.index.unique()
142 detections = detector.detect_windows(images_windows)
# Otherwise (branch header not visible here) the windows are proposed by
# selective search over `inputs`.
# NOTE(review): with .csv input this call receives the DataFrame rather than a
# list of filenames; confirm that is supported.
144 detections = detector.detect_selective_search(inputs)
145 print("Processed {} windows in {:.3f} s.".format(len(detections),
148 # Collect into dataframe with labeled fields.
# Assumes each detection record has 'filename' and 'window' fields; 'window'
# is stacked row-wise and spread into the four COORD_COLS columns.
149 df = pd.DataFrame(detections)
150 df.set_index('filename', inplace=True)
151 df[COORD_COLS] = pd.DataFrame(
152 data=np.vstack(df['window']), index=df.index, columns=COORD_COLS)
# The output format is chosen by extension: a name ending in 'csv' is written
# as CSV.
157 if args.output_file.lower().endswith('csv'):
159 # Enumerate the class probabilities.
# NOTE(review): NUM_OUTPUT is not defined in the visible part of the file;
# confirm it exists at module level.
# NOTE(review): df['feat'] assumes detection records carry a 'feat' field;
# verify against the keys the detector actually returns.
160 class_cols = ['class{}'.format(x) for x in range(NUM_OUTPUT)]
161 df[class_cols] = pd.DataFrame(
162 data=np.vstack(df['feat']), index=df.index, columns=class_cols)
# NOTE(review): current pandas names this to_csv parameter `columns`; `cols`
# is rejected by recent releases. Confirm against the pandas version in use.
163 df.to_csv(args.output_file, cols=COORD_COLS + class_cols)
# Any other extension (presumably the else branch; header not visible) is
# written as HDF5 under the key 'df', opened in mode 'w'.
166 df.to_hdf(args.output_file, 'df', mode='w')
167 print("Saved to {} in {:.3f} s.".format(args.output_file,
# Script entry point; the guarded body lies beyond the visible lines.
171 if __name__ == "__main__":