support for tightest square mode while finetuning
author Ross Girshick <rbg@eecs.berkeley.edu>
Fri, 17 Jan 2014 00:22:08 +0000 (16:22 -0800)
committer Evan Shelhamer <shelhamer@imaginarynumber.net>
Thu, 20 Mar 2014 02:08:28 +0000 (19:08 -0700)
models/pascal_finetune.prototxt
models/pascal_finetune_solver.prototxt
models/pascal_finetune_val.prototxt
src/caffe/layers/window_data_layer.cpp
src/caffe/proto/caffe.proto

index d2a33f7..9bc565b 100644 (file)
@@ -8,6 +8,7 @@ layers {
     batchsize: 128
     cropsize: 227
     context_pad: 16
+    crop_mode: "warp"
     mirror: true
     det_fg_threshold: 0.5
     det_bg_threshold: 0.5
index 8c46475..cbdef7f 100644 (file)
@@ -11,4 +11,4 @@ max_iter: 100000
 momentum: 0.9
 weight_decay: 0.0005
 snapshot: 10000
-snapshot_prefix: "./snapshots/pascal_context16_finetune_train"
+snapshot_prefix: "./snapshots/pascal_warp_context16_finetune_train"
index 18a680f..cece634 100644 (file)
@@ -8,6 +8,7 @@ layers {
     batchsize: 128
     cropsize: 227
     context_pad: 16
+    crop_mode: "warp"
     mirror: true
     det_fg_threshold: 0.5
     det_bg_threshold: 0.5
index e8f52e7..2dae2f9 100644 (file)
@@ -47,6 +47,9 @@ void* WindowDataLayerPrefetch(void* layer_pointer) {
   const int mean_width = layer->data_mean_.width();
   const int mean_height = layer->data_mean_.height();
   cv::Size cv_crop_size(cropsize, cropsize);
+  const string& crop_mode = layer->layer_param_.crop_mode();
+
+  bool use_square = (crop_mode == "square") ? true : false;
 
   // zero out batch
   memset(top_data, 0, sizeof(Dtype)*layer->prefetch_data_->count());
@@ -93,7 +96,7 @@ void* WindowDataLayerPrefetch(void* layer_pointer) {
 
       int pad_w = 0;
       int pad_h = 0;
-      if (context_pad > 0) {
+      if (context_pad > 0 || use_square) {
         // scale factor by which to expand the original region 
         // such that after warping the expanded region to cropsize x cropsize
         // there's exactly context_pad amount of padding on each side
@@ -105,6 +108,13 @@ void* WindowDataLayerPrefetch(void* layer_pointer) {
         Dtype half_width = static_cast<Dtype>(x2-x1+1)/2.0;
         Dtype center_x = static_cast<Dtype>(x1) + half_width;
         Dtype center_y = static_cast<Dtype>(y1) + half_height;
+        if (use_square) {
+          if (half_height > half_width) {
+            half_width = half_height;
+          } else {
+            half_height = half_width;
+          }
+        }
         x1 = static_cast<int>(round(center_x - half_width*context_scale));
         x2 = static_cast<int>(round(center_x + half_width*context_scale));
         y1 = static_cast<int>(round(center_y - half_height*context_scale));
@@ -339,6 +349,8 @@ void WindowDataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
   LOG(INFO) << "Amount of context padding: " 
       << this->layer_param_.context_pad();
 
+  LOG(INFO) << "Crop mode: " << this->layer_param_.crop_mode();
+
   // image
   int cropsize = this->layer_param_.cropsize();
   CHECK_GT(cropsize, 0);
index 5f82c19..b86e5f3 100644 (file)
@@ -108,6 +108,11 @@ message LayerParameter {
   // (used only by the window_data_layer)
   optional uint32 context_pad = 58 [default = 0];
 
+  // Mode for cropping out a detection window
+  // warp: cropped window is warped to a fixed size and aspect ratio
+  // square: the tightest square around the window is cropped
+  optional string crop_mode = 59 [default = "warp"];
+
   // For ReshapeLayer, one needs to specify the new dimensions.
   optional int32 new_num = 60 [default = 0];
   optional int32 new_channels = 61 [default = 0];