From 13392972d930c92a2efda106839a5e879e28d741 Mon Sep 17 00:00:00 2001 From: Ross Girshick Date: Thu, 16 Jan 2014 16:22:08 -0800 Subject: [PATCH] support for tightest square mode while finetuning --- models/pascal_finetune.prototxt | 1 + models/pascal_finetune_solver.prototxt | 2 +- models/pascal_finetune_val.prototxt | 1 + src/caffe/layers/window_data_layer.cpp | 14 +++++++++++++- src/caffe/proto/caffe.proto | 5 +++++ 5 files changed, 21 insertions(+), 2 deletions(-) diff --git a/models/pascal_finetune.prototxt b/models/pascal_finetune.prototxt index d2a33f7..9bc565b 100644 --- a/models/pascal_finetune.prototxt +++ b/models/pascal_finetune.prototxt @@ -8,6 +8,7 @@ layers { batchsize: 128 cropsize: 227 context_pad: 16 + crop_mode: "warp" mirror: true det_fg_threshold: 0.5 det_bg_threshold: 0.5 diff --git a/models/pascal_finetune_solver.prototxt b/models/pascal_finetune_solver.prototxt index 8c46475..cbdef7f 100644 --- a/models/pascal_finetune_solver.prototxt +++ b/models/pascal_finetune_solver.prototxt @@ -11,4 +11,4 @@ max_iter: 100000 momentum: 0.9 weight_decay: 0.0005 snapshot: 10000 -snapshot_prefix: "./snapshots/pascal_context16_finetune_train" +snapshot_prefix: "./snapshots/pascal_warp_context16_finetune_train" diff --git a/models/pascal_finetune_val.prototxt b/models/pascal_finetune_val.prototxt index 18a680f..cece634 100644 --- a/models/pascal_finetune_val.prototxt +++ b/models/pascal_finetune_val.prototxt @@ -8,6 +8,7 @@ layers { batchsize: 128 cropsize: 227 context_pad: 16 + crop_mode: "warp" mirror: true det_fg_threshold: 0.5 det_bg_threshold: 0.5 diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index e8f52e7..2dae2f9 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -47,6 +47,9 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { const int mean_width = layer->data_mean_.width(); const int mean_height = layer->data_mean_.height(); cv::Size 
cv_crop_size(cropsize, cropsize); + const string& crop_mode = layer->layer_param_.crop_mode(); + + bool use_square = (crop_mode == "square") ? true : false; // zero out batch memset(top_data, 0, sizeof(Dtype)*layer->prefetch_data_->count()); @@ -93,7 +96,7 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { int pad_w = 0; int pad_h = 0; - if (context_pad > 0) { + if (context_pad > 0 || use_square) { // scale factor by which to expand the original region // such that after warping the expanded region to cropsize x cropsize // there's exactly context_pad amount of padding on each side @@ -105,6 +108,13 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { Dtype half_width = static_cast<Dtype>(x2-x1+1)/2.0; Dtype center_x = static_cast<Dtype>(x1) + half_width; Dtype center_y = static_cast<Dtype>(y1) + half_height; + if (use_square) { + if (half_height > half_width) { + half_width = half_height; + } else { + half_height = half_width; + } + } x1 = static_cast<int>(round(center_x - half_width*context_scale)); x2 = static_cast<int>(round(center_x + half_width*context_scale)); y1 = static_cast<int>(round(center_y - half_height*context_scale)); @@ -339,6 +349,8 @@ void WindowDataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom, LOG(INFO) << "Amount of context padding: " << this->layer_param_.context_pad(); + LOG(INFO) << "Crop mode: " << this->layer_param_.crop_mode(); + // image int cropsize = this->layer_param_.cropsize(); CHECK_GT(cropsize, 0); diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 5f82c19..b86e5f3 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -108,6 +108,11 @@ message LayerParameter { // (used only by the window_data_layer) optional uint32 context_pad = 58 [default = 0]; + // Mode for cropping out a detection window + // warp: cropped window is warped to a fixed size and aspect ratio + // square: the tightest square around the window is cropped + optional string crop_mode = 59 [default = "warp"]; + // For ReshapeLayer, one needs to specify 
the new dimensions. optional int32 new_num = 60 [default = 0]; optional int32 new_channels = 61 [default = 0]; -- 2.7.4