// caffe.proto > LayerParameter
// 'source' field specifies the window_file
-// 'cropsize' indicates the desired warped size
+// 'crop_size' indicates the desired warped size
namespace caffe {
Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
Dtype* top_label = layer->prefetch_label_->mutable_cpu_data();
- const Dtype scale = layer->layer_param_.scale();
- const int batchsize = layer->layer_param_.batchsize();
- const int cropsize = layer->layer_param_.cropsize();
- const int context_pad = layer->layer_param_.det_context_pad();
- const bool mirror = layer->layer_param_.mirror();
- const float fg_fraction = layer->layer_param_.det_fg_fraction();
+ const Dtype scale = layer->layer_param_.window_data_param().scale();
+ const int batch_size = layer->layer_param_.window_data_param().batch_size();
+ const int crop_size = layer->layer_param_.window_data_param().crop_size();
+ const int context_pad = layer->layer_param_.window_data_param().context_pad();
+ const bool mirror = layer->layer_param_.window_data_param().mirror();
+ const float fg_fraction =
+ layer->layer_param_.window_data_param().fg_fraction();
const Dtype* mean = layer->data_mean_.cpu_data();
- const int mean_off = (layer->data_mean_.width() - cropsize) / 2;
+ const int mean_off = (layer->data_mean_.width() - crop_size) / 2;
const int mean_width = layer->data_mean_.width();
const int mean_height = layer->data_mean_.height();
- cv::Size cv_crop_size(cropsize, cropsize);
- const string& crop_mode = layer->layer_param_.det_crop_mode();
+ cv::Size cv_crop_size(crop_size, crop_size);
+ const string& crop_mode = layer->layer_param_.window_data_param().crop_mode();
bool use_square = (crop_mode == "square") ? true : false;
// zero out batch
memset(top_data, 0, sizeof(Dtype)*layer->prefetch_data_->count());
- const int num_fg = static_cast<int>(static_cast<float>(batchsize)
+ const int num_fg = static_cast<int>(static_cast<float>(batch_size)
* fg_fraction);
- const int num_samples[2] = { batchsize - num_fg, num_fg };
+ const int num_samples[2] = { batch_size - num_fg, num_fg };
int itemid = 0;
// sample from bg set then fg set
int pad_h = 0;
if (context_pad > 0 || use_square) {
// scale factor by which to expand the original region
- // such that after warping the expanded region to cropsize x cropsize
+ // such that after warping the expanded region to crop_size x crop_size
// there's exactly context_pad amount of padding on each side
- Dtype context_scale = static_cast<Dtype>(cropsize) /
- static_cast<Dtype>(cropsize - 2*context_pad);
+ Dtype context_scale = static_cast<Dtype>(crop_size) /
+ static_cast<Dtype>(crop_size - 2*context_pad);
// compute the expanded region
Dtype half_height = static_cast<Dtype>(y2-y1+1)/2.0;
// scale factors that would be used to warp the unclipped
// expanded region
Dtype scale_x =
- static_cast<Dtype>(cropsize)/static_cast<Dtype>(unclipped_width);
+ static_cast<Dtype>(crop_size)/static_cast<Dtype>(unclipped_width);
Dtype scale_y =
- static_cast<Dtype>(cropsize)/static_cast<Dtype>(unclipped_height);
+ static_cast<Dtype>(crop_size)/static_cast<Dtype>(unclipped_height);
// size to warp the clipped expanded region to
cv_crop_size.width =
pad_w = pad_x1;
- // ensure that the warped, clipped region plus the padding
- // fits in the cropsize x cropsize image (it might not due to rounding)
- if (pad_h + cv_crop_size.height > cropsize) {
- cv_crop_size.height = cropsize - pad_h;
+ // ensure that the warped, clipped region plus the padding fits in the
+ // crop_size x crop_size image (it might not due to rounding)
+ if (pad_h + cv_crop_size.height > crop_size) {
+ cv_crop_size.height = crop_size - pad_h;
- if (pad_w + cv_crop_size.width > cropsize) {
- cv_crop_size.width = cropsize - pad_w;
+ if (pad_w + cv_crop_size.width > crop_size) {
+ cv_crop_size.width = crop_size - pad_w;
Dtype pixel =
static_cast<Dtype>(cv_cropped_img.at<cv::Vec3b>(h, w)[c]);
- top_data[((itemid * channels + c) * cropsize + h + pad_h)
- * cropsize + w + pad_w]
+ top_data[((itemid * channels + c) * crop_size + h + pad_h)
+ * crop_size + w + pad_w]
= (pixel
- mean[(c * mean_height + h + mean_off + pad_h)
* mean_width + w + mean_off + pad_w])
std::ofstream::out | std::ofstream::binary);
for (int c = 0; c < channels; ++c) {
- for (int h = 0; h < cropsize; ++h) {
- for (int w = 0; w < cropsize; ++w) {
+ for (int h = 0; h < crop_size; ++h) {
+ for (int w = 0; w < crop_size; ++w) {
- &top_data[((itemid * channels + c) * cropsize + h)
- * cropsize + w]),
+ &top_data[((itemid * channels + c) * crop_size + h)
+ * crop_size + w]),
LOG(INFO) << "Window data layer:" << std::endl
<< " foreground (object) overlap threshold: "
- << this->layer_param_.det_fg_threshold() << std::endl
+ << this->layer_param_.window_data_param().fg_threshold() << std::endl
<< " background (non-object) overlap threshold: "
- << this->layer_param_.det_bg_threshold() << std::endl
+ << this->layer_param_.window_data_param().bg_threshold() << std::endl
<< " foreground sampling fraction: "
- << this->layer_param_.det_fg_fraction();
+ << this->layer_param_.window_data_param().fg_fraction();
- std::ifstream infile(this->layer_param_.source().c_str());
+ std::ifstream infile(this->layer_param_.window_data_param().source().c_str());
CHECK(infile.good()) << "Failed to open window file "
- << this->layer_param_.source() << std::endl;
+ << this->layer_param_.window_data_param().source() << std::endl;
map<int, int> label_hist;
label_hist.insert(std::make_pair(0, 0));
// read each box
int num_windows;
infile >> num_windows;
+ const float fg_threshold =
+ this->layer_param_.window_data_param().fg_threshold();
+ const float bg_threshold =
+ this->layer_param_.window_data_param().bg_threshold();
for (int i = 0; i < num_windows; ++i) {
int label, x1, y1, x2, y2;
float overlap;
window[WindowDataLayer::Y2] = y2;
// add window to foreground list or background list
- if (overlap >= this->layer_param_.det_fg_threshold()) {
+ if (overlap >= fg_threshold) {
int label = window[WindowDataLayer::LABEL];
CHECK_GT(label, 0);
label_hist.insert(std::make_pair(label, 0));
- } else if (overlap < this->layer_param_.det_bg_threshold()) {
+ } else if (overlap < bg_threshold) {
// background window, force label and overlap to 0
window[WindowDataLayer::LABEL] = 0;
window[WindowDataLayer::OVERLAP] = 0;
LOG(INFO) << "Amount of context padding: "
- << this->layer_param_.det_context_pad();
+ << this->layer_param_.window_data_param().context_pad();
- LOG(INFO) << "Crop mode: " << this->layer_param_.det_crop_mode();
+ LOG(INFO) << "Crop mode: "
+ << this->layer_param_.window_data_param().crop_mode();
// image
- int cropsize = this->layer_param_.cropsize();
- CHECK_GT(cropsize, 0);
- (*top)[0]->Reshape(
- this->layer_param_.batchsize(), channels, cropsize, cropsize);
- prefetch_data_.reset(new Blob<Dtype>(
- this->layer_param_.batchsize(), channels, cropsize, cropsize));
+ int crop_size = this->layer_param_.window_data_param().crop_size();
+ CHECK_GT(crop_size, 0);
+ const int batch_size = this->layer_param_.window_data_param().batch_size();
+ (*top)[0]->Reshape(batch_size, channels, crop_size, crop_size);
+ prefetch_data_.reset(
+ new Blob<Dtype>(batch_size, channels, crop_size, crop_size));
LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
<< (*top)[0]->channels() << "," << (*top)[0]->height() << ","
<< (*top)[0]->width();
// label
- (*top)[1]->Reshape(this->layer_param_.batchsize(), 1, 1, 1);
+ (*top)[1]->Reshape(batch_size, 1, 1, 1);
- new Blob<Dtype>(this->layer_param_.batchsize(), 1, 1, 1));
+ new Blob<Dtype>(batch_size, 1, 1, 1));
// check if we want to have mean
- if (this->layer_param_.has_meanfile()) {
+ if (this->layer_param_.window_data_param().has_mean_file()) {
+ const string& mean_file =
+ this->layer_param_.window_data_param().mean_file();
+ LOG(INFO) << "Loading mean file from" << mean_file;
BlobProto blob_proto;
- LOG(INFO) << "Loading mean file from" << this->layer_param_.meanfile();
- ReadProtoFromBinaryFile(this->layer_param_.meanfile().c_str(), &blob_proto);
+ ReadProtoFromBinaryFile(mean_file, &blob_proto);
CHECK_EQ(data_mean_.num(), 1);
CHECK_EQ(data_mean_.width(), data_mean_.height());
CHECK_EQ(data_mean_.channels(), channels);
} else {
// Simply initialize an all-empty mean.
- data_mean_.Reshape(1, channels, cropsize, cropsize);
+ data_mean_.Reshape(1, channels, crop_size, crop_size);
// Now, start the prefetch thread. Before calling prefetch, we make two
// cpu_data calls so that the prefetch thread does not accidentally make
optional LRNParameter lrn_param = 16;
optional PaddingParameter padding_param = 17;
optional PoolingParameter pooling_param = 18;
+ optional WindowDataParameter window_data_param = 19;
// Message that stores parameters used by ConcatLayer
optional uint32 stride = 3 [default = 1]; // The stride
+// Message that stores parameters used by WindowDataLayer
message WindowDataParameter {
- // Fields related to detection (det_*)
- // foreground (object) overlap threshold
- optional float det_fg_threshold = 54 [default = 0.5];
- // background (non-object) overlap threshold
- optional float det_bg_threshold = 55 [default = 0.5];
+ // Specify the data source.
+ optional string source = 1;
+ // For data pre-processing, we can do simple scaling and subtracting the
+ // data mean, if provided. Note that the mean subtraction is always carried
+ // out before scaling.
+ optional float scale = 2 [default = 1];
+ optional string mean_file = 3;
+ // Specify the batch size.
+ optional uint32 batch_size = 4;
+ // Side length of the square crop each window is warped to (must be > 0).
+ optional uint32 crop_size = 5 [default = 0];
+ // Specify if we want to randomly mirror data.
+ optional bool mirror = 6 [default = false];
+ // Foreground (object) overlap threshold
+ optional float fg_threshold = 7 [default = 0.5];
+ // Background (non-object) overlap threshold
+ optional float bg_threshold = 8 [default = 0.5];
// Fraction of batch that should be foreground objects
- optional float det_fg_fraction = 56 [default = 0.25];
- // optional bool OBSOLETE_can_clobber = 57 [ default = true ];
+ optional float fg_fraction = 9 [default = 0.25];
// Amount of contextual padding to add around a window
// (used only by the window_data_layer)
- optional uint32 det_context_pad = 58 [default = 0];
+ optional uint32 context_pad = 10 [default = 0];
// Mode for cropping out a detection window
// warp: cropped window is warped to a fixed size and aspect ratio
// square: the tightest square around the window is cropped
- optional string det_crop_mode = 59 [default = "warp"];
+ optional string crop_mode = 11 [default = "warp"];
message HDF5OutputParameter {