incorporate WindowDataLayer

author Jeff Donahue <jeff.donahue@gmail.com>

Fri, 21 Mar 2014 20:45:38 +0000 (13:45 -0700)

committer Jeff Donahue <jeff.donahue@gmail.com>

Fri, 28 Mar 2014 06:42:28 +0000 (23:42 -0700)
author Jeff Donahue <jeff.donahue@gmail.com>
Fri, 21 Mar 2014 20:45:38 +0000 (13:45 -0700)
committer Jeff Donahue <jeff.donahue@gmail.com>
Fri, 28 Mar 2014 06:42:28 +0000 (23:42 -0700)
diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp

index bf62bcb49c2003f862d9f69e8275bbf5349c14eb..9346c6f057485114071cbd2bb136f26cf434e101 100644 (file)
--- a/src/caffe/layers/window_data_layer.cpp
+++ b/src/caffe/layers/window_data_layer.cpp
@@ -26,7 +26,7 @@ using std::pair;
  
  // caffe.proto > LayerParameter
  //   'source' field specifies the window_file
-//   'cropsize' indicates the desired warped size
+//   'crop_size' indicates the desired warped size
  
  namespace caffe {
  
@@ -40,27 +40,28 @@ void* WindowDataLayerPrefetch(void* layer_pointer) {
  
    Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
    Dtype* top_label = layer->prefetch_label_->mutable_cpu_data();
-  const Dtype scale = layer->layer_param_.scale();
-  const int batchsize = layer->layer_param_.batchsize();
-  const int cropsize = layer->layer_param_.cropsize();
-  const int context_pad = layer->layer_param_.det_context_pad();
-  const bool mirror = layer->layer_param_.mirror();
-  const float fg_fraction = layer->layer_param_.det_fg_fraction();
+  const Dtype scale = layer->layer_param_.window_data_param().scale();
+  const int batch_size = layer->layer_param_.window_data_param().batch_size();
+  const int crop_size = layer->layer_param_.window_data_param().crop_size();
+  const int context_pad = layer->layer_param_.window_data_param().context_pad();
+  const bool mirror = layer->layer_param_.window_data_param().mirror();
+  const float fg_fraction =
+      layer->layer_param_.window_data_param().fg_fraction();
    const Dtype* mean = layer->data_mean_.cpu_data();
-  const int mean_off = (layer->data_mean_.width() - cropsize) / 2;
+  const int mean_off = (layer->data_mean_.width() - crop_size) / 2;
    const int mean_width = layer->data_mean_.width();
    const int mean_height = layer->data_mean_.height();
-  cv::Size cv_crop_size(cropsize, cropsize);
-  const string& crop_mode = layer->layer_param_.det_crop_mode();
+  cv::Size cv_crop_size(crop_size, crop_size);
+  const string& crop_mode = layer->layer_param_.window_data_param().crop_mode();
  
    bool use_square = (crop_mode == "square") ? true : false;
  
    // zero out batch
    memset(top_data, 0, sizeof(Dtype)*layer->prefetch_data_->count());
  
-  const int num_fg = static_cast<int>(static_cast<float>(batchsize)
+  const int num_fg = static_cast<int>(static_cast<float>(batch_size)
        * fg_fraction);
-  const int num_samples[2] = { batchsize - num_fg, num_fg };
+  const int num_samples[2] = { batch_size - num_fg, num_fg };
  
    int itemid = 0;
    // sample from bg set then fg set
@@ -100,10 +101,10 @@ void* WindowDataLayerPrefetch(void* layer_pointer) {
        int pad_h = 0;
        if (context_pad > 0 || use_square) {
          // scale factor by which to expand the original region
-        // such that after warping the expanded region to cropsize x cropsize
+        // such that after warping the expanded region to crop_size x crop_size
          // there's exactly context_pad amount of padding on each side
-        Dtype context_scale = static_cast<Dtype>(cropsize) /
-            static_cast<Dtype>(cropsize - 2*context_pad);
+        Dtype context_scale = static_cast<Dtype>(crop_size) /
+            static_cast<Dtype>(crop_size - 2*context_pad);
  
          // compute the expanded region
          Dtype half_height = static_cast<Dtype>(y2-y1+1)/2.0;
@@ -147,9 +148,9 @@ void* WindowDataLayerPrefetch(void* layer_pointer) {
          // scale factors that would be used to warp the unclipped
          // expanded region
          Dtype scale_x =
-            static_cast<Dtype>(cropsize)/static_cast<Dtype>(unclipped_width);
+            static_cast<Dtype>(crop_size)/static_cast<Dtype>(unclipped_width);
          Dtype scale_y =
-            static_cast<Dtype>(cropsize)/static_cast<Dtype>(unclipped_height);
+            static_cast<Dtype>(crop_size)/static_cast<Dtype>(unclipped_height);
  
          // size to warp the clipped expanded region to
          cv_crop_size.width =
@@ -169,13 +170,13 @@ void* WindowDataLayerPrefetch(void* layer_pointer) {
            pad_w = pad_x1;
          }
  
-        // ensure that the warped, clipped region plus the padding
-        // fits in the cropsize x cropsize image (it might not due to rounding)
-        if (pad_h + cv_crop_size.height > cropsize) {
-          cv_crop_size.height = cropsize - pad_h;
+        // ensure that the warped, clipped region plus the padding fits in the
+        // crop_size x crop_size image (it might not due to rounding)
+        if (pad_h + cv_crop_size.height > crop_size) {
+          cv_crop_size.height = crop_size - pad_h;
          }
-        if (pad_w + cv_crop_size.width > cropsize) {
-          cv_crop_size.width = cropsize - pad_w;
+        if (pad_w + cv_crop_size.width > crop_size) {
+          cv_crop_size.width = crop_size - pad_w;
          }
        }
  
@@ -196,8 +197,8 @@ void* WindowDataLayerPrefetch(void* layer_pointer) {
              Dtype pixel =
                  static_cast<Dtype>(cv_cropped_img.at<cv::Vec3b>(h, w)[c]);
  
-            top_data[((itemid * channels + c) * cropsize + h + pad_h)
-                     * cropsize + w + pad_w]
+            top_data[((itemid * channels + c) * crop_size + h + pad_h)
+                     * crop_size + w + pad_w]
                  = (pixel
                      - mean[(c * mean_height + h + mean_off + pad_h)
                             * mean_width + w + mean_off + pad_w])
@@ -231,11 +232,11 @@ void* WindowDataLayerPrefetch(void* layer_pointer) {
            string("_data.txt")).c_str(),
            std::ofstream::out | std::ofstream::binary);
        for (int c = 0; c < channels; ++c) {
-        for (int h = 0; h < cropsize; ++h) {
-          for (int w = 0; w < cropsize; ++w) {
+        for (int h = 0; h < crop_size; ++h) {
+          for (int w = 0; w < crop_size; ++w) {
              top_data_file.write(reinterpret_cast<char*>(
-                &top_data[((itemid * channels + c) * cropsize + h)
-                          * cropsize + w]),
+                &top_data[((itemid * channels + c) * crop_size + h)
+                          * crop_size + w]),
                  sizeof(Dtype));
            }
          }
@@ -278,15 +279,15 @@ void WindowDataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
  
    LOG(INFO) << "Window data layer:" << std::endl
        << "  foreground (object) overlap threshold: "
-      << this->layer_param_.det_fg_threshold() << std::endl
+      << this->layer_param_.window_data_param().fg_threshold() << std::endl
        << "  background (non-object) overlap threshold: "
-      << this->layer_param_.det_bg_threshold() << std::endl
+      << this->layer_param_.window_data_param().bg_threshold() << std::endl
        << "  foreground sampling fraction: "
-      << this->layer_param_.det_fg_fraction();
+      << this->layer_param_.window_data_param().fg_fraction();
  
-  std::ifstream infile(this->layer_param_.source().c_str());
+  std::ifstream infile(this->layer_param_.window_data_param().source().c_str());
    CHECK(infile.good()) << "Failed to open window file "
-      << this->layer_param_.source() << std::endl;
+      << this->layer_param_.window_data_param().source() << std::endl;
  
    map<int, int> label_hist;
    label_hist.insert(std::make_pair(0, 0));
@@ -307,6 +308,10 @@ void WindowDataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
      // read each box
      int num_windows;
      infile >> num_windows;
+    const float fg_threshold =
+        this->layer_param_.window_data_param().fg_threshold();
+    const float bg_threshold =
+        this->layer_param_.window_data_param().bg_threshold();
      for (int i = 0; i < num_windows; ++i) {
        int label, x1, y1, x2, y2;
        float overlap;
@@ -322,13 +327,13 @@ void WindowDataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
        window[WindowDataLayer::Y2] = y2;
  
        // add window to foreground list or background list
-      if (overlap >= this->layer_param_.det_fg_threshold()) {
+      if (overlap >= fg_threshold) {
          int label = window[WindowDataLayer::LABEL];
          CHECK_GT(label, 0);
          fg_windows_.push_back(window);
          label_hist.insert(std::make_pair(label, 0));
          label_hist[label]++;
-      } else if (overlap < this->layer_param_.det_bg_threshold()) {
+      } else if (overlap < bg_threshold) {
          // background window, force label and overlap to 0
          window[WindowDataLayer::LABEL] = 0;
          window[WindowDataLayer::OVERLAP] = 0;
@@ -356,38 +361,41 @@ void WindowDataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
    }
  
    LOG(INFO) << "Amount of context padding: "
-      << this->layer_param_.det_context_pad();
+      << this->layer_param_.window_data_param().context_pad();
  
-  LOG(INFO) << "Crop mode: " << this->layer_param_.det_crop_mode();
+  LOG(INFO) << "Crop mode: "
+      << this->layer_param_.window_data_param().crop_mode();
  
    // image
-  int cropsize = this->layer_param_.cropsize();
-  CHECK_GT(cropsize, 0);
-  (*top)[0]->Reshape(
-      this->layer_param_.batchsize(), channels, cropsize, cropsize);
-  prefetch_data_.reset(new Blob<Dtype>(
-      this->layer_param_.batchsize(), channels, cropsize, cropsize));
+  int crop_size = this->layer_param_.window_data_param().crop_size();
+  CHECK_GT(crop_size, 0);
+  const int batch_size = this->layer_param_.window_data_param().batch_size();
+  (*top)[0]->Reshape(batch_size, channels, crop_size, crop_size);
+  prefetch_data_.reset(
+      new Blob<Dtype>(batch_size, channels, crop_size, crop_size));
  
    LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
        << (*top)[0]->channels() << "," << (*top)[0]->height() << ","
        << (*top)[0]->width();
    // label
-  (*top)[1]->Reshape(this->layer_param_.batchsize(), 1, 1, 1);
+  (*top)[1]->Reshape(batch_size, 1, 1, 1);
    prefetch_label_.reset(
-      new Blob<Dtype>(this->layer_param_.batchsize(), 1, 1, 1));
+      new Blob<Dtype>(batch_size, 1, 1, 1));
  
    // check if we want to have mean
-  if (this->layer_param_.has_meanfile()) {
+  if (this->layer_param_.window_data_param().has_mean_file()) {
+    const string& mean_file =
+        this->layer_param_.window_data_param().mean_file();
+    LOG(INFO) << "Loading mean file from" << mean_file;
      BlobProto blob_proto;
-    LOG(INFO) << "Loading mean file from" << this->layer_param_.meanfile();
-    ReadProtoFromBinaryFile(this->layer_param_.meanfile().c_str(), &blob_proto);
+    ReadProtoFromBinaryFile(mean_file, &blob_proto);
      data_mean_.FromProto(blob_proto);
      CHECK_EQ(data_mean_.num(), 1);
      CHECK_EQ(data_mean_.width(), data_mean_.height());
      CHECK_EQ(data_mean_.channels(), channels);
    } else {
      // Simply initialize an all-empty mean.
-    data_mean_.Reshape(1, channels, cropsize, cropsize);
+    data_mean_.Reshape(1, channels, crop_size, crop_size);
    }
    // Now, start the prefetch thread. Before calling prefetch, we make two
    // cpu_data calls so that the prefetch thread does not accidentally make
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto

index 90113c96d022d715798892202652e01306c22bfb..51ea340c6b74692f9c747d9718614e5ced3f7db4 100644 (file)
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -153,6 +153,7 @@ message LayerParameter {
    optional LRNParameter lrn_param = 16;
    optional PaddingParameter padding_param = 17;
    optional PoolingParameter pooling_param = 18;
+  optional WindowDataParameter window_data_param = 19;
  }
  
  // Message that stores parameters used by ConcatLayer
@@ -275,25 +276,34 @@ message PoolingParameter {
    optional uint32 stride = 3 [default = 1]; // The stride
  }
  
+// Message that stores parameters used by WindowDataLayer
  message WindowDataParameter {
-  // Fields related to detection (det_*)
-  // foreground (object) overlap threshold
-  optional float det_fg_threshold = 54 [default = 0.5];
-  // background (non-object) overlap threshold
-  optional float det_bg_threshold = 55 [default = 0.5];
+  // Specify the data source.
+  optional string source = 1;
+  // For data pre-processing, we can do simple scaling and subtracting the
+  // data mean, if provided. Note that the mean subtraction is always carried
+  // out before scaling.
+  optional float scale = 2 [default = 1];
+  optional string mean_file = 3;
+  // Specify the batch size.
+  optional uint32 batch_size = 4;
+  // Specify if we would like to randomly crop an image.
+  optional uint32 crop_size = 5 [default = 0];
+  // Specify if we want to randomly mirror data.
+  optional bool mirror = 6 [default = false];
+  // Foreground (object) overlap threshold
+  optional float fg_threshold = 7 [default = 0.5];
+  // Background (non-object) overlap threshold
+  optional float bg_threshold = 8 [default = 0.5];
    // Fraction of batch that should be foreground objects
-  optional float det_fg_fraction = 56 [default = 0.25];
-
-  // optional bool OBSOLETE_can_clobber = 57 [ default = true ];
-
+  optional float fg_fraction = 9 [default = 0.25];
    // Amount of contextual padding to add around a window
    // (used only by the window_data_layer)
-  optional uint32 det_context_pad = 58 [default = 0];
-
+  optional uint32 context_pad = 10 [default = 0];
    // Mode for cropping out a detection window
    // warp: cropped window is warped to a fixed size and aspect ratio
    // square: the tightest square around the window is cropped
-  optional string det_crop_mode = 59 [default = "warp"];
+  optional string crop_mode = 11 [default = "warp"];
  }
  
  message HDF5OutputParameter {
author	Jeff Donahue <jeff.donahue@gmail.com>
	Fri, 21 Mar 2014 20:45:38 +0000 (13:45 -0700)
committer	Jeff Donahue <jeff.donahue@gmail.com>
	Fri, 28 Mar 2014 06:42:28 +0000 (23:42 -0700)
src/caffe/layers/window_data_layer.cpp		patch | blob | history
src/caffe/proto/caffe.proto		patch | blob | history