const Dtype scale = layer->layer_param_.scale();
const int batchsize = layer->layer_param_.batchsize();
const int cropsize = layer->layer_param_.cropsize();
+ const int context_pad = layer->layer_param_.context_pad();
const bool mirror = layer->layer_param_.mirror();
const float fg_fraction = layer->layer_param_.det_fg_fraction();
const Dtype* mean = layer->data_mean_.cpu_data();
const int mean_height = layer->data_mean_.height();
cv::Size cv_crop_size(cropsize, cropsize);
+ // zero out batch
+ memset(top_data, 0, sizeof(Dtype)*layer->prefetch_data_->count());
+
// CHECK_EQ(mean_width, mean_height);
// CHECK_EQ(mean_width, 256);
// CHECK_EQ(mean_off, 14);
? layer->fg_windows_[rand() % layer->fg_windows_.size()]
: layer->bg_windows_[rand() % layer->bg_windows_.size()];
+ bool do_mirror = false;
+ if (mirror && rand() % 2) {
+ do_mirror = true;
+ }
+
// load the image containing the window
std::pair<std::string, vector<int> > image =
layer->image_database_[window[WindowDataLayer<Dtype>::IMAGE_INDEX]];
// CHECK_EQ(channels, 3);
// crop window out of image and warp it
- const int x1 = window[WindowDataLayer<Dtype>::X1];
- const int y1 = window[WindowDataLayer<Dtype>::Y1];
- const int x2 = window[WindowDataLayer<Dtype>::X2];
- const int y2 = window[WindowDataLayer<Dtype>::Y2];
+ int x1 = window[WindowDataLayer<Dtype>::X1];
+ int y1 = window[WindowDataLayer<Dtype>::Y1];
+ int x2 = window[WindowDataLayer<Dtype>::X2];
+ int y2 = window[WindowDataLayer<Dtype>::Y2];
+
+ int pad_w = 0;
+ int pad_h = 0;
+ if (context_pad > 0) {
+ // scale factor by which to expand the original region
+ // such that after warping the expanded region to cropsize x cropsize
+ // there's exactly context_pad amount of padding on each side
+ Dtype context_scale = static_cast<Dtype>(cropsize) /
+ static_cast<Dtype>(cropsize - 2*context_pad);
+
+ // compute the expanded region
+ Dtype half_height = static_cast<Dtype>(y2-y1+1)/2.0;
+ Dtype half_width = static_cast<Dtype>(x2-x1+1)/2.0;
+ Dtype center_x = static_cast<Dtype>(x1) + half_width;
+ Dtype center_y = static_cast<Dtype>(y1) + half_height;
+ x1 = static_cast<int>(round(center_x - half_width*context_scale));
+ x2 = static_cast<int>(round(center_x + half_width*context_scale));
+ y1 = static_cast<int>(round(center_y - half_height*context_scale));
+ y2 = static_cast<int>(round(center_y + half_height*context_scale));
+
+ // the expanded region may go outside of the image
+ // so we compute the clipped (expanded) region and keep track of
+ // the extent beyond the image
+ int unclipped_height = y2-y1+1;
+ int unclipped_width = x2-x1+1;
+ int pad_x1 = std::max(0, -x1);
+ int pad_y1 = std::max(0, -y1);
+ int pad_x2 = std::max(0, x2 - cv_img.cols + 1);
+ int pad_y2 = std::max(0, y2 - cv_img.rows + 1);
+ // clip bounds
+ x1 = x1 + pad_x1;
+ x2 = x2 - pad_x2;
+ y1 = y1 + pad_y1;
+ y2 = y2 - pad_y2;
+ CHECK_GT(x1, -1);
+ CHECK_GT(y1, -1);
+ CHECK_LT(x2, cv_img.cols);
+ CHECK_LT(y2, cv_img.rows);
+
+ int clipped_height = y2-y1+1;
+ int clipped_width = x2-x1+1;
+
+ // scale factors that would be used to warp the unclipped
+ // expanded region
+ Dtype scale_x =
+ static_cast<Dtype>(cropsize)/static_cast<Dtype>(unclipped_width);
+ Dtype scale_y =
+ static_cast<Dtype>(cropsize)/static_cast<Dtype>(unclipped_height);
+
+ // size to warp the clipped expanded region to
+ cv_crop_size.width =
+ static_cast<int>(round(static_cast<Dtype>(clipped_width)*scale_x));
+ cv_crop_size.height =
+ static_cast<int>(round(static_cast<Dtype>(clipped_height)*scale_y));
+ pad_x1 = static_cast<int>(round(static_cast<Dtype>(pad_x1)*scale_x));
+ pad_x2 = static_cast<int>(round(static_cast<Dtype>(pad_x2)*scale_x));
+ pad_y1 = static_cast<int>(round(static_cast<Dtype>(pad_y1)*scale_y));
+ pad_y2 = static_cast<int>(round(static_cast<Dtype>(pad_y2)*scale_y));
+
+ pad_h = pad_y1;
+ // if we're mirroring, we mirror the padding too (to be pedantic)
+ if (do_mirror) {
+ pad_w = pad_x2;
+ } else {
+ pad_w = pad_x1;
+ }
+
+ // ensure that the warped, clipped region plus the padding
+ // fits in the cropsize x cropsize image (it might not due to rounding)
+ if (pad_h + cv_crop_size.height > cropsize) {
+ cv_crop_size.height = cropsize - pad_h;
+ }
+ if (pad_w + cv_crop_size.width > cropsize) {
+ cv_crop_size.width = cropsize - pad_w;
+ }
+ }
+
+// CHECK_GT(x1, -1);
+// CHECK_GT(y1, -1);
+// CHECK_LT(x1, cv_img.cols);
+// CHECK_LT(y1, cv_img.rows);
+// CHECK_GT(x2, x1-1);
+// CHECK_GT(y2, y1-1);
+// CHECK_LT(x2, cv_img.cols);
+// CHECK_LT(y2, cv_img.rows);
+
cv::Rect roi(x1, y1, x2-x1+1, y2-y1+1);
cv::Mat cv_cropped_img = cv_img(roi);
cv::resize(cv_cropped_img, cv_cropped_img,
cv_crop_size, 0, 0, cv::INTER_LINEAR);
// horizontal flip at random
-// bool is_mirror = false;
- if (mirror && rand() % 2) {
+ if (do_mirror) {
cv::flip(cv_cropped_img, cv_cropped_img, 1);
-// is_mirror = true;
}
-
- // TODO(rbg): this could probably be made more efficient
- // but this thread finishes before the GPU is ready,
- // so it's fine for now
+
+ // copy the warped window into top_data
for (int c = 0; c < channels; ++c) {
- for (int h = 0; h < cropsize; ++h) {
- for (int w = 0; w < cropsize; ++w) {
+ for (int h = 0; h < cv_cropped_img.rows; ++h) {
+ for (int w = 0; w < cv_cropped_img.cols; ++w) {
Dtype pixel =
static_cast<Dtype>(cv_cropped_img.at<cv::Vec3b>(h, w)[c]);
- top_data[((itemid * channels + c) * cropsize + h) * cropsize + w]
+ top_data[((itemid * channels + c) * cropsize + h + pad_h) * cropsize + w + pad_w]
= (pixel
- - mean[(c * mean_height + h + mean_off)
- * mean_width + w + mean_off])
+ - mean[(c * mean_height + h + mean_off + pad_h)
+ * mean_width + w + mean_off + pad_w])
* scale;
}
}
// ss >> file_id;
// std::ofstream inf((string("dump/") + file_id + string("_info.txt")).c_str(), std::ofstream::out);
// inf << image.first << std::endl
-// << x1+1 << std::endl
-// << y1+1 << std::endl
-// << x2+1 << std::endl
-// << y2+1 << std::endl
-// << is_mirror << std::endl
+// << window[WindowDataLayer<Dtype>::X1]+1 << std::endl
+// << window[WindowDataLayer<Dtype>::Y1]+1 << std::endl
+// << window[WindowDataLayer<Dtype>::X2]+1 << std::endl
+// << window[WindowDataLayer<Dtype>::Y2]+1 << std::endl
+// << do_mirror << std::endl
// << top_label[itemid] << std::endl
// << is_fg << std::endl;
-//// << "is_fg: " << is_fg << std::endl
-//// << "label: " << top_label[itemid] << " " << window[WindowDataLayer<Dtype>::LABEL] << std::endl
-//// << "num bg samples: " << num_samples[0] << std::endl
-//// << "num fg samples: " << num_samples[1];
// inf.close();
// std::ofstream top_data_file((string("dump/") + file_id + string("_data.txt")).c_str(),
// std::ofstream::out | std::ofstream::binary);
label_hist[window[WindowDataLayer::LABEL]]++;
}
- if (image_index % 1 == 0) {
+ if (image_index % 100 == 0) {
LOG(INFO) << "num: " << image_index << " "
<< image_path << " "
<< image_size[0] << " "
}
}
- LOG(INFO) << "Number of images: " << image_index;
+ LOG(INFO) << "Number of images: " << image_index+1;
for (int i = 0; i < 21; ++i) {
LOG(INFO) << "class " << i << " has " << label_hist[i] << " samples";
}
+ LOG(INFO) << "Amount of context padding: "
+ << this->layer_param_.context_pad();
+
// image
int cropsize = this->layer_param_.cropsize();
CHECK_GT(cropsize, 0);