From ec6f3b8466f68fc82f8b25dd048c26d3fd630784 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 19 Mar 2014 20:50:13 -0700 Subject: [PATCH] minor cleanup in rcnn-finetuning -- rcnn feature computation tested at this commit (in addition to all caffe unit tests passing) --- matlab/caffe/matcaffe.cpp | 18 ++++--- src/caffe/layers/window_data_layer.cpp | 90 ++++++++++++++++++---------------- 2 files changed, 59 insertions(+), 49 deletions(-) diff --git a/matlab/caffe/matcaffe.cpp b/matlab/caffe/matcaffe.cpp index 524393a..1913abd 100644 --- a/matlab/caffe/matcaffe.cpp +++ b/matlab/caffe/matcaffe.cpp @@ -25,7 +25,7 @@ static int init_key = -2; // matlab uses RGB color channel order // images need to have the data mean subtracted // -// Data coming in from matlab needs to be in the order +// Data coming in from matlab needs to be in the order // [width, height, channels, images] // where width is the fastest dimension. // Here is the rough matlab for putting image data into the correct @@ -131,7 +131,7 @@ static mxArray* do_get_weights() { prev_layer_name = layer_names[i]; const mwSize dims[2] = {layer_blobs.size(), 1}; mx_layer_cells = mxCreateCellArray(2, dims); - mxSetField(mx_layers, mx_layer_index, "weights", mx_layer_cells); + mxSetField(mx_layers, mx_layer_index, "weights", mx_layer_cells); mxSetField(mx_layers, mx_layer_index, "layer_names", mxCreateString(layer_names[i].c_str())); mx_layer_index++; @@ -142,18 +142,19 @@ static mxArray* do_get_weights() { // where width is the fastest dimension mwSize dims[4] = {layer_blobs[j]->width(), layer_blobs[j]->height(), layer_blobs[j]->channels(), layer_blobs[j]->num()}; - mxArray* mx_weights = mxCreateNumericArray(4, dims, mxSINGLE_CLASS, mxREAL); + mxArray* mx_weights = mxCreateNumericArray(4, dims, mxSINGLE_CLASS, + mxREAL); mxSetCell(mx_layer_cells, j, mx_weights); float* weights_ptr = reinterpret_cast(mxGetPr(mx_weights)); -// mexPrintf("layer: %s (%d) blob: %d %d: (%d, %d, %d) %d\n", -// layer_names[i].c_str(), i, j, layer_blobs[j]->num(), -// layer_blobs[j]->height(), layer_blobs[j]->width(), -// layer_blobs[j]->channels(), layer_blobs[j]->count()); + // mexPrintf("layer: %s (%d) blob: %d %d: (%d, %d, %d) %d\n", + // layer_names[i].c_str(), i, j, layer_blobs[j]->num(), + // layer_blobs[j]->height(), layer_blobs[j]->width(), + // layer_blobs[j]->channels(), layer_blobs[j]->count()); switch (Caffe::mode()) { case Caffe::CPU: - memcpy(weights_ptr, layer_blobs[j]->cpu_data(), + memcpy(weights_ptr, layer_blobs[j]->cpu_data(), sizeof(float) * layer_blobs[j]->count()); break; case Caffe::GPU: @@ -219,6 +220,7 @@ static void init(MEX_ARGS) { mxFree(param_file); mxFree(model_file); + // NOLINT_NEXT_LINE(runtime/threadsafe_fn) init_key = rand(); if (nlhs == 1) { plhs[0] = mxCreateDoubleScalar(init_key); diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index 3fdc4f9..87fb541 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -5,19 +5,21 @@ #include #include +#include #include #include #include -#include +#include // NOLINT(readability/streams) +#include + +#include "opencv2/core/core.hpp" +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/imgproc/imgproc.hpp" #include "caffe/layer.hpp" #include "caffe/util/io.hpp" #include "caffe/vision_layers.hpp" -#include -#include -#include - using std::string; using std::map; using std::pair; @@ -30,7 +32,7 @@ namespace caffe { template void* WindowDataLayerPrefetch(void* layer_pointer) { - WindowDataLayer* layer = + WindowDataLayer* layer = reinterpret_cast*>(layer_pointer); // At each iteration, sample N windows where N*p are foreground (object) @@ -56,7 +58,7 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { // zero out batch memset(top_data, 0, sizeof(Dtype)*layer->prefetch_data_->count()); - const int num_fg = static_cast(static_cast(batchsize) + const int num_fg = static_cast(static_cast(batchsize) * fg_fraction); const int num_samples[2] = { batchsize - num_fg, num_fg }; @@ -65,23 +67,26 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { for (int is_fg = 0; is_fg < 2; ++is_fg) { for (int dummy = 0; dummy < num_samples[is_fg]; ++dummy) { // sample a window - vector window = (is_fg) + vector window = (is_fg) + // NOLINT_NEXT_LINE(runtime/threadsafe_fn) ? layer->fg_windows_[rand() % layer->fg_windows_.size()] + // NOLINT_NEXT_LINE(runtime/threadsafe_fn) : layer->bg_windows_[rand() % layer->bg_windows_.size()]; bool do_mirror = false; + // NOLINT_NEXT_LINE(runtime/threadsafe_fn) if (mirror && rand() % 2) { do_mirror = true; } // load the image containing the window - pair > image = + pair > image = layer->image_database_[window[WindowDataLayer::IMAGE_INDEX]]; cv::Mat cv_img = cv::imread(image.first, CV_LOAD_IMAGE_COLOR); if (!cv_img.data) { LOG(ERROR) << "Could not open or find file " << image.first; - return (void*)NULL; + return reinterpret_cast(NULL); } const int channels = cv_img.channels(); @@ -94,7 +99,7 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { int pad_w = 0; int pad_h = 0; if (context_pad > 0 || use_square) { - // scale factor by which to expand the original region + // scale factor by which to expand the original region // such that after warping the expanded region to cropsize x cropsize // there's exactly context_pad amount of padding on each side Dtype context_scale = static_cast(cropsize) / @@ -116,7 +121,7 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { x2 = static_cast(round(center_x + half_width*context_scale)); y1 = static_cast(round(center_y - half_height*context_scale)); y2 = static_cast(round(center_y + half_height*context_scale)); - + // the expanded region may go outside of the image // so we compute the clipped (expanded) region and keep track of // the extent beyond the image @@ -139,17 +144,17 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { int clipped_height = y2-y1+1; int clipped_width = x2-x1+1; - // scale factors that would be used to warp the unclipped + // scale factors that would be used to warp the unclipped // expanded region - Dtype scale_x = + Dtype scale_x = static_cast(cropsize)/static_cast(unclipped_width); - Dtype scale_y = + Dtype scale_y = static_cast(cropsize)/static_cast(unclipped_height); // size to warp the clipped expanded region to - cv_crop_size.width = + cv_crop_size.width = static_cast(round(static_cast(clipped_width)*scale_x)); - cv_crop_size.height = + cv_crop_size.height = static_cast(round(static_cast(clipped_height)*scale_y)); pad_x1 = static_cast(round(static_cast(pad_x1)*scale_x)); pad_x2 = static_cast(round(static_cast(pad_x2)*scale_x)); @@ -176,9 +181,9 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { cv::Rect roi(x1, y1, x2-x1+1, y2-y1+1); cv::Mat cv_cropped_img = cv_img(roi); - cv::resize(cv_cropped_img, cv_cropped_img, + cv::resize(cv_cropped_img, cv_cropped_img, cv_crop_size, 0, 0, cv::INTER_LINEAR); - + // horizontal flip at random if (do_mirror) { cv::flip(cv_cropped_img, cv_cropped_img, 1); @@ -188,12 +193,13 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { for (int c = 0; c < channels; ++c) { for (int h = 0; h < cv_cropped_img.rows; ++h) { for (int w = 0; w < cv_cropped_img.cols; ++w) { - Dtype pixel = + Dtype pixel = static_cast(cv_cropped_img.at(h, w)[c]); - top_data[((itemid * channels + c) * cropsize + h + pad_h) * cropsize + w + pad_w] + top_data[((itemid * channels + c) * cropsize + h + pad_h) + * cropsize + w + pad_w] = (pixel - - mean[(c * mean_height + h + mean_off + pad_h) + - mean[(c * mean_height + h + mean_off + pad_h) * mean_width + w + mean_off + pad_w]) * scale; } @@ -207,11 +213,12 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { // useful debugging code for dumping transformed windows to disk string file_id; std::stringstream ss; + // NOLINT_NEXT_LINE(runtime/threadsafe_fn) ss << rand(); ss >> file_id; - std::ofstream inf((string("dump/") + file_id + + std::ofstream inf((string("dump/") + file_id + string("_info.txt")).c_str(), std::ofstream::out); - inf << image.first << std::endl + inf << image.first << std::endl << window[WindowDataLayer::X1]+1 << std::endl << window[WindowDataLayer::Y1]+1 << std::endl << window[WindowDataLayer::X2]+1 << std::endl @@ -220,15 +227,15 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { << top_label[itemid] << std::endl << is_fg << std::endl; inf.close(); - std::ofstream top_data_file((string("dump/") + file_id + - string("_data.txt")).c_str(), + std::ofstream top_data_file((string("dump/") + file_id + + string("_data.txt")).c_str(), std::ofstream::out | std::ofstream::binary); for (int c = 0; c < channels; ++c) { for (int h = 0; h < cropsize; ++h) { for (int w = 0; w < cropsize; ++w) { - top_data_file.write( - reinterpret_cast(&top_data[((itemid * channels + c) - * cropsize + h) * cropsize + w]), + top_data_file.write(reinterpret_cast( + &top_data[((itemid * channels + c) * cropsize + h) + * cropsize + w]), sizeof(Dtype)); } } @@ -240,7 +247,7 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { } } - return (void*)NULL; + return reinterpret_cast(NULL); } template @@ -251,9 +258,9 @@ WindowDataLayer::~WindowDataLayer() { template void WindowDataLayer::SetUp(const vector*>& bottom, vector*>* top) { - // SetUp runs through the window_file and creates two structures - // that hold windows: one for foreground (object) windows and one - // for background (non-object) windows. We use an overlap threshold + // SetUp runs through the window_file and creates two structures + // that hold windows: one for foreground (object) windows and one + // for background (non-object) windows. We use an overlap threshold // to decide which is which. CHECK_EQ(bottom.size(), 0) << "Window data Layer takes no input blobs."; @@ -270,15 +277,15 @@ void WindowDataLayer::SetUp(const vector*>& bottom, // class_index overlap x1 y1 x2 y2 LOG(INFO) << "Window data layer:" << std::endl - << " foreground (object) overlap threshold: " + << " foreground (object) overlap threshold: " << this->layer_param_.det_fg_threshold() << std::endl - << " background (non-object) overlap threshold: " + << " background (non-object) overlap threshold: " << this->layer_param_.det_bg_threshold() << std::endl << " foreground sampling fraction: " << this->layer_param_.det_fg_fraction(); std::ifstream infile(this->layer_param_.source().c_str()); - CHECK(infile.good()) << "Failed to open window file " + CHECK(infile.good()) << "Failed to open window file " << this->layer_param_.source() << std::endl; map label_hist; @@ -313,7 +320,7 @@ void WindowDataLayer::SetUp(const vector*>& bottom, window[WindowDataLayer::Y1] = y1; window[WindowDataLayer::X2] = x2; window[WindowDataLayer::Y2] = y2; - + // add window to foreground list or background list if (overlap >= this->layer_param_.det_fg_threshold()) { int label = window[WindowDataLayer::LABEL]; @@ -332,7 +339,7 @@ void WindowDataLayer::SetUp(const vector*>& bottom, if (image_index % 100 == 0) { LOG(INFO) << "num: " << image_index << " " - << image_path << " " + << image_path << " " << image_size[0] << " " << image_size[1] << " " << image_size[2] << " " @@ -342,12 +349,13 @@ void WindowDataLayer::SetUp(const vector*>& bottom, LOG(INFO) << "Number of images: " << image_index+1; - for (map::iterator it = label_hist.begin(); + for (map::iterator it = label_hist.begin(); it != label_hist.end(); ++it) { - LOG(INFO) << "class " << it->first << " has " << label_hist[it->first] << " samples"; + LOG(INFO) << "class " << it->first << " has " << label_hist[it->first] + << " samples"; } - LOG(INFO) << "Amount of context padding: " + LOG(INFO) << "Amount of context padding: " << this->layer_param_.det_context_pad(); LOG(INFO) << "Crop mode: " << this->layer_param_.det_crop_mode(); -- 2.7.4