From: Ross Girshick
Date: Thu, 13 Mar 2014 23:35:10 +0000 (-0700)
Subject: add window data layer
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=fc7930648273886d75ddc27bf67868fbc2673b80;p=platform%2Fupstream%2Fcaffe.git

add window data layer
---

diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp
index 1002c59..54e90d2 100644
--- a/src/caffe/layer_factory.cpp
+++ b/src/caffe/layer_factory.cpp
@@ -65,6 +65,8 @@ Layer<Dtype>* GetLayer(const LayerParameter& param) {
     return new SplitLayer<Dtype>(param);
   } else if (type == "tanh") {
     return new TanHLayer<Dtype>(param);
+  } else if (type == "window_data") {
+    return new WindowDataLayer<Dtype>(param);
   } else {
     LOG(FATAL) << "Unknown layer name: " << type;
   }
diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp
new file mode 100644
index 0000000..7a62a6e
--- /dev/null
+++ b/src/caffe/layers/window_data_layer.cpp
@@ -0,0 +1,289 @@
+// Copyright 2013 Ross Girshick
+
+#include <stdint.h>
+#include <pthread.h>
+
+#include <string>
+#include <vector>
+#include <fstream>
+
+#include "caffe/layer.hpp"
+#include "caffe/util/io.hpp"
+#include "caffe/vision_layers.hpp"
+
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+using std::string;
+
+// caffe.proto > LayerParameter
+//   'source' field specifies the window_file
+//   'cropsize' indicates the desired warped size
+
+// TODO(rbg):
+//  - try uniform sampling over classes
+
+namespace caffe {
+
+template <typename Dtype>
+void* WindowDataLayerPrefetch(void* layer_pointer) {
+  WindowDataLayer<Dtype>* layer =
+      reinterpret_cast<WindowDataLayer<Dtype>*>(layer_pointer);
+
+  // At each iteration, sample N windows where N*p are foreground (object)
+  // windows and N*(1-p) are background (non-object) windows
+
+  Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
+  Dtype* top_label = layer->prefetch_label_->mutable_cpu_data();
+  const Dtype scale = layer->layer_param_.scale();
+  const int batchsize = layer->layer_param_.batchsize();
+  const int cropsize = layer->layer_param_.cropsize();
+  const bool mirror = layer->layer_param_.mirror();
+  const float fg_fraction = layer->layer_param_.det_fg_fraction();
+  const Dtype* mean = layer->data_mean_.cpu_data();
+  const int mean_off = (layer->data_mean_.width() - cropsize) / 2;
+  cv::Size cv_crop_size(cropsize, cropsize);
+
+  const int num_fg = static_cast<int>(static_cast<float>(batchsize)
+      * fg_fraction);
+  const int num_samples[2] = { batchsize - num_fg, num_fg };
+
+  int itemid = 0;
+  // sample from bg set then fg set
+  for (int is_fg = 0; is_fg < 2; ++is_fg) {
+    for (int dummy = 0; dummy < num_samples[is_fg]; ++dummy) {
+      // sample a window
+      vector<float> window = (is_fg) ?
+          layer->fg_windows_[rand() % layer->fg_windows_.size()] :
+          layer->bg_windows_[rand() % layer->bg_windows_.size()];
+
+      // load the image containing the window
+      std::pair<std::string, vector<int> > image =
+          layer->image_database_[window[WindowDataLayer<Dtype>::IMAGE_INDEX]];
+
+      cv::Mat cv_img = cv::imread(image.first, CV_LOAD_IMAGE_COLOR);
+      if (!cv_img.data) {
+        LOG(ERROR) << "Could not open or find file " << image.first;
+        return (void*)NULL;
+      }
+      const int channels = cv_img.channels();
+
+      // crop window out of image and warp it
+      const int x1 = window[WindowDataLayer<Dtype>::X1];
+      const int y1 = window[WindowDataLayer<Dtype>::Y1];
+      const int x2 = window[WindowDataLayer<Dtype>::X2];
+      const int y2 = window[WindowDataLayer<Dtype>::Y2];
+      cv::Rect roi(x1, y1, x2-x1+1, y2-y1+1);
+      cv::Mat cv_cropped_img = cv_img(roi);
+      cv::resize(cv_cropped_img, cv_cropped_img,
+          cv_crop_size, 0, 0, cv::INTER_LINEAR);
+
+      // horizontal flip at random
+      if (mirror && rand() % 2) {
+        cv::flip(cv_cropped_img, cv_cropped_img, 1);
+      }
+
+      // TODO(rbg): this could probably be made more efficient
+      // but this thread finishes before the GPU is ready,
+      // so it's fine for now
+      for (int c = 0; c < channels; ++c) {
+        for (int h = 0; h < cropsize; ++h) {
+          for (int w = 0; w < cropsize; ++w) {
+            char pixel =
+                static_cast<char>(cv_cropped_img.at<cv::Vec3b>(h, w)[c]);
+
+            top_data[((itemid * channels + c) * cropsize + h) * cropsize + w]
+                = (static_cast<Dtype>(pixel)
+                    - mean[(c * cropsize + h + mean_off)
+                           * cropsize + w + mean_off])
+                  * scale;
+          }
+        }
+      }
+
+      // get window label
+      top_label[itemid] = window[WindowDataLayer<Dtype>::LABEL];
+
+      itemid++;
+    }
+  }
+
+  return (void*)NULL;
+}
+
+template <typename Dtype>
+void WindowDataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  // SetUp runs through the window_file and creates two structures
+  // that hold windows: one for foreground (object) windows and one
+  // for background (non-object) windows. We use an overlap threshold
+  // to decide which is which.
+
+  CHECK_EQ(bottom.size(), 0) << "Window data Layer takes no input blobs.";
+  CHECK_EQ(top->size(), 2) << "Window data Layer produces two blobs as output.";
+
+  // window_file format
+  // repeated:
+  //    # image_index
+  //    img_path (abs path)
+  //    channels
+  //    height
+  //    width
+  //    num_windows
+  //    class_index overlap x1 y1 x2 y2
+
+  LOG(INFO) << "Window data layer:" << std::endl
+      << "  foreground (object) overlap threshold: "
+      << this->layer_param_.det_fg_threshold() << std::endl
+      << "  background (non-object) overlap threshold: "
+      << this->layer_param_.det_bg_threshold() << std::endl
+      << "  foreground sampling fraction: "
+      << this->layer_param_.det_fg_fraction();
+
+  std::ifstream infile(this->layer_param_.source().c_str());
+  CHECK(infile.good()) << "Failed to open window file "
+      << this->layer_param_.source() << std::endl;
+
+  string hashtag;
+  int image_index, channels;
+  while (infile >> hashtag >> image_index) {
+    CHECK_EQ(hashtag, "#");
+    // read image path
+    string image_path;
+    infile >> image_path;
+    // read image dimensions
+    vector<int> image_size(3);
+    infile >> image_size[0] >> image_size[1] >> image_size[2];
+    channels = image_size[0];
+    image_database_.push_back(std::make_pair(image_path, image_size));
+
+    // read each box
+    int num_windows;
+    infile >> num_windows;
+    for (int i = 0; i < num_windows; ++i) {
+      int label, x1, y1, x2, y2;
+      float overlap;
+      infile >> label >> overlap >> x1 >> y1 >> x2 >> y2;
+
+      vector<float> window(WindowDataLayer::NUM);
+      window[WindowDataLayer::IMAGE_INDEX] = image_index;
+      window[WindowDataLayer::LABEL] = label;
+      window[WindowDataLayer::OVERLAP] = overlap;
+      window[WindowDataLayer::X1] = x1;
+      window[WindowDataLayer::Y1] = y1;
+      window[WindowDataLayer::X2] = x2;
+      window[WindowDataLayer::Y2] = y2;
+
+      // add window to foreground list or background list
+      if (overlap >= this->layer_param_.det_fg_threshold()) {
+        fg_windows_.push_back(window);
+      } else if (overlap < this->layer_param_.det_bg_threshold()) {
+        // background window, force label and overlap to 0
+        window[WindowDataLayer::LABEL] = 0;
+        window[WindowDataLayer::OVERLAP] = 0;
+        bg_windows_.push_back(window);
+      }
+    }
+
+    if (image_index % 1000 == 0) {
+      LOG(INFO) << "num: " << image_index << " "
+          << image_path << " "
+          << image_size[0] << " "
+          << image_size[1] << " "
+          << image_size[2] << " "
+          << "windows to process: " << num_windows;
+    }
+  }
+
+  // image
+  int cropsize = this->layer_param_.cropsize();
+  CHECK_GT(cropsize, 0);
+  (*top)[0]->Reshape(
+      this->layer_param_.batchsize(), channels, cropsize, cropsize);
+  prefetch_data_.reset(new Blob<Dtype>(
+      this->layer_param_.batchsize(), channels, cropsize, cropsize));
+
+  LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
+      << (*top)[0]->channels() << "," << (*top)[0]->height() << ","
+      << (*top)[0]->width();
+  // label
+  (*top)[1]->Reshape(this->layer_param_.batchsize(), 1, 1, 1);
+  prefetch_label_.reset(
+      new Blob<Dtype>(this->layer_param_.batchsize(), 1, 1, 1));
+
+  // check if we want to have mean
+  if (this->layer_param_.has_meanfile()) {
+    BlobProto blob_proto;
+    LOG(INFO) << "Loading mean file from " << this->layer_param_.meanfile();
+    ReadProtoFromBinaryFile(this->layer_param_.meanfile().c_str(), &blob_proto);
+    data_mean_.FromProto(blob_proto);
+    CHECK_EQ(data_mean_.num(), 1);
+    CHECK_EQ(data_mean_.width(), data_mean_.height());
+    CHECK_EQ(data_mean_.channels(), channels);
+  } else {
+    // Simply initialize an all-empty mean.
+    data_mean_.Reshape(1, channels, cropsize, cropsize);
+  }
+  // Now, start the prefetch thread. Before calling prefetch, we make two
+  // cpu_data calls so that the prefetch thread does not accidentally make
+  // simultaneous cudaMalloc calls when the main thread is running. In some
+  // GPUs this seems to cause failures if we do not do so.
+  prefetch_data_->mutable_cpu_data();
+  prefetch_label_->mutable_cpu_data();
+  data_mean_.cpu_data();
+  DLOG(INFO) << "Initializing prefetch";
+  CHECK(!pthread_create(&thread_, NULL, WindowDataLayerPrefetch<Dtype>,
+      reinterpret_cast<void*>(this))) << "Pthread execution failed.";
+  DLOG(INFO) << "Prefetch initialized.";
+}
+
+template <typename Dtype>
+void WindowDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  // First, join the thread
+  CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed.";
+  // Copy the data
+  memcpy((*top)[0]->mutable_cpu_data(), prefetch_data_->cpu_data(),
+      sizeof(Dtype) * prefetch_data_->count());
+  memcpy((*top)[1]->mutable_cpu_data(), prefetch_label_->cpu_data(),
+      sizeof(Dtype) * prefetch_label_->count());
+  // Start a new prefetch thread
+  CHECK(!pthread_create(&thread_, NULL, WindowDataLayerPrefetch<Dtype>,
+      reinterpret_cast<void*>(this))) << "Pthread execution failed.";
+}
+
+template <typename Dtype>
+void WindowDataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  // First, join the thread
+  CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed.";
+  // Copy the data
+  CUDA_CHECK(cudaMemcpy((*top)[0]->mutable_gpu_data(),
+      prefetch_data_->cpu_data(), sizeof(Dtype) * prefetch_data_->count(),
+      cudaMemcpyHostToDevice));
+  CUDA_CHECK(cudaMemcpy((*top)[1]->mutable_gpu_data(),
+      prefetch_label_->cpu_data(), sizeof(Dtype) * prefetch_label_->count(),
+      cudaMemcpyHostToDevice));
+  // Start a new prefetch thread
+  CHECK(!pthread_create(&thread_, NULL, WindowDataLayerPrefetch<Dtype>,
+      reinterpret_cast<void*>(this))) << "Pthread execution failed.";
+}
+
+// The backward operations are dummy - they do not carry any computation.
+template <typename Dtype>
+Dtype WindowDataLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+  return Dtype(0.);
+}
+
+template <typename Dtype>
+Dtype WindowDataLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+  return Dtype(0.);
+}
+
+INSTANTIATE_CLASS(WindowDataLayer);
+
+}  // namespace caffe
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index e0bccdd..a1b7776 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -92,6 +92,18 @@ message LayerParameter {
   // be larger than the number of keys in the leveldb.
   optional uint32 rand_skip = 53 [ default = 0 ];
 
+  // Fields related to detection (det_*)
+  // foreground (object) overlap threshold
+  optional float det_fg_threshold = 54 [default = 0.5];
+  // background (non-object) overlap threshold
+  optional float det_bg_threshold = 55 [default = 0.3];
+  // Fraction of batch that should be foreground objects
+  optional float det_fg_fraction = 56 [default = 0.25];
+
+  // Set can_clobber to false to avoid overriding it with a pretrained
+  // network's parameters when finetuning
+  optional bool can_clobber = 57 [default = true];
+
   // For ReshapeLayer, one needs to specify the new dimensions.
   optional int32 new_num = 60 [default = 0];
   optional int32 new_channels = 61 [default = 0];
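
A minimal window_file entry in the format documented in WindowDataLayer::SetUp;
the path, image size, and window rows below are made up for illustration:

    # 0
    /abs/path/to/images/000001.jpg
    3
    500
    353
    2
    1 0.81 48 240 195 371
    0 0.12 8 12 352 498

Each record gives "# image_index", the absolute image path, channels, height,
width, the number of windows, and then one "class_index overlap x1 y1 x2 y2"
row per window. A window with overlap >= det_fg_threshold goes into the
foreground set; one with overlap < det_bg_threshold goes into the background
set with its label forced to 0.

A sketch of how the layer might be wired into a net definition, using only the
fields the layer reads above; the values are examples, and the exact
layers/layer/top nesting depends on the NetParameter message at this revision
of caffe.proto:

    layers {
      layer {
        name: "window_data"
        type: "window_data"
        source: "/abs/path/to/window_file_train.txt"
        meanfile: "/abs/path/to/imagenet_mean.binaryproto"
        batchsize: 128
        cropsize: 227
        mirror: true
        det_fg_threshold: 0.5
        det_bg_threshold: 0.3
        det_fg_fraction: 0.25
      }
      top: "data"
      top: "label"
    }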