From 42a2a3fc09a8268cc9e5fac43472b38b7aaf1336 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Sat, 24 May 2014 18:09:05 -0700
Subject: [PATCH] optionally output the mask to a top blob instead of storing
 internally

---
 src/caffe/layers/pooling_layer.cpp | 66 +++++++++++++++++++++++++++++--------
 src/caffe/layers/pooling_layer.cu  | 67 ++++++++++++++++++++++++----------
 2 files changed, 103 insertions(+), 30 deletions(-)

diff --git a/src/caffe/layers/pooling_layer.cpp b/src/caffe/layers/pooling_layer.cpp
index 3d216de..928c8c7 100644
--- a/src/caffe/layers/pooling_layer.cpp
+++ b/src/caffe/layers/pooling_layer.cpp
@@ -19,7 +19,15 @@ template <typename Dtype>
 void PoolingLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
       vector<Blob<Dtype>*>* top) {
   CHECK_EQ(bottom.size(), 1) << "PoolingLayer takes a single blob as input.";
-  CHECK_EQ(top->size(), 1) << "PoolingLayer takes a single blob as output.";
+  if (this->layer_param_.pooling_param().pool() ==
+      PoolingParameter_PoolMethod_MAX) {
+    CHECK_GE(top->size(), 1)
+        << "MaxPoolingLayer takes at least one blob as output.";
+    CHECK_LE(top->size(), 2)
+        << "MaxPoolingLayer takes at most two blobs as output.";
+  } else {
+    CHECK_EQ(top->size(), 1) << "PoolingLayer takes a single blob as output.";
+  }
   kernel_size_ = this->layer_param_.pooling_param().kernel_size();
   stride_ = this->layer_param_.pooling_param().stride();
   pad_ = this->layer_param_.pooling_param().pad();
@@ -37,9 +45,12 @@ void PoolingLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
       width_ + 2 * pad_ - kernel_size_) / stride_)) + 1;
   (*top)[0]->Reshape(bottom[0]->num(), channels_, pooled_height_,
       pooled_width_);
+  if (top->size() > 1) {
+    (*top)[1]->ReshapeLike(*(*top)[0]);
+  }
   // If max pooling, we will initialize the vector index part.
   if (this->layer_param_.pooling_param().pool() ==
-      PoolingParameter_PoolMethod_MAX) {
+      PoolingParameter_PoolMethod_MAX && top->size() == 1) {
     max_idx_.reset(new Blob<int>(bottom[0]->num(), channels_,
         pooled_height_, pooled_width_));
   }
@@ -61,15 +72,23 @@ Dtype PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
   // Different pooling methods. We explicitly do the switch outside the for
   // loop to save time, although this results in more codes.
   int top_count = (*top)[0]->count();
+  // We'll output the mask to top[1] if it's of size >1.
+  const bool use_top_mask = top->size() > 1;
   int* mask;
+  Dtype* top_mask;
   switch (this->layer_param_.pooling_param().pool()) {
   case PoolingParameter_PoolMethod_MAX:
-    // Initialize
-    mask = max_idx_->mutable_cpu_data();
-    for (int i = 0; i < top_count; ++i) {
-      top_data[i] = -FLT_MAX;
-      mask[i] = -1;
+    // Initialize
+    if (use_top_mask) {
+      top_mask = (*top)[1]->mutable_cpu_data();
+      caffe_set(top_count, Dtype(-1), top_mask);
+    } else {
+      mask = max_idx_->mutable_cpu_data();
+      for (int i = 0; i < top_count; ++i) {
+        mask[i] = -1;
+      }
     }
+    caffe_set(top_count, Dtype(-FLT_MAX), top_data);
     // The main loop
     for (int n = 0; n < bottom[0]->num(); ++n) {
       for (int c = 0; c < channels_; ++c) {
@@ -85,7 +104,11 @@ Dtype PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                 const int index = h * width_ + w;
                 if (bottom_data[index] > top_data[pool_index]) {
                   top_data[pool_index] = bottom_data[index];
-                  mask[pool_index] = index;
+                  if (use_top_mask) {
+                    top_mask[pool_index] = static_cast<Dtype>(index);
+                  } else {
+                    mask[pool_index] = index;
+                  }
                 }
               }
             }
@@ -94,7 +117,11 @@ Dtype PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
         // compute offset
         bottom_data += bottom[0]->offset(0, 1);
         top_data += (*top)[0]->offset(0, 1);
-        mask += (*top)[0]->offset(0, 1);
+        if (use_top_mask) {
+          top_mask += (*top)[0]->offset(0, 1);
+        } else {
+          mask += (*top)[0]->offset(0, 1);
+        }
       }
     }
     break;
@@ -150,23 +177,36 @@ void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
   Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
   // Different pooling methods. We explicitly do the switch outside the for
   // loop to save time, although this results in more codes.
-  memset(bottom_diff, 0, (*bottom)[0]->count() * sizeof(Dtype));
+  caffe_set((*bottom)[0]->count(), Dtype(0), bottom_diff);
+  // We'll output the mask to top[1] if it's of size >1.
+  const bool use_top_mask = top.size() > 1;
   const int* mask;
+  const Dtype* top_mask;
   switch (this->layer_param_.pooling_param().pool()) {
   case PoolingParameter_PoolMethod_MAX:
     // The main loop
-    mask = max_idx_->cpu_data();
+    if (use_top_mask) {
+      top_mask = top[1]->cpu_data();
+    } else {
+      mask = max_idx_->cpu_data();
+    }
     for (int n = 0; n < top[0]->num(); ++n) {
       for (int c = 0; c < channels_; ++c) {
         for (int ph = 0; ph < pooled_height_; ++ph) {
           for (int pw = 0; pw < pooled_width_; ++pw) {
             const int index = ph * pooled_width_ + pw;
-            bottom_diff[mask[index]] += top_diff[index];
+            const int bottom_index =
+                use_top_mask ? top_mask[index] : mask[index];
+            bottom_diff[bottom_index] += top_diff[index];
           }
         }
         bottom_diff += (*bottom)[0]->offset(0, 1);
         top_diff += top[0]->offset(0, 1);
-        mask += top[0]->offset(0, 1);
+        if (use_top_mask) {
+          top_mask += top[0]->offset(0, 1);
+        } else {
+          mask += top[0]->offset(0, 1);
+        }
       }
     }
     break;
diff --git a/src/caffe/layers/pooling_layer.cu b/src/caffe/layers/pooling_layer.cu
index 49e5d77..31be47e 100644
--- a/src/caffe/layers/pooling_layer.cu
+++ b/src/caffe/layers/pooling_layer.cu
@@ -17,7 +17,8 @@ template <typename Dtype>
 __global__ void MaxPoolForward(const int nthreads, const Dtype* bottom_data,
     const int num, const int channels, const int height,
     const int width, const int pooled_height, const int pooled_width,
-    const int kernel_size, const int stride, Dtype* top_data, int* mask) {
+    const int kernel_size, const int stride, Dtype* top_data,
+    int* mask, Dtype* top_mask) {
   CUDA_KERNEL_LOOP(index, nthreads) {
     int pw = index % pooled_width;
     int ph = (index / pooled_width) % pooled_height;
@@ -39,7 +40,11 @@ __global__ void MaxPoolForward(const int nthreads, const Dtype* bottom_data,
       }
     }
     top_data[index] = maxval;
-    mask[index] = maxidx;
+    if (mask) {
+      mask[index] = maxidx;
+    } else {
+      top_mask[index] = maxidx;
+    }
   }
 }
 
@@ -150,15 +155,22 @@ Dtype PoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* top_data = (*top)[0]->mutable_gpu_data();
   int count = (*top)[0]->count();
-  int* mask;
+  // We'll output the mask to top[1] if it's of size >1.
+  const bool use_top_mask = top->size() > 1;
+  int* mask = NULL;
+  Dtype* top_mask = NULL;
   switch (this->layer_param_.pooling_param().pool()) {
   case PoolingParameter_PoolMethod_MAX:
-    mask = max_idx_->mutable_gpu_data();
+    if (use_top_mask) {
+      top_mask = (*top)[1]->mutable_gpu_data();
+    } else {
+      mask = max_idx_->mutable_gpu_data();
+    }
     // NOLINT_NEXT_LINE(whitespace/operators)
     MaxPoolForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
         count, bottom_data, bottom[0]->num(), channels_,
         height_, width_, pooled_height_, pooled_width_, kernel_size_, stride_,
-        top_data, mask);
+        top_data, mask, top_mask);
     break;
   case PoolingParameter_PoolMethod_AVE:
     // NOLINT_NEXT_LINE(whitespace/operators)
@@ -197,9 +209,10 @@ Dtype PoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
 
 template <typename Dtype>
 __global__ void MaxPoolBackward(const int nthreads, const Dtype* top_diff,
-    const int* mask, const int num, const int channels, const int height,
-    const int width, const int pooled_height, const int pooled_width,
-    const int kernel_size, const int stride, Dtype* bottom_diff) {
+    const int* mask, const Dtype* top_mask, const int num, const int channels,
+    const int height, const int width, const int pooled_height,
+    const int pooled_width, const int kernel_size, const int stride,
+    Dtype* bottom_diff) {
   CUDA_KERNEL_LOOP(index, nthreads) {
     // find out the local index
     // find out the local offset
@@ -212,12 +225,25 @@ __global__ void MaxPoolBackward(const int nthreads, const Dtype* top_diff,
     int pwstart = (w < kernel_size) ? 0 : (w - kernel_size) / stride + 1;
     int pwend = min(w / stride + 1, pooled_width);
     Dtype gradient = 0;
-    top_diff += (n * channels + c) * pooled_height * pooled_width;
-    mask += (n * channels + c) * pooled_height * pooled_width;
-    for (int ph = phstart; ph < phend; ++ph) {
-      for (int pw = pwstart; pw < pwend; ++pw) {
-        if (mask[ph * pooled_width + pw] == h * width + w)
-          gradient += top_diff[ph * pooled_width + pw];
+    int offset = (n * channels + c) * pooled_height * pooled_width;
+    top_diff += offset;
+    if (mask) {
+      mask += offset;
+      for (int ph = phstart; ph < phend; ++ph) {
+        for (int pw = pwstart; pw < pwend; ++pw) {
+          if (mask[ph * pooled_width + pw] == h * width + w) {
+            gradient += top_diff[ph * pooled_width + pw];
+          }
+        }
+      }
+    } else {
+      top_mask += offset;
+      for (int ph = phstart; ph < phend; ++ph) {
+        for (int pw = pwstart; pw < pwend; ++pw) {
+          if (top_mask[ph * pooled_width + pw] == h * width + w) {
+            gradient += top_diff[ph * pooled_width + pw];
+          }
+        }
       }
     }
     bottom_diff[index] = gradient;
@@ -300,14 +326,21 @@ void PoolingLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
   Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
   int count = (*bottom)[0]->count();
   CUDA_CHECK(cudaMemset(bottom_diff, 0, sizeof(Dtype) * count));
-  const int* mask;
+  // We'll output the mask to top[1] if it's of size >1.
+  const bool use_top_mask = top.size() > 1;
+  const int* mask = NULL;
+  const Dtype* top_mask = NULL;
   switch (this->layer_param_.pooling_param().pool()) {
   case PoolingParameter_PoolMethod_MAX:
-    mask = max_idx_->gpu_data();
+    if (use_top_mask) {
+      top_mask = top[1]->gpu_data();
+    } else {
+      mask = max_idx_->gpu_data();
+    }
     caffe_gpu_set(count, Dtype(0.), bottom_diff);
     // NOLINT_NEXT_LINE(whitespace/operators)
     MaxPoolBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
-        count, top_diff, mask, top[0]->num(), channels_,
+        count, top_diff, mask, top_mask, top[0]->num(), channels_,
         height_, width_, pooled_height_, pooled_width_,
         kernel_size_, stride_, bottom_diff);
     break;
-- 
2.7.4
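
Usage note (editorial, not part of the patch): a minimal sketch of how a caller might request the max-pooling mask as a second top blob after this change. The header names, blob shapes, kernel/stride values, and the float instantiation are assumptions based on the circa-2014 Caffe API visible in the diff above, not code from this commit.

  #include <vector>

  #include "caffe/blob.hpp"
  #include "caffe/proto/caffe.pb.h"
  #include "caffe/vision_layers.hpp"  // assumed header declaring PoolingLayer at this revision

  void MaxPoolWithMaskExample() {
    // Configure a 2x2, stride-2 MAX pooling layer.
    caffe::LayerParameter param;
    param.mutable_pooling_param()->set_pool(
        caffe::PoolingParameter_PoolMethod_MAX);
    param.mutable_pooling_param()->set_kernel_size(2);
    param.mutable_pooling_param()->set_stride(2);

    caffe::Blob<float> input(1, 3, 4, 4);  // bottom: N x C x H x W
    caffe::Blob<float> pooled;             // top[0]: pooled output
    caffe::Blob<float> mask;               // top[1]: argmax indices, stored as Dtype

    std::vector<caffe::Blob<float>*> bottom(1, &input);
    std::vector<caffe::Blob<float>*> top;
    top.push_back(&pooled);
    top.push_back(&mask);  // a second top blob selects the use_top_mask path

    caffe::PoolingLayer<float> layer(param);
    layer.SetUp(bottom, &top);    // reshapes top[1] like top[0]; max_idx_ is not allocated
    layer.Forward(bottom, &top);  // top[1] now holds flattened indices into the bottom blob
  }

With a single top blob the layer behaves as before the patch, keeping the argmax indices in its internal max_idx_ buffer.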