From: Evan Shelhamer
Date: Thu, 5 Jun 2014 17:53:42 +0000 (-0700)
Subject: padding for max pooling
X-Git-Tag: submit/tizen/20180823.020014~653^2~126^2~1
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0d257e431404ad11df48816b85a94898bc86bf13;p=platform%2Fupstream%2Fcaffeonacl.git

padding for max pooling

Max pooling pads with -inf when the padding parameter is set. Padding for
pooling, like padding for convolution, can preserve the dimensions of the
bottom at the top: setting the padding to floor(kernel_size / 2) makes the
top output the "same" size as the bottom input, instead of only its "valid"
part.
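A quick way to check the "same" vs. "valid" arithmetic: the sketch below is a
minimal standalone illustration, not part of the patch. It assumes the
pooled-size formula used by PoolingLayer::SetUp,
pooled = ceil((bottom + 2 * pad - kernel_size) / stride) + 1, and
pooled_dim() is a hypothetical helper written only for this example.

#include <cmath>
#include <cstdio>

// Assumed pooled-size formula (see PoolingLayer::SetUp); not Caffe code.
static int pooled_dim(int bottom, int kernel_size, int stride, int pad) {
  return static_cast<int>(std::ceil(
      static_cast<float>(bottom + 2 * pad - kernel_size) / stride)) + 1;
}

int main() {
  const int height = 7, kernel_size = 3, stride = 1;
  // Unpadded ("valid") pooling shrinks the top: 7 -> 5.
  std::printf("pad = 0: %d\n", pooled_dim(height, kernel_size, stride, 0));
  // pad = floor(kernel_size / 2) keeps the top the "same" size as the
  // bottom for an odd kernel at stride 1: 7 -> 7.
  std::printf("pad = %d: %d\n", kernel_size / 2,
              pooled_dim(height, kernel_size, stride, kernel_size / 2));
  return 0;
}

At stride 1 with an odd kernel, pad = floor(kernel_size / 2) makes
2 * pad - kernel_size = -1, so the formula yields (bottom - 1) + 1 and the
top matches the bottom exactly.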
---

diff --git a/src/caffe/layers/pooling_layer.cpp b/src/caffe/layers/pooling_layer.cpp
index 5d6921f..80ce4df 100644
--- a/src/caffe/layers/pooling_layer.cpp
+++ b/src/caffe/layers/pooling_layer.cpp
@@ -32,9 +32,11 @@ void PoolingLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
   stride_ = this->layer_param_.pooling_param().stride();
   pad_ = this->layer_param_.pooling_param().pad();
   if (pad_ != 0) {
-    CHECK_EQ(this->layer_param_.pooling_param().pool(),
-        PoolingParameter_PoolMethod_AVE)
-        << "Padding implemented only for average pooling.";
+    CHECK(this->layer_param_.pooling_param().pool()
+        == PoolingParameter_PoolMethod_AVE
+        || this->layer_param_.pooling_param().pool()
+        == PoolingParameter_PoolMethod_MAX)
+        << "Padding implemented only for average and max pooling.";
   }
   channels_ = bottom[0]->channels();
   height_ = bottom[0]->height();
@@ -92,10 +94,12 @@ Dtype PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
     for (int c = 0; c < channels_; ++c) {
       for (int ph = 0; ph < pooled_height_; ++ph) {
         for (int pw = 0; pw < pooled_width_; ++pw) {
-          int hstart = ph * stride_;
-          int wstart = pw * stride_;
+          int hstart = ph * stride_ - pad_;
+          int wstart = pw * stride_ - pad_;
           int hend = min(hstart + kernel_size_, height_);
           int wend = min(wstart + kernel_size_, width_);
+          hstart = max(hstart, 0);
+          wstart = max(wstart, 0);
           const int pool_index = ph * pooled_width_ + pw;
           for (int h = hstart; h < hend; ++h) {
             for (int w = wstart; w < wend; ++w) {
diff --git a/src/caffe/layers/pooling_layer.cu b/src/caffe/layers/pooling_layer.cu
index 31be47e..7bb397b 100644
--- a/src/caffe/layers/pooling_layer.cu
+++ b/src/caffe/layers/pooling_layer.cu
@@ -17,17 +17,19 @@ template <typename Dtype>
 __global__ void MaxPoolForward(const int nthreads, const Dtype* bottom_data,
     const int num, const int channels, const int height,
     const int width, const int pooled_height, const int pooled_width,
-    const int kernel_size, const int stride, Dtype* top_data,
+    const int kernel_size, const int stride, const int pad, Dtype* top_data,
     int* mask, Dtype* top_mask) {
   CUDA_KERNEL_LOOP(index, nthreads) {
     int pw = index % pooled_width;
     int ph = (index / pooled_width) % pooled_height;
     int c = (index / pooled_width / pooled_height) % channels;
     int n = index / pooled_width / pooled_height / channels;
-    int hstart = ph * stride;
+    int hstart = ph * stride - pad;
+    int wstart = pw * stride - pad;
     int hend = min(hstart + kernel_size, height);
-    int wstart = pw * stride;
     int wend = min(wstart + kernel_size, width);
+    hstart = max(hstart, 0);
+    wstart = max(wstart, 0);
     Dtype maxval = -FLT_MAX;
     int maxidx = -1;
     bottom_data += (n * channels + c) * height * width;
@@ -170,7 +172,7 @@ Dtype PoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
     MaxPoolForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
         count, bottom_data, bottom[0]->num(), channels_,
         height_, width_, pooled_height_, pooled_width_, kernel_size_, stride_,
-        top_data, mask, top_mask);
+        pad_, top_data, mask, top_mask);
     break;
   case PoolingParameter_PoolMethod_AVE:
     // NOLINT_NEXT_LINE(whitespace/operators)
@@ -212,7 +214,7 @@ __global__ void MaxPoolBackward(const int nthreads, const Dtype* top_diff,
     const int* mask, const Dtype* top_mask, const int num, const int channels,
     const int height, const int width, const int pooled_height,
     const int pooled_width, const int kernel_size, const int stride,
-    Dtype* bottom_diff) {
+    const int pad, Dtype* bottom_diff) {
   CUDA_KERNEL_LOOP(index, nthreads) {
     // find out the local index
     // find out the local offset
@@ -220,10 +222,12 @@ __global__ void MaxPoolBackward(const int nthreads, const Dtype* top_diff,
     int h = (index / width) % height;
     int c = (index / width / height) % channels;
     int n = index / width / height / channels;
-    int phstart = (h < kernel_size) ? 0 : (h - kernel_size) / stride + 1;
-    int phend = min(h / stride + 1, pooled_height);
-    int pwstart = (w < kernel_size) ? 0 : (w - kernel_size) / stride + 1;
-    int pwend = min(w / stride + 1, pooled_width);
+    int phstart =
+        (h + pad < kernel_size) ? 0 : (h + pad - kernel_size) / stride + 1;
+    int phend = min((h + pad) / stride + 1, pooled_height);
+    int pwstart =
+        (w + pad < kernel_size) ? 0 : (w + pad - kernel_size) / stride + 1;
+    int pwend = min((w + pad) / stride + 1, pooled_width);
     Dtype gradient = 0;
     int offset = (n * channels + c) * pooled_height * pooled_width;
     top_diff += offset;
@@ -342,7 +346,7 @@ void PoolingLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
     MaxPoolBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
         count, top_diff, mask, top_mask, top[0]->num(), channels_,
         height_, width_, pooled_height_, pooled_width_,
-        kernel_size_, stride_, bottom_diff);
+        kernel_size_, stride_, pad_, bottom_diff);
     break;
   case PoolingParameter_PoolMethod_AVE:
     // NOLINT_NEXT_LINE(whitespace/operators)
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 7d44085..e540a95 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -353,7 +353,8 @@ message PoolingParameter {
   optional PoolMethod pool = 1 [default = MAX]; // The pooling method
   optional uint32 kernel_size = 2; // The kernel size
   optional uint32 stride = 3 [default = 1]; // The stride
-  // The padding size -- currently implemented only for average pooling.
+  // The padding size -- currently implemented only for average and max pooling.
+  // average pooling zero pads. max pooling -inf pads.
   optional uint32 pad = 4 [default = 0];
 }
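For reference, a pooling layer exercising the new parameter could look like
the prototxt sketch below. This assumes the layer definition format matching
this version of caffe.proto (layers / type: POOLING); the layer and blob
names are made up for illustration.

layers {
  name: "pool1"
  type: POOLING
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 1
    pad: 1  # floor(3 / 2) = 1 gives "same" output at stride 1
  }
}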